In [1]:
## Read CSV files of SEC Data 
## Combine into DataFrames and Export for use with other JupyterLab Files 

In [2]:
import pandas as pd

import glob

from pathlib import Path
import csv

import os
import requests
import json

# from dotenv import load_dotenv
# load_dotenv()

In [3]:
## Load pickle for exports and imports of data  
import pickle 
def load_obj(path):
    """Read the file at ``path`` in binary mode and return the unpickled object.

    NOTE(review): ``pickle.load`` can execute arbitrary code, so only use this
    on files this project wrote itself.
    """
    with open(path, mode='rb') as handle:
        restored = pickle.load(handle)
    return restored
    
def save_obj(obj, path):
    """Pickle ``obj`` to the file at ``path`` using the highest available protocol.

    The file is opened in binary write mode, so an existing file is overwritten.
    """
    with open(path, mode='wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [4]:
## Read all SEC data and create a big DataFrame 

## Header line of the SEC files; read_ftd_file uses it to find the single raw column
header = "SETTLEMENT DATE|CUSIP|SYMBOL|QUANTITY (FAILS)|DESCRIPTION|PRICE"

## Reader for a single FTD file
def read_ftd_file(csv_file_path):
    """Load one FTD file and return it as a Date-indexed DataFrame.

    The file is read with pandas' default (comma) separator, so each
    pipe-delimited line lands in a single column named after the module-level
    ``header`` string. That column is then split on ``|`` into separate fields.

    Parameters
    ----------
    csv_file_path : str or path-like
        Location of the file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``CUSIP``, ``SYMBOL`` and ``QUANTITY_FAILS`` (values left
        exactly as produced by the string split), indexed by ``Date``
        converted with ``pd.to_datetime``.
    """
    raw = pd.read_csv(Path(csv_file_path), index_col=None)

    # Discard the final two rows of the file
    # (presumably trailer/summary lines -- confirm against the SEC file format).
    body = raw.iloc[:-2, :]

    # Give the single raw column a short name, then break it into fields.
    single_column = body.rename(columns={header: "Header"})["Header"]
    fields = single_column.str.split("|", expand=True)

    field_names = {
        0: 'Date',
        1: 'CUSIP',
        2: 'SYMBOL',
        3: 'QUANTITY_FAILS',
        4: 'DESCRIPTION',
        5: 'PRICE',
    }
    fields = fields.rename(columns=field_names)

    # Author's note: DESCRIPTION has commas in its data and is not needed;
    # PRICE has errors and gaps, so daily prices are sourced from IEX later.
    kept = fields.drop(columns=['DESCRIPTION', 'PRICE'])
    kept = kept.assign(Date=pd.to_datetime(kept['Date']))

    return kept.set_index('Date')

In [5]:
## Set Path variables for SEC data files 
path = r'C:\Users\watso\OneDrive\Projects\FTD_Project_Personal\SEC_Files_CSV' # USE YOUR PATH 

# glob returns files in an unspecified order; sort so the row order of the
# combined frame is the same on every run and every machine.
all_files = sorted(glob.glob(path + "/*.csv"))

# Read every file first and concatenate once. The previous loop compared each
# filename (a string) to 0, which was never true, and re-concatenated the
# growing frame on every iteration.
ftd_frames = [read_ftd_file(filename) for filename in all_files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder contains no CSV files.
ftd_df = pd.concat(ftd_frames, axis='rows') if ftd_frames else pd.DataFrame()

ftd_df

Unnamed: 0_level_0,CUSIP,SYMBOL,QUANTITY_FAILS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-02,B38564108,EURN,465894
2020-01-02,G0080J104,ACTT,18022
2020-01-02,G01125106,AFYA,70
2020-01-02,G0120M109,AGBA,97
2020-01-02,G0129K104,AYR,139
...,...,...,...
2021-10-14,98981L100,ZMDTF,131
2021-10-14,989817101,ZUMZ,358
2021-10-14,989825104,ZURVY,1544
2021-10-14,98985X100,ZY,49700


In [6]:
## Distinct (SYMBOL, CUSIP) pairs found in the FTD data, ordered by SYMBOL
## and renumbered with a fresh 0..n-1 index (the Date index is discarded).
cusip_symbol_df = (
    ftd_df[['SYMBOL', 'CUSIP']]
    .drop_duplicates()
    .sort_values('SYMBOL')
    .reset_index(drop=True)
)
cusip_symbol_df

Unnamed: 0,SYMBOL,CUSIP
0,,Q0819E102
1,1208PS,466391208
2,3126REORGPAY,G48833126
3,4207REORGPYMNT,723664207
4,5116REORGPYMNT,674215116
...,...,...
21568,ZYXI,98986M103
21569,ZZHGY,98955F105
21570,ZZLL,98880P202
21571,ZZZOD,98959W203


In [7]:
## Export DFs
# Create the output folder first: to_csv and open(..., 'wb') both fail when
# the target directory does not exist (e.g. on a fresh checkout).
Path('AnalysisResources').mkdir(parents=True, exist_ok=True)

# CSV copies for inspection; pickles keep the index and dtypes for the
# follow-on notebooks.
ftd_df.to_csv('AnalysisResources/analysis_ftd_all_data.csv')
cusip_symbol_df.to_csv('AnalysisResources/analysis_symbol_all_list.csv')
save_obj(ftd_df, 'AnalysisResources/analysis_ftd_all_data.pkl')
save_obj(cusip_symbol_df, 'AnalysisResources/analysis_symbol_all_list.pkl')

In [8]:
## The SEC data is now in one big DataFrame. Next step: sort it and append to specific stocks.
## Continue in next Notebook file. 