In [None]:
import pandas as pd

import glob

from pathlib import Path
import csv

import os
import requests
import json

import yfinance as yf

from dotenv import load_dotenv
load_dotenv()

In [2]:
## Start date (ISO 'YYYY-MM-DD' string) - dataframes are meant to be created starting from this date
## NOTE(review): not referenced by any cell visible here - confirm it is still used
start_date = '2020-09-14'

In [3]:
## Configuration and helpers for reading the FTD data downloaded from the SEC website
# Name of the single column pandas produces when a pipe-delimited SEC file is read as CSV
header = "SETTLEMENT DATE|CUSIP|SYMBOL|QUANTITY (FAILS)|DESCRIPTION|PRICE"

# Directory holding the SEC FTD CSV files.
# Set the FTD_SEC_FILES_DIR environment variable (e.g. in the .env file) to point at your own copy;
# the original hard-coded location is kept as the fallback.
path = os.getenv('FTD_SEC_FILES_DIR', r'C:/Users/watso/Desktop/FTDProject/SEC_Files_CSV')
# Sorted so the files are processed in the same order on every run (glob order is arbitrary)
all_files = sorted(glob.glob(path + "/*.csv"))

# Default security used throughout the notebook
GME_CUSIP_number = "36467W109"
GME_symbol = 'GME'

# Main function - pass the CUSIP number (used to select the rows) and the symbol (used to name
#                 the output column) to get the FTD data for that CUSIP number
# All other functions in this cell exist to support this one
def return_dataframe(cusip_number,symbol):
    """Return the cleaned FTD history for one security.

    Reads every SEC file for ``cusip_number`` via ``read_ftd_data_any_stock`` and passes
    the combined frame through ``fix_dataframe``.
    """
    return fix_dataframe(read_ftd_data_any_stock(cusip_number, symbol))

# Use return_dataframe as your function to call FTD Data 


# Reads one SEC FTD file and returns the fail quantities reported for a single CUSIP
def read_ftd_file(csv_path, cusip_number, symbol):
    """Extract the fails-to-deliver rows of one security from one SEC file.

    The file is read as a one-column CSV whose column is named by the module-level
    ``header``; every row is then split on ``'|'`` into its six fields.

    Parameters
    ----------
    csv_path : str or path-like
        Location of the SEC FTD file.
    cusip_number : str
        CUSIP used to select the rows.
    symbol : str
        Ticker; only used to name the output column ``<symbol>_QUANTITY_FAILS``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Date`` (datetime) with the single column ``<symbol>_QUANTITY_FAILS``.
        The quantities are still strings at this point; ``fix_dataframe`` converts them.

    Raises
    ------
    KeyError
        If the file has no row for ``cusip_number``.
    """
    raw = pd.read_csv(Path(csv_path), index_col=False)

    # Split on the pipe directly rather than replacing '|' with ',' first
    fields = raw[header].str.split('|', expand=True)
    fields = fields.rename(
        columns={0: 'Date', 1: 'CUSIP', 2: 'SYMBOL', 3: 'QUANTITY_FAILS', 4: 'DESCRIPTION', 5: 'PRICE'}
    )

    # A list selector always yields a DataFrame, even when exactly one row matches
    # (a scalar selector returns a Series then, which broke the column handling below).
    # It still raises KeyError when the CUSIP is absent.
    matches = fields.set_index('CUSIP').loc[[cusip_number]]

    fails = matches[['Date', 'QUANTITY_FAILS']].copy()
    fails['Date'] = pd.to_datetime(fails['Date'])
    fails = fails.set_index('Date')
    return fails.rename(columns={'QUANTITY_FAILS': symbol + '_QUANTITY_FAILS'})


def fix_dataframe(dataframe):
    """Infer column dtypes, drop repeated records and sort an FTD frame by date.

    The frame is round-tripped through CSV text so pandas re-infers the dtype of every
    column (the values coming from ``read_ftd_file`` are split strings) and parses the
    ``Date`` index. The round trip happens in memory, so no temporary file or
    ``../FilesTemp`` directory is needed.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose index is named ``Date``.

    Returns
    -------
    pandas.DataFrame
        Re-parsed frame, sorted by ``Date``, with records that repeat both the date and
        all values removed.
    """
    from io import StringIO  # local import: only this function needs it

    buffer = StringIO()
    dataframe.to_csv(buffer)
    buffer.seek(0)
    reparsed = pd.read_csv(buffer, parse_dates=True, index_col="Date")

    # drop_duplicates() ignores the index, so de-duplicate with Date as a regular column;
    # otherwise two different days with the same fail quantity collapse into one row.
    deduped = reparsed.reset_index().drop_duplicates().set_index("Date")
    return deduped.sort_index()

# Stack two DataFrames on top of each other (row-wise)
def concat_df(df_1,df_2):
    """Return ``df_1`` followed by ``df_2``, keeping each frame's own index labels."""
    return pd.concat([df_1, df_2], axis='rows')


# This function can be used to call data from the SEC files using any CUSIP number
def read_ftd_data_any_stock(cusip_number,symbol):
    """Collect the FTD rows of one security from every file in ``all_files``.

    Parameters
    ----------
    cusip_number : str
        CUSIP used to select rows in each file.
    symbol : str
        Ticker used to name the quantity column.

    Returns
    -------
    pandas.DataFrame
        The per-file frames from ``read_ftd_file`` stacked row-wise, in ``all_files``
        order. An empty DataFrame when ``all_files`` is empty.

    Raises
    ------
    KeyError
        Propagated from ``read_ftd_file`` when a file has no row for the CUSIP.
    """
    # Read everything first and concatenate once, instead of growing a frame per file
    frames = [
        read_ftd_file(csv_path=filename, cusip_number=cusip_number, symbol=symbol)
        for filename in all_files
    ]
    if not frames:
        # Same result the original loop produced when no files were found
        return pd.DataFrame()
    return pd.concat(frames, axis='rows')

In [4]:
## IEX setup and connectivity test

# The sandbox API key appears to be sufficient for the data used here,
# so the real API key is left disabled.

#iex_api_key = os.getenv("IEX_API_KEY")
# None when IEX_TEST_API_KEY is not set in the environment
iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

base_url = 'https://cloud.iexapis.com/stable/'
sandbox_url = 'https://sandbox.iexapis.com/stable/'

#token = os.environ.get('IEX_API_KEY')
# Reads the same environment variable as iex_test_api_key; this is the name the request helpers use
test_token = os.environ.get('IEX_TEST_API_KEY')

# Connectivity check: requests the status endpoint on base_url (no token is sent)
test_resp = requests.get(base_url + 'status')
test_resp

<Response [200]>

In [5]:
## IEX Calls and Functions 

def get_chart(stock_ticker, chart_range='14m'):
    """Fetch the IEX sandbox chart for a ticker as a date-indexed DataFrame.

    Parameters
    ----------
    stock_ticker : str
        Ticker symbol inserted into the request URL.
    chart_range : str, default '14m'
        Range segment of the chart URL.

    Returns
    -------
    pandas.DataFrame
        Every field of the JSON response, with the ``date`` field parsed to datetime
        and used as the index (named ``Date``).
    """
    url = sandbox_url + 'stock/' + stock_ticker + '/chart/' + chart_range + '?token=' + test_token
    chart = pd.DataFrame(requests.get(url).json())

    chart = chart.rename(columns={'date': 'Date'})
    chart['Date'] = pd.to_datetime(chart['Date'])
    return chart.set_index('Date')


def get_close_price(stock_ticker, chart_range='14m'):
    """Return only the ``close`` column of ``get_chart(stock_ticker, chart_range)``."""
    return get_chart(stock_ticker, chart_range)[['close']]


def get_daily_data(stock_ticker, chart_range='14m'):
    """Return the close/open/low/high/volume columns of ``get_chart(stock_ticker, chart_range)``."""
    return get_chart(stock_ticker, chart_range)[['close', 'open', 'low', 'high', 'volume']]

def join_ftd_and_close(ftd_df,close_df):
    """Inner-join the FTD frame and the price frame on their indexes."""
    return pd.merge(ftd_df, close_df, how='inner', left_index=True, right_index=True)

In [6]:
def do_all(cusip_number,symbol,chart_range='14m'):
    """Return the FTD data of a security joined with its closing prices.

    ``chart_range`` defaults to '14m' and is forwarded to ``get_close_price``.
    """
    return join_ftd_and_close(
        return_dataframe(cusip_number, symbol),   # FTD data
        get_close_price(symbol, chart_range),     # close data
    )

def do_more(cusip_number,symbol,chart_range='14m'):
    """Return the FTD data of a security joined with its daily price/volume data.

    ``chart_range`` defaults to '14m' and is forwarded to ``get_daily_data``.
    """
    return join_ftd_and_close(
        return_dataframe(cusip_number, symbol),   # FTD data
        get_daily_data(symbol, chart_range),      # daily close/open/low/high/volume
    )

def do_all_and_export(cusip_number,symbol,chart_range='14m'):
    """Join FTD data with daily price data, write it to CSV and return it.

    ``chart_range`` defaults to '14m'. The merged frame is written to
    ``../FilesExport/<symbol>_all_data.csv``.
    """
    merged = join_ftd_and_close(
        return_dataframe(cusip_number, symbol),   # FTD data
        get_daily_data(symbol, chart_range),      # daily close/open/low/high/volume
    )
    export_path = '../FilesExport/' + symbol + '_all_data.csv'
    merged.to_csv(export_path)
    return merged

def export_ftd_csv_data(dataframe,symbol):
    """Write ``dataframe`` to ``../FilesExport/<symbol>_ftd_data.csv``."""
    export_path = '../FilesExport/' + symbol + '_ftd_data.csv'
    dataframe.to_csv(export_path)

In [7]:
def get_ticker_info(symbol):
    """Look up share counts for ``symbol`` through yfinance.

    Returns a dict with the keys ``SharesOutstanding`` and ``FloatShares``, taken from
    the ``sharesOutstanding`` and ``floatShares`` entries of the ticker's ``info``.
    """
    info = yf.Ticker(symbol).info
    return {
        'SharesOutstanding' : info['sharesOutstanding'],
        'FloatShares' : info['floatShares'],
    }

In [8]:
## Read FTD files to build the list of tickers to get data for
## Three FTD files are loaded to increase the number of stock tickers to parse
ftd_sources = (
    Path("../SEC_Files_CSV/sec_ftd_202109a.csv"),
    Path("../SEC_Files_CSV/sec_ftd_202108b.csv"),
    Path("../Resources/sec_ftd_1.csv"),  ## Original file parsed for the first test of the code;
                                         ## included so no tickers are missed relative to
                                         ## previous tests that read this data
)
data, dat2, dat3 = (pd.read_csv(source, index_col=False) for source in ftd_sources)

In [9]:
header = "SETTLEMENT DATE|CUSIP|SYMBOL|QUANTITY (FAILS)|DESCRIPTION|PRICE"
Header = "Header"


def _split_ftd_rows(frame):
    """Split the single pipe-delimited column of a raw FTD frame into named fields.

    ``frame`` is modified in place: the '|' separators in its ``header`` column are
    replaced with ','.

    Returns a tuple ``(renamed, fields)``: ``renamed`` is ``frame`` with the ``header``
    column renamed to ``Header``; ``fields`` holds one column per comma-separated field,
    the first six named Date, CUSIP, SYMBOL, QUANTITY_FAILS, DESCRIPTION and PRICE.
    """
    # regex=False: '|' is a regex metacharacter, so state explicitly that a literal
    # replacement is intended instead of relying on the default of `regex`
    frame[header] = frame[header].str.replace('|', ',', regex=False)
    renamed = frame.rename(columns={header: Header})
    fields = renamed[Header].str.split(",", expand=True)
    fields = fields.rename(
        columns={0: 'Date', 1: 'CUSIP', 2: 'SYMBOL', 3: 'QUANTITY_FAILS', 4: 'DESCRIPTION', 5: 'PRICE'}
    )
    return renamed, fields


new_data, new_df = _split_ftd_rows(data)
new_dat2, new_d2 = _split_ftd_rows(dat2)
new_dat3, new_d3 = _split_ftd_rows(dat3)

  
  # Remove the CWD from sys.path while we load stuff.


In [10]:
# CUSIP/SYMBOL pairs from each of the three parsed FTD files
cusip_df1 = new_df[['CUSIP','SYMBOL']]
cusip_df2 = new_d2[['CUSIP','SYMBOL']]
cusip_df3 = new_d3[['CUSIP','SYMBOL']]

# Stack the three tables, keep the first occurrence of every pair, drop rows whose
# SYMBOL is missing (CUSIP sits in the index while dropna runs, so it is not checked),
# and finish with a fresh 0..n-1 index
cusip_df = (
    pd.concat([cusip_df1, cusip_df2, cusip_df3], axis='rows')
    .drop_duplicates()
    .set_index('CUSIP')
    .dropna()
    .reset_index()
)
cusip_df

Unnamed: 0,CUSIP,SYMBOL
0,B38564108,EURN
1,D18190898,DB
2,G00748106,STWO
3,G00748114,STWOW
4,G0083D104,ACEVW
...,...,...
13656,98475Y105,YRIV
13657,986008100,YOKEY
13658,98880P202,ZZLL
13659,989424205,ZENO


In [11]:
# Save the CUSIP/SYMBOL table to disk (the row index is written too, since `index` is left at its default)
cusip_df.to_csv('../Resources/cusip_df.csv')

In [11]:
## IEX call for sharesOutstanding
## IEX is not used for float data:
## it returns a '0' for any stock since Dec 2020
# GET /stock/{symbol}/stats/{stat?}

def get_outstanding_shares(stock_ticker):
    """Return the ``sharesOutstanding`` stat of a ticker from the IEX sandbox.

    Parameters
    ----------
    stock_ticker : str
        Ticker symbol inserted into the request URL.

    Returns
    -------
    object
        The decoded JSON body of the response, or the sentinel ``0.01`` when the body
        is not valid JSON (used to mark tickers whose IEX call failed).
    """
    response = requests.get(sandbox_url+'stock/'+stock_ticker+'/stats/sharesOutstanding?token='+test_token)
    try:
        return response.json()
    except ValueError:
        # json.JSONDecodeError and the other decode errors requests can raise are all
        # ValueError subclasses, so this also covers installs that do not raise
        # json.decoder.JSONDecodeError itself
        return 0.01

In [12]:
# Threshold compared against YTD_PCT_OUTSTANDING in the iterate_list_* functions:
# only tickers at or above this percentage are kept
pct_variable = 1.0
# Number of CUSIP/SYMBOL rows available to iterate over
length_of_df = len(cusip_df)

length_of_df

13662

In [27]:
def _screen_ftd_rows(data_list, range_obj):
    """Build the table of tickers whose summed FTDs reach ``pct_variable`` percent of shares outstanding."""
    columns = ['CUSIP', 'SYMBOL', 'YTD_FTD_SUM','sharesOutstanding','YTD_PCT_OUTSTANDING']
    records = []

    for i in range_obj:
        cusip = data_list['CUSIP'][i]
        symbol = data_list['SYMBOL'][i]

        try:
            ftd_frame = return_dataframe(cusip, symbol)      # parses all the SEC file data
            # .iloc[0]: first column's total, by position (the sums are labelled by column name)
            ftd_sum = ftd_frame.sum().iloc[0]
            shares = get_outstanding_shares(symbol)          # API call for shares outstanding
            ytd_pct = ftd_sum / shares * 100
            if ytd_pct >= pct_variable:
                records.append(
                    {'CUSIP': cusip,
                     'SYMBOL': symbol,
                     'YTD_FTD_SUM': ftd_sum,
                     'sharesOutstanding': shares,
                     'YTD_PCT_OUTSTANDING' : ytd_pct})
        except (KeyError, TypeError, IndexError):
            # KeyError: the CUSIP could not be looked up in the SEC data.
            # TypeError: the row could not be processed (e.g. a non-numeric API reply).
            # IndexError: nothing to sum. In every case the ticker is skipped.
            continue

    # Build the frame once from the collected rows (DataFrame.append no longer exists in pandas 2.x)
    return pd.DataFrame(records, columns=columns)


def iterate_list_return(data_list, range_obj, title):
    """Screen a batch of tickers, export the result and return it.

    Parameters
    ----------
    data_list : pandas.DataFrame
        Table with ``CUSIP`` and ``SYMBOL`` columns, indexed so that every value of
        ``range_obj`` is a valid row label.
    range_obj : iterable of int
        Row labels of ``data_list`` to process.
    title : str
        Used in the export file name ``../FilesExportFTD/<title>_df.csv``.

    Returns
    -------
    pandas.DataFrame
        One row per ticker whose summed FTDs are at least ``pct_variable`` percent of
        its shares outstanding, with the columns CUSIP, SYMBOL, YTD_FTD_SUM,
        sharesOutstanding and YTD_PCT_OUTSTANDING.
    """
    dfObj = _screen_ftd_rows(data_list, range_obj)
    dfObj.to_csv('../FilesExportFTD/'+title+'_df.csv')
    return dfObj

def iterate_list_export(data_list, range_obj, title):
    """Screen a batch of tickers and export the result; same as ``iterate_list_return`` but returns None."""
    dfObj = _screen_ftd_rows(data_list, range_obj)
    dfObj.to_csv('../FilesExportFTD/'+title+'_df.csv')

In [14]:
## Test with range of 100 to make sure functions work 
# range_1 = range(0,100)
# range_test = iterate_list_return(cusip_df,range_1,'range_1_test')
# range_test

In [18]:
## Export the screen in batches of 100 rows, one CSV file per batch.
## If the run fails at any point, the titles of the exported files show roughly
## where it stopped, so the problem can be fixed and the run continued from there.

## Skipped 11700 , 11800

x, y = 11800, 11900

while x <= (length_of_df - 200):
    range_var = range(x, y)
    str_symbol1, str_symbol2 = str(y), str(x)

    # File title is 'range_<end>_<start>'
    iterate_list_export(cusip_df, range_var, '_'.join(['range', str_symbol1, str_symbol2]))
    x, y = x + 100, y + 100



In [19]:
## Process the final rows of the CUSIP/SYMBOL list.
## Done separately to avoid issues with the length of the DataFrame in the while loop above.
last_range = length_of_df - 200
range_var = range(last_range, length_of_df)
str_symbol1, str_symbol2 = str(length_of_df), str(last_range)

# File title is 'range_<end>_<start>'
iterate_list_export(cusip_df, range_var, '_'.join(['range', str_symbol1, str_symbol2]))



In [29]:
## Fix missed data 
## Skipped 11700 , 11800
## Problem is with the cusip value 'None'

Int64Index([], dtype='int64')

In [45]:
# Re-run rows 11700-11799, the batch noted as skipped, and keep the result for inspection
range_1 = range(11700,11800)
range_test = iterate_list_return(cusip_df,range_1,'range_11800_11700')
range_test



Unnamed: 0,CUSIP,SYMBOL,YTD_FTD_SUM,sharesOutstanding,YTD_PCT_OUTSTANDING
