In [1]:
import pandas as pd

import glob

from pathlib import Path
import csv

import os
import requests
import json

import yfinance as yf

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
## Set start date variable - dataframes will be created starting from this date
start_date = '2020-09-14'

In [3]:
## Calls and Functions for reading data downloaded from SEC website 
header = "SETTLEMENT DATE|CUSIP|SYMBOL|QUANTITY (FAILS)|DESCRIPTION|PRICE"

path = r'C:\Users\watso\Desktop\TherinFintech\GME_Shenanigans\SEC_Files_CSV' # USE YOUR PATH 
all_files = glob.glob(path + "/*.csv")

GME_CUSIP_number = "36467W109"   
GME_symbol = 'GME'

CUSIP_number = "36467W109"   # Default  
symbol = 'GME'               # Default

# Main function - passing the CUSIP number (most important) and setting the symbol will deliver
#                 FTD data sorted by CUSIP number 
# All other functions here exist to support this 
def return_dataframe(cusip_number,symbol):
    df = read_ftd_data_any_stock(cusip_number,symbol)
    df = fix_dataframe(df)
    return df

# Use return_dataframe as your function to call FTD Data 


def read_ftd_file(csv_path, cusip_number, symbol):
    data = pd.read_csv(
    Path(csv_path),
    index_col=False
    )
    
    data[header] = data[header].str.replace('|',',')
    new_data = data.rename(columns={header:'SETTLEMENT_DATE,CUSIP,SYMBOL,QUANTITY_FAILS,DESCRIPTION,PRICE'})
    Header = "Header"
    new_data = data.rename(columns={header:Header})
    
    new_data = new_data.Header.str.split(",",expand=True)
    new_data = new_data.rename(columns={0:'Date',1:'CUSIP',2:'SYMBOL',3:'QUANTITY_FAILS',4:'DESCRIPTION',5:'PRICE'})
    
    new_data = new_data.set_index("CUSIP")
    new_data = new_data.loc[cusip_number]
    
    new_data = new_data[['Date','QUANTITY_FAILS','PRICE']]
    new_data.rename(columns={'PRICE':symbol},inplace=True)
    new_data.set_index('Date',inplace=True)  
    
    new_data = new_data.reset_index()
    new_data['Date'] = pd.to_datetime(new_data['Date'])
    new_data.set_index('Date',inplace=True)
    new_data = new_data[['QUANTITY_FAILS']]
    new_data.rename(columns={'QUANTITY_FAILS':symbol+'_QUANTITY_FAILS'},inplace=True)
    
    return new_data

def read_ftd_data_any_stock(cusip_number,symbol):
    new_df = pd.DataFrame()
    for filename in all_files:
        if filename == 0:
            new_df = read_ftd_file(csv_path=filename ,cusip_number=cusip_number,symbol=symbol)
        else:
            new_df2 = read_ftd_file(csv_path=filename ,cusip_number=cusip_number,symbol=symbol)
            new_df = concat_df(new_df,new_df2)
            
    return new_df

def fix_dataframe(dataframe):
    # For whatever reason, exporting data to CSV file, then reimporting back in using Pandas
    # solves all issues when it comes to reading the data in the column 
    dataframe.to_csv('../FilesTemp/temp.csv')
    dataframe = pd.read_csv(
        Path('../FilesTemp/temp.csv'),
        infer_datetime_format=True,
        parse_dates=True,
        index_col="Date",
    )
    dataframe.sort_index(inplace=True)
    dataframe.drop_duplicates(inplace=True)
    return dataframe

def concat_df(df_1,df_2):
    df = pd.concat([df_1,df_2],axis='rows')
    return df

In [4]:
## IEX Setup and Test 

# Don't seem to need to use real API Key 
# Sandbox API key works just fine for data 

#iex_api_key = os.getenv("IEX_API_KEY")
iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

base_url = 'https://cloud.iexapis.com/stable/'
sandbox_url = 'https://sandbox.iexapis.com/stable/'

#token = os.environ.get('IEX_API_KEY')
test_token = os.environ.get('IEX_TEST_API_KEY')

test_resp = requests.get(base_url + 'status')
test_resp

<Response [200]>

In [5]:
## IEX Calls and Functions 

def get_chart(stock_ticker, chart_range='14m'):
    resp_data = requests.get(sandbox_url+'stock/'+stock_ticker+'/chart/'+chart_range+'?token='+test_token)
    df = pd.DataFrame(resp_data.json())

    df.rename(columns={'date':'Date'},inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date',inplace=True)
    
    return df 

def get_close_price(stock_ticker, chart_range='14m'):

    resp_data = requests.get(sandbox_url+'stock/'+stock_ticker+'/chart/'+chart_range+'?token='+test_token)
    df = pd.DataFrame(resp_data.json())

    df.rename(columns={'date':'Date'},inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date',inplace=True)
    
    df = df[['close']]
    #df.rename(columns={'close':stock_ticker},inplace=True)
    
    return df

def get_daily_data(stock_ticker, chart_range='14m'):

    resp_data = requests.get(sandbox_url+'stock/'+stock_ticker+'/chart/'+chart_range+'?token='+test_token)
    df = pd.DataFrame(resp_data.json())

    df.rename(columns={'date':'Date'},inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date',inplace=True)
    
    df = df[['close','open','low','high','volume']]
    #df.rename(columns={'close':stock_ticker},inplace=True)
    
    return df

def join_ftd_and_close(ftd_df,close_df):
    merged_df = ftd_df.merge(close_df, how='inner',right_index=True, left_index=True)
    return merged_df

In [6]:
def do_all(cusip_number,symbol,chart_range='14m'):
    # Default chart_range value = '1y' 
    # Get FTD Data
    ftd_df = return_dataframe(cusip_number,symbol)
    # Get Close Data 
    close_df = get_close_price(symbol,chart_range)
    # Merge Data together 
    merged_df = join_ftd_and_close(ftd_df,close_df)
    return merged_df

def do_more(cusip_number,symbol,chart_range='14m'):
    # Default chart_range value = '1y' 
    # Get FTD Data
    ftd_df = return_dataframe(cusip_number,symbol)
    # Get Close Data 
    close_df = get_daily_data(symbol,chart_range)
    # Merge Data together 
    merged_df = join_ftd_and_close(ftd_df,close_df)
    return merged_df

def do_all_and_export(cusip_number,symbol,chart_range='14m'):
    # Default chart_range value = '1y' 
    # Get FTD Data
    ftd_df = return_dataframe(cusip_number,symbol)
    # Get Close Data 
    close_df = get_daily_data(symbol,chart_range)
    # Merge Data together 
    merged_df = join_ftd_and_close(ftd_df,close_df)
    # Export Data 
    merged_df.to_csv('../FilesExport/'+symbol+'_all_data.csv')
    return merged_df

def export_ftd_csv_data(dataframe,symbol):    
    dataframe.to_csv('../FilesExport/'+symbol+'_ftd_data.csv')

In [7]:
def get_ticker_info(symbol):
    ticker = yf.Ticker(symbol)
    ticker_info = ticker.info
    sharesOutstanding = ticker_info['sharesOutstanding']
    floatShares = ticker_info['floatShares']
    dictionary = {
        'SharesOutstanding' : sharesOutstanding,
        'FloatShares' : floatShares
    }
    return dictionary

In [8]:
## Read FTD file 
data = pd.read_csv(
    Path("../Resources/sec_ftd_1.csv"),
    index_col=False
)
data.head()

Unnamed: 0,SETTLEMENT DATE|CUSIP|SYMBOL|QUANTITY (FAILS)|DESCRIPTION|PRICE
0,20210816|B38564108|EURN|33389|EURONAV NV ANTWE...
1,20210816|C00948106|AGRI|7719|AGRIFORCE GROWING...
2,20210816|D18190898|DB|66551|DEUTSCHE BANK AG N...
3,20210816|G0R21B112|ACTDW|495|ARCLIGHT CLEAN TR...
4,20210816|G00748114|STWOW|3700|ACON S2 ACQUISIT...


In [9]:
header = "SETTLEMENT DATE|CUSIP|SYMBOL|QUANTITY (FAILS)|DESCRIPTION|PRICE"
data[header] = data[header].str.replace('|',',')
new_data = data.rename(columns={header:'SETTLEMENT_DATE,CUSIP,SYMBOL,QUANTITY_FAILS,DESCRIPTION,PRICE'})
Header = "Header"
new_data = data.rename(columns={header:Header})
new_df = pd.DataFrame()
new_df = new_data.Header.str.split(",",expand=True)
new_df = new_df.rename(columns={0:'Date',1:'CUSIP',2:'SYMBOL',3:'QUANTITY_FAILS',4:'DESCRIPTION',5:'PRICE'})

  


In [10]:
new_df.head()

Unnamed: 0,Date,CUSIP,SYMBOL,QUANTITY_FAILS,DESCRIPTION,PRICE,6,7
0,20210816,B38564108,EURN,33389,EURONAV NV ANTWERPEN (BELGIUM),7.75,,
1,20210816,C00948106,AGRI,7719,AGRIFORCE GROWING SYS LTD COM,2.89,,
2,20210816,D18190898,DB,66551,DEUTSCHE BANK AG NAMEN AKT (DE,12.96,,
3,20210816,G0R21B112,ACTDW,495,ARCLIGHT CLEAN TRANSITION CORP,1.12,,
4,20210816,G00748114,STWOW,3700,ACON S2 ACQUISITION CORP WT EX,1.39,,


In [11]:
cusip_df = new_df[['CUSIP','SYMBOL']]
cusip_df.drop_duplicates(inplace=True)
cusip_df.drop(67689, inplace=True)
cusip_df.reset_index(inplace=True)
cusip_df.drop(columns='index',inplace=True)
cusip_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,CUSIP,SYMBOL
0,B38564108,EURN
1,C00948106,AGRI
2,D18190898,DB
3,G0R21B112,ACTDW
4,G00748114,STWOW
...,...,...
12367,98154X105,WSGF
12368,983134107,WYNN
12369,988869103,ZMRK
12370,98907K202,ZNTEU


In [12]:
cusip_symbol_list = new_df.set_index('CUSIP')
cusip_symbol_list = cusip_symbol_list.sort_index()
cusip_symbol_list = cusip_symbol_list[cusip_symbol_list.index.notnull()]
cusip_symbol_list = cusip_symbol_list[['SYMBOL']]
cusip_symbol_list = cusip_symbol_list.drop_duplicates()
cusip_symbol_list = cusip_symbol_list.reset_index()
cusip_symbol_list

Unnamed: 0,CUSIP,SYMBOL
0,000304105,AACAY
1,00032Q104,AADI
2,000360206,AAON
3,000361105,AIR
4,000375204,ABB
...,...,...
12363,Y8564M113,TGPPRA
12364,Y8564M204,TGPPRB
12365,Y8564W103,TK
12366,Y8565N300,TNK


In [28]:
# GET /stock/{symbol}/stats/{stat?}

def get_outstanding_shares(stock_ticker):
    response = requests.get(sandbox_url+'stock/'+stock_ticker+'/stats/sharesOutstanding?token='+test_token)
    try:
        variable = response.json()
    except json.decoder.JSONDecodeError:
            variable = 0.01
    
    return variable

In [14]:
pct_variable = 1.0
length_of_df = len(cusip_symbol_list)

length_of_df

12368

In [15]:
# list_of_FTD_data = []

# for i in range(100):
#     cusip = test_df3['CUSIP'][i]        
#     symbol = test_df3['SYMBOL'][i]
    
#     try:
#         temp_df = return_dataframe(cusip,symbol)
#         temp_sum = temp_df.sum() 
#         ticker_info = get_outstanding_shares(symbol)
#         ytd_pct = temp_sum[0] / ticker_info * 100
#         if ytd_pct >= pct_variable:
#             empty_dict = {
#                 'CUSIP':cusip,
#                 'SYMBOL':symbol,
#                 'YTD_FTD_SUM':temp_sum[0],
#                 'YTD_PCT_OUTSTANDING':ytd_pct
#             }
#             list_of_FTD_data.append(empty_dict)
#     except KeyError: 
#         continue
#     except TypeError:
#         continue   
        
# list_of_FTD_data

In [16]:
# list_of_FTD_data = []
# dfObj = pd.DataFrame(columns=['CUSIP', 'SYMBOL', 'YTD_FTD_SUM','YTD_PCT_OUTSTANDING'])

# for i in range(20):
#     cusip = test_df3['CUSIP'][i]        
#     symbol = test_df3['SYMBOL'][i]
    
#     try:
#         temp_df = return_dataframe(cusip,symbol)
#         temp_sum = temp_df.sum() 
#         ticker_info = get_outstanding_shares(symbol)
#         ytd_pct = temp_sum[0] / ticker_info * 100
#         if ytd_pct >= pct_variable:
#             dfObj = dfObj.append(
#                 {'CUSIP': cusip, 
#                  'SYMBOL': symbol,
#                  'YTD_FTD_SUM': temp_sum[0],
#                  'YTD_PCT_OUTSTANDING' : ytd_pct}, 
#                 ignore_index=True)            
# #             empty_dict = {
# #                 'CUSIP':cusip,
# #                 'SYMBOL':symbol,
# #                 'YTD_FTD_SUM':temp_sum[0],
# #                 'YTD_PCT_OUTSTANDING':ytd_pct
# #             }
# #             list_of_FTD_data.append(empty_dict)
#     except KeyError: 
#         continue
#     except TypeError:
#         continue   
        
# dfObj

In [17]:
# dfObj.to_csv('../FilesExport/dfObj_data.csv')

In [32]:
def iterate_list(data_list, range_int, title):
    dfObj = pd.DataFrame(columns=['CUSIP', 'SYMBOL', 'YTD_FTD_SUM','YTD_PCT_OUTSTANDING'])

    for i in range_int:
        cusip = data_list['CUSIP'][i]        
        symbol = data_list['SYMBOL'][i]

        try:
            temp_df = return_dataframe(cusip,symbol)
            temp_sum = temp_df.sum() 
            ticker_info = get_outstanding_shares(symbol)
            ytd_pct = temp_sum[0] / ticker_info * 100
            if ytd_pct >= pct_variable:
                dfObj = dfObj.append(
                    {'CUSIP': cusip, 
                     'SYMBOL': symbol,
                     'YTD_FTD_SUM': temp_sum[0],
                     'YTD_PCT_OUTSTANDING' : ytd_pct}, 
                    ignore_index=True)            
        except KeyError: 
            continue
        except TypeError:
            continue
        

    dfObj.to_csv('../FilesExportFTD/'+title+'_df.csv')
    return dfObj

def iterate_list_export(data_list, range_int, title):
    dfObj = pd.DataFrame(columns=['CUSIP', 'SYMBOL', 'YTD_FTD_SUM','YTD_PCT_OUTSTANDING'])

    for i in range_int:
        cusip = data_list['CUSIP'][i]        
        symbol = data_list['SYMBOL'][i]

        try:
            temp_df = return_dataframe(cusip,symbol)   # Parses all the SEC File Data 
            temp_sum = temp_df.sum()                   # Sums all the FTD File data 
            ticker_info = get_outstanding_shares(symbol)  # Acccess API, get the OutstandingShares 
            ytd_pct = temp_sum[0] / ticker_info * 100     # Calculate the Percent 
            if ytd_pct >= pct_variable:                   # Sort data great than 1% (pct_variable) 
                dfObj = dfObj.append(
                    {'CUSIP': cusip, 
                     'SYMBOL': symbol,
                     'YTD_FTD_SUM': temp_sum[0],
                     'YTD_PCT_OUTSTANDING' : ytd_pct},    # Append data to list and its relevant columns 
                    ignore_index=True)            
        except KeyError:                          
            continue
        except TypeError:               # (no more than 1 day of FTD posted in a two week period)
            continue                    # issue with return_dataframe() and accessing a single entry in the SEC file data
        

    dfObj.to_csv('../FilesExportFTD/'+title+'_df.csv')
    #return dfObj

In [19]:
# range_test0 = range(15)
# range_test1 = range(15,50)
# range_test2 = range(50)

In [20]:
# df_test1 = iterate_list(cusip_symbol_list,range_test0,'test1_data')
# df_test1

In [21]:
# df_test2 = iterate_list(cusip_symbol_list,range_test1,'test2_data')
# df_test2

In [22]:
# df_test3 = iterate_list(cusip_symbol_list,range_test2,'test3_data')
# df_test3

In [23]:
range_1 = range(0,100)
range_2 = range(100,200)
range_3 = range(200,300)
range_4 = range(300,400)
range_5 = range(400,500)

range_1000 = range(500)

In [24]:
ftd_list_1 = iterate_list(cusip_symbol_list,range_1,'range_1_test')
ftd_list_1



Unnamed: 0,CUSIP,SYMBOL,YTD_FTD_SUM,YTD_PCT_OUTSTANDING
0,000899104,ADMA,17297195,12.76483
1,00162Q452,AMLP,13691470,8.868795
2,00162Q460,ACES,3110311,22.392659
3,00162Q478,DTEC,1464338,29.080677
4,00162Q718,IDOG,976450,14.714492
5,00162Q783,RIGS,912437,16.273069
6,00164V103,AMCX,10513662,32.784511
7,00165C104,AMC,282751345,53.065151
8,00211Y506,ABIO,4140983,27.914435
9,00214Q104,ARKK,35895762,19.513169


In [25]:
ftd_list_2 = iterate_list(cusip_symbol_list,range_2,'range_2_test')
ftd_list_2



Unnamed: 0,CUSIP,SYMBOL,YTD_FTD_SUM,YTD_PCT_OUTSTANDING
0,003013109,FCO,541636,5.986565
1,003057106,ACP,4129085,22.685064
2,003260106,PPLT,954120,7.148383
3,003261104,BCI,1310498,4.656973
4,003261203,BCD,298807,19.723911
5,003263100,GLTR,4613084,41.193966
6,003264108,SIVR,12129564,27.796293
7,00326W106,ASGI,1820403,19.738169
8,00430H102,AXDX,1393706,2.252796
9,00444T100,ACRX,5748557,4.80386


In [27]:
ftd_list_3 = iterate_list(cusip_symbol_list,range_3,'range_3_test')
ftd_list_3



Unnamed: 0,CUSIP,SYMBOL,YTD_FTD_SUM,YTD_PCT_OUTSTANDING
0,00688A106,ADIL,5847010,27.574909
1,007025109,ADTX,8983780,60.942565
2,007624307,ADXS,28028956,18.854429
3,00768Y487,DWUS,1355119,50.729277
4,00770K202,AMTX,31362194,97.909488
5,007975402,AEZS,20122680,16.33467


In [30]:
range_1 = range(0,100)
range_2 = range(100,200)
length_of_df

12368

In [34]:
x = 0
y = 100
#symbol = 4
while x <= (length_of_df-200):
    range_var = range(x,y)    
    str_symbol1 = str(y)
    str_symbol2 = str(x)
    
    iterate_list_export(cusip_symbol_list,range_var,'range_'+str_symbol1+'_'+str_symbol2)
    x += 100
    y += 100
    #symbol += 1   



In [31]:
symbol = 4
str_symbol = str(symbol)
str_symbol

'4'

In [35]:
#last_range = [12200,12368]
range_var = range(12200,12368)    
str_symbol1 = str(12368)
str_symbol2 = str(12200)

iterate_list_export(cusip_symbol_list,range_var,'range_'+str_symbol1+'_'+str_symbol2)
    

