In [39]:
import pandas as pd

import glob

from pathlib import Path
import csv

import os
import requests
import json

import yfinance as yf

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
## Set start date variable - dataframes will be created starting from this date
start_date = '2020-09-14'

In [3]:
## Calls and Functions for reading data downloaded from SEC website 
header = "SETTLEMENT DATE|CUSIP|SYMBOL|QUANTITY (FAILS)|DESCRIPTION|PRICE"

path = r'C:\Users\watso\Desktop\TherinFintech\GME_Shenanigans\SEC_Files_CSV' # USE YOUR PATH 
all_files = glob.glob(path + "/*.csv")

GME_CUSIP_number = "36467W109"   
GME_symbol = 'GME'

CUSIP_number = "36467W109"   # Default  
symbol = 'GME'               # Default

# Main function - passing the CUSIP number (most important) and setting the symbol will deliver
#                 FTD data sorted by CUSIP number 
# All other functions here exist to support this 
def return_dataframe(cusip_number,symbol):
    df = read_ftd_data_any_stock(cusip_number,symbol)
    df = fix_dataframe(df)
    return df

# Use return_dataframe as your function to call FTD Data 


def read_ftd_file(csv_path, cusip_number, symbol):
    data = pd.read_csv(
    Path(csv_path),
    index_col=False
    )
    
    data[header] = data[header].str.replace('|',',')
    new_data = data.rename(columns={header:'SETTLEMENT_DATE,CUSIP,SYMBOL,QUANTITY_FAILS,DESCRIPTION,PRICE'})
    Header = "Header"
    new_data = data.rename(columns={header:Header})
    
    new_data = new_data.Header.str.split(",",expand=True)
    new_data = new_data.rename(columns={0:'Date',1:'CUSIP',2:'SYMBOL',3:'QUANTITY_FAILS',4:'DESCRIPTION',5:'PRICE'})
    
    new_data = new_data.set_index("CUSIP")
    new_data = new_data.loc[cusip_number]
    
    new_data = new_data[['Date','QUANTITY_FAILS','PRICE']]
    new_data.rename(columns={'PRICE':symbol},inplace=True)
    new_data.set_index('Date',inplace=True)  
    
    new_data = new_data.reset_index()
    new_data['Date'] = pd.to_datetime(new_data['Date'])
    new_data.set_index('Date',inplace=True)
    new_data = new_data[['QUANTITY_FAILS']]
    new_data.rename(columns={'QUANTITY_FAILS':symbol+'_QUANTITY_FAILS'},inplace=True)
    
    return new_data

def read_ftd_data_any_stock(cusip_number,symbol):
    new_df = pd.DataFrame()
    for filename in all_files:
        if filename == 0:
            new_df = read_ftd_file(csv_path=filename ,cusip_number=cusip_number,symbol=symbol)
        else:
            new_df2 = read_ftd_file(csv_path=filename ,cusip_number=cusip_number,symbol=symbol)
            new_df = concat_df(new_df,new_df2)
            
    return new_df

def fix_dataframe(dataframe):
    # For whatever reason, exporting data to CSV file, then reimporting back in using Pandas
    # solves all issues when it comes to reading the data in the column 
    dataframe.to_csv('../FilesTemp/temp.csv')
    dataframe = pd.read_csv(
        Path('../FilesTemp/temp.csv'),
        infer_datetime_format=True,
        parse_dates=True,
        index_col="Date",
    )
    dataframe.sort_index(inplace=True)
    dataframe.drop_duplicates(inplace=True)
    return dataframe

def concat_df(df_1,df_2):
    df = pd.concat([df_1,df_2],axis='rows')
    return df

In [4]:
## IEX Setup and Test 

# Don't seem to need to use real API Key 
# Sandbox API key works just fine for data 

#iex_api_key = os.getenv("IEX_API_KEY")
iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

base_url = 'https://cloud.iexapis.com/stable/'
sandbox_url = 'https://sandbox.iexapis.com/stable/'

#token = os.environ.get('IEX_API_KEY')
test_token = os.environ.get('IEX_TEST_API_KEY')

test_resp = requests.get(base_url + 'status')
test_resp

<Response [200]>

In [5]:
## IEX Calls and Functions 

def get_chart(stock_ticker, chart_range='14m'):
    resp_data = requests.get(sandbox_url+'stock/'+stock_ticker+'/chart/'+chart_range+'?token='+test_token)
    df = pd.DataFrame(resp_data.json())

    df.rename(columns={'date':'Date'},inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date',inplace=True)
    
    return df 

def get_close_price(stock_ticker, chart_range='14m'):

    resp_data = requests.get(sandbox_url+'stock/'+stock_ticker+'/chart/'+chart_range+'?token='+test_token)
    df = pd.DataFrame(resp_data.json())

    df.rename(columns={'date':'Date'},inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date',inplace=True)
    
    df = df[['close']]
    #df.rename(columns={'close':stock_ticker},inplace=True)
    
    return df

def get_daily_data(stock_ticker, chart_range='14m'):

    resp_data = requests.get(sandbox_url+'stock/'+stock_ticker+'/chart/'+chart_range+'?token='+test_token)
    df = pd.DataFrame(resp_data.json())

    df.rename(columns={'date':'Date'},inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date',inplace=True)
    
    df = df[['close','open','low','high','volume']]
    #df.rename(columns={'close':stock_ticker},inplace=True)
    
    return df

def join_ftd_and_close(ftd_df,close_df):
    merged_df = ftd_df.merge(close_df, how='inner',right_index=True, left_index=True)
    return merged_df

In [6]:
def do_all(cusip_number,symbol,chart_range='14m'):
    # Default chart_range value = '1y' 
    # Get FTD Data
    ftd_df = return_dataframe(cusip_number,symbol)
    # Get Close Data 
    close_df = get_close_price(symbol,chart_range)
    # Merge Data together 
    merged_df = join_ftd_and_close(ftd_df,close_df)
    return merged_df

def do_more(cusip_number,symbol,chart_range='14m'):
    # Default chart_range value = '1y' 
    # Get FTD Data
    ftd_df = return_dataframe(cusip_number,symbol)
    # Get Close Data 
    close_df = get_daily_data(symbol,chart_range)
    # Merge Data together 
    merged_df = join_ftd_and_close(ftd_df,close_df)
    return merged_df

def do_all_and_export(cusip_number,symbol,chart_range='14m'):
    # Default chart_range value = '1y' 
    # Get FTD Data
    ftd_df = return_dataframe(cusip_number,symbol)
    # Get Close Data 
    close_df = get_daily_data(symbol,chart_range)
    # Merge Data together 
    merged_df = join_ftd_and_close(ftd_df,close_df)
    # Export Data 
    merged_df.to_csv('../FilesExport/'+symbol+'_all_data.csv')
    return merged_df

def export_ftd_csv_data(dataframe,symbol):    
    dataframe.to_csv('../FilesExport/'+symbol+'_ftd_data.csv')

In [38]:
def get_ticker_info(symbol):
    ticker = yf.Ticker(symbol)
    ticker_info = ticker.info
    sharesOutstanding = ticker_info['sharesOutstanding']
    floatShares = ticker_info['floatShares']
    dictionary = {
        'SharesOutstanding' : sharesOutstanding,
        'FloatShares' : floatShares
    }
    return dictionary

In [7]:
# # AMC , BB , NOK 
# amc_cusip = '00165C104'
# bb_cusip = '09228F103'
# nok_cusip = '654902204'

# # TLSA, SPCE, Shopify, QSR 
# tsla_cusip = "88160R101" 
# shop_cusip = '82509L107'
# qsr_cusip = '76131D103'
# spce_cusip = '92766K106'

# # AAPL, MSFT, JPM, Goldman 
# aapl_cusip = '037833100'
# msft_cusip = '594918104'
# jpm_cusip = '46625H100'
# gs_cusip = '38141G104'

In [8]:
# amc_export = do_all_and_export(amc_cusip,'AMC')
# bb_export = do_all_and_export(bb_cusip,'BB')
# nok_export = do_all_and_export(nok_cusip,'NOK')
# tsla_export = do_all_and_export(tsla_cusip,'TSLA')
# shop_export = do_all_and_export(shop_cusip,'SHOP')
# qsr_export = do_all_and_export(qsr_cusip,'QSR')
# spce_export = do_all_and_export(spce_cusip,'SPCE')
# aapl_export = do_all_and_export(aapl_cusip,'AAPL')
# msft_export = do_all_and_export(msft_cusip,'MSFT')
# jpm_export = do_all_and_export(jpm_cusip,'JPM')
# gs_export = do_all_and_export(gs_cusip,'GS')

In [9]:
## Read FTD file 
data = pd.read_csv(
    Path("../Resources/sec_ftd_1.csv"),
    index_col=False
)
data.head()

Unnamed: 0,SETTLEMENT DATE|CUSIP|SYMBOL|QUANTITY (FAILS)|DESCRIPTION|PRICE
0,20210816|B38564108|EURN|33389|EURONAV NV ANTWE...
1,20210816|C00948106|AGRI|7719|AGRIFORCE GROWING...
2,20210816|D18190898|DB|66551|DEUTSCHE BANK AG N...
3,20210816|G0R21B112|ACTDW|495|ARCLIGHT CLEAN TR...
4,20210816|G00748114|STWOW|3700|ACON S2 ACQUISIT...


In [10]:
header = "SETTLEMENT DATE|CUSIP|SYMBOL|QUANTITY (FAILS)|DESCRIPTION|PRICE"
data[header] = data[header].str.replace('|',',')
new_data = data.rename(columns={header:'SETTLEMENT_DATE,CUSIP,SYMBOL,QUANTITY_FAILS,DESCRIPTION,PRICE'})
Header = "Header"
new_data = data.rename(columns={header:Header})
new_df = pd.DataFrame()
new_df = new_data.Header.str.split(",",expand=True)
new_df = new_df.rename(columns={0:'Date',1:'CUSIP',2:'SYMBOL',3:'QUANTITY_FAILS',4:'DESCRIPTION',5:'PRICE'})

  


In [11]:
new_df.head()

Unnamed: 0,Date,CUSIP,SYMBOL,QUANTITY_FAILS,DESCRIPTION,PRICE,6,7
0,20210816,B38564108,EURN,33389,EURONAV NV ANTWERPEN (BELGIUM),7.75,,
1,20210816,C00948106,AGRI,7719,AGRIFORCE GROWING SYS LTD COM,2.89,,
2,20210816,D18190898,DB,66551,DEUTSCHE BANK AG NAMEN AKT (DE,12.96,,
3,20210816,G0R21B112,ACTDW,495,ARCLIGHT CLEAN TRANSITION CORP,1.12,,
4,20210816,G00748114,STWOW,3700,ACON S2 ACQUISITION CORP WT EX,1.39,,


In [12]:
cusip_df = new_df[['CUSIP','SYMBOL']]
cusip_df.drop_duplicates(inplace=True)
cusip_df.drop(67689, inplace=True)
cusip_df.reset_index(inplace=True)
cusip_df.drop(columns='index',inplace=True)
cusip_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,CUSIP,SYMBOL
0,B38564108,EURN
1,C00948106,AGRI
2,D18190898,DB
3,G0R21B112,ACTDW
4,G00748114,STWOW
...,...,...
12367,98154X105,WSGF
12368,983134107,WYNN
12369,988869103,ZMRK
12370,98907K202,ZNTEU


In [13]:
#cusip_10['CUSIP'][1]
#C00948106
#test_return = return_dataframe('C00948106',cusip_10['SYMBOL'][2])
empty_list = []

# test_list
# symb_list 

# test_return1 = return_dataframe(cusip_10['CUSIP'][0],cusip_10['SYMBOL'][0])
# test_return2= return_dataframe(cusip_10['CUSIP'][1],cusip_10['SYMBOL'][1])

# cusip_10 = cusip_df.head(20)

# for i in range(10):
#     cusip = test_list['CUSIP'][i]        
#     symbol = symb_list['SYMBOL'][i]
    
#     try:
#         temp_var = return_dataframe(cusip,symbol)
#         empty_list.append(temp_var)
#     except KeyError: 
#         continue
#     except TypeError:
#         continue   

## Below code works in theory 
# empty1_list = []
# empty1_list.append(test_return1)
# empty1_list.append(test_return2)
# empty1_list

In [14]:
new_df.head()

Unnamed: 0,Date,CUSIP,SYMBOL,QUANTITY_FAILS,DESCRIPTION,PRICE,6,7
0,20210816,B38564108,EURN,33389,EURONAV NV ANTWERPEN (BELGIUM),7.75,,
1,20210816,C00948106,AGRI,7719,AGRIFORCE GROWING SYS LTD COM,2.89,,
2,20210816,D18190898,DB,66551,DEUTSCHE BANK AG NAMEN AKT (DE,12.96,,
3,20210816,G0R21B112,ACTDW,495,ARCLIGHT CLEAN TRANSITION CORP,1.12,,
4,20210816,G00748114,STWOW,3700,ACON S2 ACQUISITION CORP WT EX,1.39,,


In [15]:
test_df = new_df.set_index('CUSIP')
test_df

Unnamed: 0_level_0,Date,SYMBOL,QUANTITY_FAILS,DESCRIPTION,PRICE,6,7
CUSIP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
B38564108,20210816,EURN,33389,EURONAV NV ANTWERPEN (BELGIUM),7.75,,
C00948106,20210816,AGRI,7719,AGRIFORCE GROWING SYS LTD COM,2.89,,
D18190898,20210816,DB,66551,DEUTSCHE BANK AG NAMEN AKT (DE,12.96,,
G0R21B112,20210816,ACTDW,495,ARCLIGHT CLEAN TRANSITION CORP,1.12,,
G00748114,20210816,STWOW,3700,ACON S2 ACQUISITION CORP WT EX,1.39,,
...,...,...,...,...,...,...,...
98986M103,20210831,ZYXI,3367,ZYNEX,INC. COMMON STOCK (NV),13.38,
98986T108,20210831,ZNGA,12398,ZYNGA INC CLASS A,8.88,,
98986X109,20210831,ZYNE,506,ZYNERBA PHARMACEUTICALS INC CO,4.11,,
,Trailer record count 67689,,,,,,


In [16]:
test_df1 = test_df.sort_index()

In [17]:
test_df1 = test_df1[test_df1.index.notnull()]
test_df1

Unnamed: 0_level_0,Date,SYMBOL,QUANTITY_FAILS,DESCRIPTION,PRICE,6,7
CUSIP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
000304105,20210819,AACAY,53,AAC TECHNOLOGIES HOLDINGS INC,5.79,,
000304105,20210817,AACAY,34845,AAC TECHNOLOGIES HOLDINGS INC,5.88,,
000304105,20210816,AACAY,32805,AAC TECHNOLOGIES HOLDINGS INC,5.95,,
000304105,20210830,AACAY,759,AAC TECHNOLOGIES HOLDINGS INC,5.36,,
000304105,20210823,AACAY,2224,AAC TECHNOLOGIES HOLDINGS INC,5.64,,
...,...,...,...,...,...,...,...
Y95308105,20210817,WVE,56,WAVE LIFE SCIENCES LTD ORD SHS,5.97,,
Y95308105,20210816,WVE,1047,WAVE LIFE SCIENCES LTD ORD SHS,6.41,,
Y95308105,20210818,WVE,182,WAVE LIFE SCIENCES LTD ORD SHS,6.31,,
Y95308105,20210819,WVE,332,WAVE LIFE SCIENCES LTD ORD SHS,5.72,,


In [18]:
test_df2 = test_df1[['SYMBOL']]
test_df2 = test_df2.drop_duplicates()
test_df3 = test_df2.reset_index()
test_df3

Unnamed: 0,CUSIP,SYMBOL
0,000304105,AACAY
1,00032Q104,AADI
2,000360206,AAON
3,000361105,AIR
4,000375204,ABB
...,...,...
12363,Y8564M113,TGPPRA
12364,Y8564M204,TGPPRB
12365,Y8564W103,TK
12366,Y8565N300,TNK


In [19]:
## read_ftd_data_any_stock(cusip_number,symbol)
cusip_1 = test_df3['CUSIP'][0]
cusip_1

'000304105'

In [20]:
symbol_1 = test_df3['SYMBOL'][0]
symbol_1

'AACAY'

In [21]:
cusip_df = test_df3.head(100)
cusip_df

Unnamed: 0,CUSIP,SYMBOL
0,000304105,AACAY
1,00032Q104,AADI
2,000360206,AAON
3,000361105,AIR
4,000375204,ABB
...,...,...
95,00214Q708,ARKF
96,00214Q807,ARKX
97,00215F107,ATNI
98,00215W100,ASX


In [35]:
empty_list = []
for i in range(20):
    cusip = cusip_df['CUSIP'][i]        
    symbol = cusip_df['SYMBOL'][i]
    
    try:
        temp_var = return_dataframe(cusip,symbol)
        temp_var = temp_var.loc[start_date:]
        empty_list.append(temp_var)
    except KeyError: 
        continue
    except TypeError:
        continue   

        
empty_list

## This code works 



[            ABB_QUANTITY_FAILS
 Date                          
 2020-09-14                6969
 2020-09-15               15875
 2020-09-17                 122
 2020-09-18                 698
 2020-09-21                2745
 ...                        ...
 2021-08-27                4787
 2021-09-03                 647
 2021-09-08                5128
 2021-09-13                4835
 2021-09-14                3526
 
 [165 rows x 1 columns],
             ADMA_QUANTITY_FAILS
 Date                           
 2020-09-14                 3628
 2020-09-15                  115
 2020-09-16                 8170
 2020-09-17                  370
 2020-09-18                 2753
 ...                         ...
 2021-09-07                 7611
 2021-09-08                 9850
 2021-09-10              4952900
 2021-09-13               434884
 2021-09-14               442317
 
 [195 rows x 1 columns],
             ADT_QUANTITY_FAILS
 Date                          
 2020-09-14              175392
 2020

In [36]:
df_test_read = empty_list[1]
df_test_read

Unnamed: 0_level_0,ADMA_QUANTITY_FAILS
Date,Unnamed: 1_level_1
2020-09-14,3628
2020-09-15,115
2020-09-16,8170
2020-09-17,370
2020-09-18,2753
...,...
2021-09-07,7611
2021-09-08,9850
2021-09-10,4952900
2021-09-13,434884


In [47]:
df_test_read_sum = df_test_read.sum()
df_test_read_sum[0]

14607442

In [46]:
test_ticker_info = get_ticker_info('ADMA')
test_ticker_info

{'SharesOutstanding': 132769000, 'FloatShares': 117309794}

In [48]:
test_ytd_pct = df_test_read_sum[0] / test_ticker_info['FloatShares'] * 100
test_ytd_pct

12.452022548091765

In [54]:
# test_df3
# return length of df 

length_of_df = len(test_df3)
length_of_df

12368

In [51]:
## Test Code to iterate through CUSIP list and perform 
## calculations on FTD compared to Float data. 
## Returns a dictionary with all stocks that FTD more than 
## [pct_variable] of their stock in the last year 

empty_list_1 = []
pct_variable = 1.0

for i in range(100):
    cusip = cusip_df['CUSIP'][i]        
    symbol = cusip_df['SYMBOL'][i]
    
    try:
        temp_df = return_dataframe(cusip,symbol)
        temp_sum = temp_df.sum() 
        ticker_info = get_ticker_info(symbol)
        ytd_pct = temp_sum[0] / ticker_info['FloatShares'] * 100
        if ytd_pct >= pct_variable:
            empty_dict = {
                'CUSIP':cusip,
                'SYMBOL':symbol,
                'YTD_FTD_SUM':temp_sum[0],
                'YTD_PCT_FLOAT':ytd_pct
            }
            empty_list_1.append(empty_dict)
    except KeyError: 
        continue
    except TypeError:
        continue   

        
empty_list_1

## This code works 



[{'CUSIP': '000899104',
  'SYMBOL': 'ADMA',
  'YTD_FTD_SUM': 17297195,
  'YTD_PCT_FLOAT': 14.744885665727109},
 {'CUSIP': '00090Q103',
  'SYMBOL': 'ADT',
  'YTD_FTD_SUM': 4456767,
  'YTD_PCT_FLOAT': 2.8665198505557545},
 {'CUSIP': '00164V103',
  'SYMBOL': 'AMCX',
  'YTD_FTD_SUM': 10513662,
  'YTD_PCT_FLOAT': 35.61824818715108},
 {'CUSIP': '00165C104',
  'SYMBOL': 'AMC',
  'YTD_FTD_SUM': 282751345,
  'YTD_PCT_FLOAT': 55.28243804015365},
 {'CUSIP': '00211Y506',
  'SYMBOL': 'ABIO',
  'YTD_FTD_SUM': 4140983,
  'YTD_PCT_FLOAT': 28.849676267124995}]

In [55]:
## Real Code to iterate through CUSIP list and perform 
## calculations on FTD compared to Float data. 
## Returns a dictionary with all stocks that FTD more than 
## [pct_variable] of their stock in the last year 

list_of_FTD_data = []
pct_variable = 1.0
length_of_df = len(test_df3)

for i in range(length_of_df):
    cusip = test_df3['CUSIP'][i]        
    symbol = test_df3['SYMBOL'][i]
    
    try:
        temp_df = return_dataframe(cusip,symbol)
        temp_sum = temp_df.sum() 
        ticker_info = get_ticker_info(symbol)
        ytd_pct = temp_sum[0] / ticker_info['FloatShares'] * 100
        if ytd_pct >= pct_variable:
            empty_dict = {
                'CUSIP':cusip,
                'SYMBOL':symbol,
                'YTD_FTD_SUM':temp_sum[0],
                'YTD_PCT_FLOAT':ytd_pct
            }
            list_of_FTD_data.append(empty_dict)
    except KeyError: 
        continue
    except TypeError:
        continue   

        
list_of_FTD_data



ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))