In [1]:
import pandas as pd

import glob

from pathlib import Path
import csv

import os
import requests
import json

#import yfinance as yf

from dotenv import load_dotenv
# Load variables from a .env file into the process environment so the
# os.getenv / os.environ.get lookups for the API keys further down can find them.
load_dotenv()

True

In [2]:
## Constants

## Date window for the analysis: get_concise_data slices the IEX frame to
## start_date:end_date, and the quandl.get calls receive both as start_date / end_date.
start_date = '2021-01-01'
end_date = '2021-09-30'
default_range = '10m' ## Default chart range passed to the IEX functions - don't need more at the moment

In [3]:
## Import FTD File
## Column 0 of the CSV becomes the index and is parsed as dates.
ftd_df = pd.read_csv(Path('../Resources/ftd_all_data.csv'), index_col=0, parse_dates=True)

In [4]:
## Import Symbol and CUSIP list
## Column 0 of the CSV becomes the index; the lookup helper below reads SYMBOL and CUSIP.
symbol_df = pd.read_csv(Path('../Resources/symbol_all_list.csv'), index_col=0)

In [5]:
def return_ftd_data_cusip(cusip_number):
    """Return the rows of the module-level ``ftd_df`` for one CUSIP, indexed by ``Date``.

    Parameters
    ----------
    cusip_number : str
        Value to look up in the ``CUSIP`` column of ``ftd_df``.

    Returns
    -------
    pandas.DataFrame
        Matching rows with ``Date`` as the index. ``CUSIP`` is used as the
        lookup index and is therefore not part of the returned columns.

    Raises
    ------
    KeyError
        If ``cusip_number`` does not occur in ``ftd_df``.

    Notes
    -----
    Relies on the index of ``ftd_df`` being named ``Date`` so that
    ``reset_index`` produces a ``Date`` column.
    """
    by_cusip = ftd_df.reset_index().set_index("CUSIP")
    # Select with a list so the result is always a DataFrame. A scalar label
    # collapses a single matching row into a Series, which has no set_index,
    # so the lookup used to fail for any CUSIP that appears only once.
    matches = by_cusip.loc[[cusip_number]]
    return matches.set_index('Date')

def return_CUSIP_from_symbol(symbol):
    """Look up the CUSIP stored for ``symbol`` in the module-level ``symbol_df``.

    Rows containing missing values are discarded, the remaining rows are
    re-indexed by their ``SYMBOL`` column, and the ``CUSIP`` entry of the
    requested symbol is returned.

    Parameters
    ----------
    symbol : str
        Ticker to look up in the ``SYMBOL`` column.

    Returns
    -------
    The ``CUSIP`` value found for ``symbol``.

    Raises
    ------
    KeyError
        If ``symbol`` is not present after rows with missing values are dropped.
    """
    cusip_by_symbol = (
        symbol_df
        .dropna()
        .reset_index(drop=True)
        .set_index('SYMBOL')
    )
    symbol_row = cusip_by_symbol.loc[symbol]
    return symbol_row['CUSIP']

In [6]:
## IEX Setup and Test
## API keys are read from the environment (None when a variable is not set).
iex_api_key = os.getenv("IEX_API_KEY")
iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

base_url = 'https://cloud.iexapis.com/stable/'
sandbox_url = 'https://sandbox.iexapis.com/stable/'

## Same keys under the names the IEX functions read; reuse the values loaded
## above instead of querying the environment a second time.
real_token = iex_api_key
test_token = iex_test_api_key

## Connectivity check against the status endpoint. The timeout keeps the cell
## from blocking indefinitely if the service does not answer.
test_resp = requests.get(base_url + 'status', timeout=10)
test_resp

<Response [200]>

In [7]:
## IEX Functions 
def get_chart(stock_ticker, chart_range=default_range,token=False):
    """Download the IEX chart for one ticker as a date-indexed DataFrame.

    Parameters
    ----------
    stock_ticker : str
        Ticker inserted into the ``stock/<ticker>/chart/<range>`` URL path.
    chart_range : str, optional
        Range segment of the chart URL. Defaults to ``default_range``.
    token : bool, optional
        Falsy (default): request ``sandbox_url`` with ``test_token``.
        Truthy: request ``base_url`` with ``real_token`` (real API calls).

    Returns
    -------
    pandas.DataFrame
        The JSON payload as a frame, with its ``date`` field renamed to
        ``Date``, converted to datetime and set as the index.

    Raises
    ------
    ValueError
        If the token selected by ``token`` is not set.
    requests.HTTPError
        If the API answers with an error status code.
    """
    # Pick endpoint and credential by truthiness. Comparing with `== False` /
    # `== True` matched neither branch for other values (e.g. None), leaving
    # the response variable unbound and failing later with UnboundLocalError.
    if token:
        api_root, api_token = base_url, real_token
    else:
        api_root, api_token = sandbox_url, test_token

    if not api_token:
        raise ValueError('IEX token is not set; check IEX_API_KEY / IEX_TEST_API_KEY')

    resp_data = requests.get(
        api_root + 'stock/' + stock_ticker + '/chart/' + chart_range,
        params={'token': api_token},  # let requests build and encode the query string
        timeout=30,                   # do not hang the notebook on a stalled connection
    )
    # Surface HTTP errors here instead of building a frame from an error body.
    resp_data.raise_for_status()

    df = pd.DataFrame(resp_data.json())
    df = df.rename(columns={'date': 'Date'})
    df['Date'] = pd.to_datetime(df['Date'])
    return df.set_index('Date')

def get_concise_data(stock_ticker, chart_range=default_range,token=False):
    """Return a trimmed IEX chart for ``stock_ticker``.

    Fetches the chart through ``get_chart``, keeps only the price, volume and
    change columns listed below, and slices the rows to the module-level
    ``start_date`` .. ``end_date`` window.

    Parameters
    ----------
    stock_ticker : str
        Ticker forwarded to ``get_chart``.
    chart_range : str, optional
        Chart range forwarded to ``get_chart``. Defaults to ``default_range``.
    token : bool, optional
        Forwarded to ``get_chart`` to choose between test and real tokens.

    Returns
    -------
    pandas.DataFrame
        The selected columns for the configured date window.
    """
    kept_columns = [
        'close', 'high', 'low', 'open',
        'symbol', 'volume', 'change', 'changePercent',
    ]
    full_chart = get_chart(stock_ticker, chart_range, token=token)
    selected = full_chart[kept_columns]
    # Window bounds are the constants declared at the start of the file.
    return selected[start_date:end_date]

In [8]:
def get_ftd_and_iex_for_2021(cusip,symbol,token1=False):
    """Combine FTD rows and IEX price data for one security.

    Parameters
    ----------
    cusip : str
        CUSIP passed to ``return_ftd_data_cusip``.
    symbol : str
        Ticker passed to ``get_concise_data``.
    token1 : bool, optional
        Forwarded to ``get_concise_data`` as ``token``.

    Returns
    -------
    pandas.DataFrame
        Outer merge on ``Date`` of the IEX frame and the FTD frame (without
        its ``SYMBOL`` column); missing ``QUANTITY_FAILS`` values are set to 0.
    """
    fails = return_ftd_data_cusip(cusip)
    prices = get_concise_data(symbol, token=token1)

    # The IEX frame already carries a symbol column, so the FTD one is dropped.
    fails = fails.drop(columns=['SYMBOL'])

    combined = prices.merge(fails, how='outer', on='Date')
    # Dates that exist only on the IEX side have no FTD record: count them as zero fails.
    return combined.assign(QUANTITY_FAILS=lambda frame: frame['QUANTITY_FAILS'].fillna(0))

In [9]:
## Get Short Interest Data From QUANDL/NASDAQ
## The key is read from the environment (None when NASDAQ_API_KEY is not set).
nsdq_api_key = os.getenv('NASDAQ_API_KEY')
base_url_nsdq = 'https://data.nasdaq.com/api/v3/datasets/FINRA/'

In [10]:
import quandl

In [11]:
## GME short-volume frames for the start_date..end_date window, one per FINRA dataset code
gme_test1 = quandl.get(
    "FINRA/FNSQ_GME",
    start_date=start_date,
    end_date=end_date,
    authtoken=nsdq_api_key,
)  ## Nasdaq
gme_test2 = quandl.get(
    "FINRA/FNYX_GME",
    start_date=start_date,
    end_date=end_date,
    authtoken=nsdq_api_key,
)  ## NYSE

In [12]:
## Preview the FNYX (NYSE) frame as returned, before its columns are renamed
gme_test2

Unnamed: 0_level_0,ShortVolume,ShortExemptVolume,TotalVolume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-04,518521.0,5.0,1053968.0
2021-01-05,207936.0,0.0,580990.0
2021-01-06,232678.0,0.0,802968.0
2021-01-07,270475.0,0.0,785411.0
2021-01-08,450167.0,0.0,989717.0
...,...,...,...
2021-09-24,69857.0,0.0,126445.0
2021-09-27,50568.0,0.0,100093.0
2021-09-28,46280.0,0.0,98511.0
2021-09-29,46204.0,180.0,104641.0


In [13]:
## Tag the Nasdaq columns with their source and drop ShortExemptVolume, which
## is not used in the calculations that follow.
## errors='ignore' makes the cell safe to re-run: on a second run the rename is
## a no-op, but a plain drop would raise KeyError because the column is already gone.
gme_test1 = (
    gme_test1
    .rename(columns={'ShortVolume':'ShortVolumeNSDQ','TotalVolume':'TotalVolumeNSDQ'})
    .drop(columns=['ShortExemptVolume'], errors='ignore')
)
gme_test1

Unnamed: 0_level_0,ShortVolumeNSDQ,TotalVolumeNSDQ
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-04,2290920.0,3837336.0
2021-01-05,554104.0,1714247.0
2021-01-06,610215.0,2098770.0
2021-01-07,925446.0,2486740.0
2021-01-08,1071479.0,2512653.0
...,...,...
2021-09-24,255744.0,394092.0
2021-09-27,239563.0,387860.0
2021-09-28,256612.0,374765.0
2021-09-29,277830.0,427486.0


In [14]:
## Tag the NYSE columns with their source and drop ShortExemptVolume, which
## is not used in the calculations that follow.
## errors='ignore' makes the cell safe to re-run: on a second run the rename is
## a no-op, but a plain drop would raise KeyError because the column is already gone.
gme_test2 = (
    gme_test2
    .rename(columns={'ShortVolume':'ShortVolumeNYSE','TotalVolume':'TotalVolumeNYSE'})
    .drop(columns=['ShortExemptVolume'], errors='ignore')
)
gme_test2

Unnamed: 0_level_0,ShortVolumeNYSE,TotalVolumeNYSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-04,518521.0,1053968.0
2021-01-05,207936.0,580990.0
2021-01-06,232678.0,802968.0
2021-01-07,270475.0,785411.0
2021-01-08,450167.0,989717.0
...,...,...
2021-09-24,69857.0,126445.0
2021-09-27,50568.0,100093.0
2021-09-28,46280.0,98511.0
2021-09-29,46204.0,104641.0


In [15]:
## Outer-join the two exchange frames on Date so a date present in either one is kept
gme_test3 = gme_test1.merge(gme_test2, how='outer', on='Date')
gme_test3

Unnamed: 0_level_0,ShortVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,TotalVolumeNYSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-04,2290920.0,3837336.0,518521.0,1053968.0
2021-01-05,554104.0,1714247.0,207936.0,580990.0
2021-01-06,610215.0,2098770.0,232678.0,802968.0
2021-01-07,925446.0,2486740.0,270475.0,785411.0
2021-01-08,1071479.0,2512653.0,450167.0,989717.0
...,...,...,...,...
2021-09-24,255744.0,394092.0,69857.0,126445.0
2021-09-27,239563.0,387860.0,50568.0,100093.0
2021-09-28,256612.0,374765.0,46280.0,98511.0
2021-09-29,277830.0,427486.0,46204.0,104641.0


In [16]:
## Inspect row count, dtypes and non-null counts of the merged frame
gme_test3.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 188 entries, 2021-01-04 to 2021-09-30
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   ShortVolumeNSDQ  188 non-null    float64
 1   TotalVolumeNSDQ  188 non-null    float64
 2   ShortVolumeNYSE  188 non-null    float64
 3   TotalVolumeNYSE  188 non-null    float64
dtypes: float64(4)
memory usage: 7.3 KB


In [25]:
## GME short-volume pipeline in a single cell: fetch, relabel, merge, aggregate.
gme_test1= quandl.get("FINRA/FNSQ_GME",start_date=start_date,end_date=end_date,authtoken=nsdq_api_key)   ## Nasdaq
gme_test2= quandl.get("FINRA/FNYX_GME",start_date=start_date,end_date=end_date,authtoken=nsdq_api_key)   ## NYSE

gme_test1 = gme_test1.rename(columns={'ShortVolume':'ShortVolumeNSDQ','TotalVolume':'TotalVolumeNSDQ'})
gme_test1 = gme_test1.drop(columns=['ShortExemptVolume'])

gme_test2 = gme_test2.rename(columns={'ShortVolume':'ShortVolumeNYSE','TotalVolume':'TotalVolumeNYSE'})
gme_test2 = gme_test2.drop(columns=['ShortExemptVolume'])

gme_test3 = pd.merge(gme_test1,gme_test2,on='Date',how='outer')

## The outer merge leaves NaN wherever only one dataset has a row for a date.
## fill_value=0 keeps the figure that was reported instead of letting NaN wipe
## out the combined total for that date.
gme_test3['TotalShortVolume'] = gme_test3['ShortVolumeNSDQ'].add(gme_test3['ShortVolumeNYSE'], fill_value=0)
gme_test3['TotalVolume'] = gme_test3['TotalVolumeNSDQ'].add(gme_test3['TotalVolumeNYSE'], fill_value=0)
## Short volume as a percentage of total volume
gme_test3['SHORToverTOTALvolume'] = gme_test3['TotalShortVolume'] / gme_test3['TotalVolume'] * 100
test_short_df = gme_test3[['TotalShortVolume','TotalVolume','SHORToverTOTALvolume']]
test_short_df

Unnamed: 0_level_0,TotalShortVolume,TotalVolume,SHORToverTOTALvolume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-04,2809441.0,4891304.0,57.437465
2021-01-05,762040.0,2295237.0,33.200929
2021-01-06,842893.0,2901738.0,29.047867
2021-01-07,1195921.0,3272151.0,36.548466
2021-01-08,1521646.0,3502370.0,43.446181
...,...,...,...
2021-09-24,325601.0,520537.0,62.550981
2021-09-27,290131.0,487953.0,59.458800
2021-09-28,302892.0,473276.0,63.999020
2021-09-29,324034.0,532127.0,60.894110


Unnamed: 0_level_0,TotalShortVolume,TotalVolume,SHORToverTOTALvolume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-04,2809441.0,4891304.0,57.437465
2021-01-05,762040.0,2295237.0,33.200929
2021-01-06,842893.0,2901738.0,29.047867
2021-01-07,1195921.0,3272151.0,36.548466
2021-01-08,1521646.0,3502370.0,43.446181
...,...,...,...
2021-09-24,325601.0,520537.0,62.550981
2021-09-27,290131.0,487953.0,59.458800
2021-09-28,302892.0,473276.0,63.999020
2021-09-29,324034.0,532127.0,60.894110


In [19]:
# ## Return CUSIP number from Symbol 
# ## NOTE: scratch version kept for reference only; the same steps are
# ## performed by return_CUSIP_from_symbol, which is what the cells below call.
# new_symbol_df = pd.DataFrame(symbol_df)
# new_symbol_df.dropna(inplace=True)
# new_symbol_df.reset_index(inplace=True,drop=True)
# new_symbol_df.set_index('SYMBOL',inplace=True)
# new_symbol_df.loc['GME']

CUSIP    36467W109
Name: GME, dtype: object

In [20]:
## Check of the symbol -> CUSIP lookup using GME
gme_cusip = return_CUSIP_from_symbol('GME')
gme_cusip

'36467W109'

In [22]:
def return_data_using_symbol(symbol,token=False):
    """Build the merged FTD + IEX frame for ``symbol`` without needing its CUSIP.

    The CUSIP is resolved with ``return_CUSIP_from_symbol`` and both values
    are handed to ``get_ftd_and_iex_for_2021``.

    Parameters
    ----------
    symbol : str
        Ticker to look up and fetch.
    token : bool, optional
        Forwarded to ``get_ftd_and_iex_for_2021`` as ``token1``.

    Returns
    -------
    Whatever ``get_ftd_and_iex_for_2021`` returns for the resolved CUSIP.
    """
    return get_ftd_and_iex_for_2021(
        return_CUSIP_from_symbol(symbol),
        symbol,
        token1=token,
    )

In [23]:
## Merged FTD + IEX frame for GME; token is left at its default, so the test token is used
df_return = return_data_using_symbol('GME')
df_return

Unnamed: 0_level_0,close,high,low,open,symbol,volume,change,changePercent,QUANTITY_FAILS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-01-04,17.88,19.8000,17.50,19.00,GME,10465498,-1.598204,-0.0869,182269.0
2021-01-05,17.58,18.6982,17.58,17.76,GME,4975431,0.121195,0.0070,490723.0
2021-01-06,18.58,19.8300,17.44,17.44,GME,6217987,1.035208,0.0600,772112.0
2021-01-07,18.91,19.7200,18.10,18.95,GME,6368200,-0.286810,-0.0159,799328.0
2021-01-08,17.92,18.6000,17.54,18.29,GME,6519863,-0.407155,-0.0217,555658.0
...,...,...,...,...,...,...,...,...,...
2021-09-24,185.31,195.0053,185.31,194.00,GME,1713382,-6.358259,-0.0326,699.0
2021-09-27,196.50,197.5300,188.28,193.21,GME,1502320,4.515915,0.0241,10819.0
2021-09-28,180.50,195.2418,180.50,194.00,GME,1852485,-11.175637,-0.0577,4718.0
2021-09-29,181.91,188.8500,181.91,186.44,GME,1914895,-2.704940,-0.0150,12315.0


In [28]:
def get_short_data_QUANDL(symbol):
    """Return combined Nasdaq + NYSE short-volume figures for ``symbol``.

    Fetches the ``FINRA/FNSQ_<symbol>`` and ``FINRA/FNYX_<symbol>`` datasets
    through ``quandl.get`` for the module-level ``start_date`` .. ``end_date``
    window, outer-merges them on ``Date`` and adds the two sources together.

    Parameters
    ----------
    symbol : str
        Ticker appended to the two dataset codes.

    Returns
    -------
    pandas.DataFrame
        Columns ``TotalShortVolume``, ``TotalVolume`` and
        ``SHORToverTOTALvolume`` (short volume as a percentage of total
        volume).
    """
    request_args = dict(start_date=start_date, end_date=end_date, authtoken=nsdq_api_key)
    nsdq = quandl.get("FINRA/FNSQ_" + symbol, **request_args)   ## Nasdaq
    nyse = quandl.get("FINRA/FNYX_" + symbol, **request_args)   ## NYSE

    nsdq = (
        nsdq
        .rename(columns={'ShortVolume':'ShortVolumeNSDQ','TotalVolume':'TotalVolumeNSDQ'})
        .drop(columns=['ShortExemptVolume'])
    )
    nyse = (
        nyse
        .rename(columns={'ShortVolume':'ShortVolumeNYSE','TotalVolume':'TotalVolumeNYSE'})
        .drop(columns=['ShortExemptVolume'])
    )

    merged = pd.merge(nsdq, nyse, on='Date', how='outer')

    # The outer merge leaves NaN wherever only one dataset has a row for a
    # date. Plain `+` would turn the whole total into NaN for that date;
    # fill_value=0 keeps the figure that was actually reported.
    merged['TotalShortVolume'] = merged['ShortVolumeNSDQ'].add(merged['ShortVolumeNYSE'], fill_value=0)
    merged['TotalVolume'] = merged['TotalVolumeNSDQ'].add(merged['TotalVolumeNYSE'], fill_value=0)
    merged['SHORToverTOTALvolume'] = merged['TotalShortVolume'] / merged['TotalVolume'] * 100
    return merged[['TotalShortVolume','TotalVolume','SHORToverTOTALvolume']]

In [29]:
## Same GME short-volume table, this time produced by the reusable function
test_short_df = get_short_data_QUANDL('GME')
test_short_df

Unnamed: 0_level_0,TotalShortVolume,TotalVolume,SHORToverTOTALvolume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-04,2809441.0,4891304.0,57.437465
2021-01-05,762040.0,2295237.0,33.200929
2021-01-06,842893.0,2901738.0,29.047867
2021-01-07,1195921.0,3272151.0,36.548466
2021-01-08,1521646.0,3502370.0,43.446181
...,...,...,...
2021-09-24,325601.0,520537.0,62.550981
2021-09-27,290131.0,487953.0,59.458800
2021-09-28,302892.0,473276.0,63.999020
2021-09-29,324034.0,532127.0,60.894110


In [30]:
## Attach the short-volume columns to the FTD + IEX frame; outer join keeps dates found in either
df_full = df_return.merge(test_short_df, how='outer', on='Date')
df_full

Unnamed: 0_level_0,close,high,low,open,symbol,volume,change,changePercent,QUANTITY_FAILS,TotalShortVolume,TotalVolume,SHORToverTOTALvolume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-01-04,17.88,19.8000,17.50,19.00,GME,10465498,-1.598204,-0.0869,182269.0,2809441.0,4891304.0,57.437465
2021-01-05,17.58,18.6982,17.58,17.76,GME,4975431,0.121195,0.0070,490723.0,762040.0,2295237.0,33.200929
2021-01-06,18.58,19.8300,17.44,17.44,GME,6217987,1.035208,0.0600,772112.0,842893.0,2901738.0,29.047867
2021-01-07,18.91,19.7200,18.10,18.95,GME,6368200,-0.286810,-0.0159,799328.0,1195921.0,3272151.0,36.548466
2021-01-08,17.92,18.6000,17.54,18.29,GME,6519863,-0.407155,-0.0217,555658.0,1521646.0,3502370.0,43.446181
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-24,185.31,195.0053,185.31,194.00,GME,1713382,-6.358259,-0.0326,699.0,325601.0,520537.0,62.550981
2021-09-27,196.50,197.5300,188.28,193.21,GME,1502320,4.515915,0.0241,10819.0,290131.0,487953.0,59.458800
2021-09-28,180.50,195.2418,180.50,194.00,GME,1852485,-11.175637,-0.0577,4718.0,302892.0,473276.0,63.999020
2021-09-29,181.91,188.8500,181.91,186.44,GME,1914895,-2.704940,-0.0150,12315.0,324034.0,532127.0,60.894110


In [31]:
def get_all_data(symbol,token=False):
    """Return FTD, IEX price and short-volume data for ``symbol`` in one frame.

    Parameters
    ----------
    symbol : str
        Ticker passed to ``return_data_using_symbol`` and
        ``get_short_data_QUANDL``.
    token : bool, optional
        Forwarded to ``return_data_using_symbol``.

    Returns
    -------
    pandas.DataFrame
        Outer merge on ``Date`` of the two helper results.
    """
    price_and_ftd = return_data_using_symbol(symbol, token=token)
    short_volume = get_short_data_QUANDL(symbol)
    return price_and_ftd.merge(short_volume, how='outer', on='Date')