In [1]:
import pandas as pd

import glob

from pathlib import Path
import csv
import pickle

import os
import requests
import json

import quandl
#import yfinance as yf

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
## Constants

## Date window applied to the downloaded price and short-volume data
## (read by get_concise_data, get_short_data_QUANDL and get_data_all_actual).
start_date = '2021-01-01'
end_date = '2021-09-30'
default_range = '10m' ## Default chart range passed to the IEX helpers - more history is not needed at the moment

# IEX environment flag: False -> sandbox data (test token),
# True -> real data (real token and real API calls).
token_status = False

In [3]:
## Import FTD file. The first CSV column becomes the index and pandas is asked
## to parse it as dates; return_ftd_data_cusip expects a 'Date' column to
## appear when that index is reset.
ftd_df = pd.read_csv(
    Path('../Resources/ftd_all_data.csv'),
    index_col=0, parse_dates=True
)

## Import Symbol and CUSIP list. The first CSV column becomes the index;
## return_CUSIP_from_symbol reads the 'SYMBOL' and 'CUSIP' columns.
symbol_df = pd.read_csv(
    Path('../Resources/symbol_all_list.csv'),
    index_col=0
)

In [4]:
def return_ftd_data_cusip(cusip_number):
    """Return the fails-to-deliver rows for one CUSIP, indexed by date.

    Reads the module-level ``ftd_df``; the global frame itself is not
    modified.

    Parameters
    ----------
    cusip_number
        Label to look up in the ``CUSIP`` column of ``ftd_df``.

    Returns
    -------
    pandas.DataFrame
        Every row whose CUSIP matches, indexed by ``Date``. The ``CUSIP``
        column is not part of the result.

    Raises
    ------
    KeyError
        If ``cusip_number`` does not occur in ``ftd_df``.
    """
    by_cusip = ftd_df.reset_index().set_index("CUSIP")
    # Select with a one-element list so the result is always a DataFrame.
    # A scalar label yields a Series when the CUSIP matches exactly one row,
    # and a Series has no set_index(), so single-row CUSIPs would crash.
    matches = by_cusip.loc[[cusip_number]]
    return matches.set_index('Date')

def return_CUSIP_from_symbol(symbol):
    """Look up the CUSIP stored for a ticker symbol in ``symbol_df``.

    Rows of ``symbol_df`` that contain any missing value are ignored before
    the lookup. A ``KeyError`` is raised when ``symbol`` is not present.
    """
    lookup = (
        pd.DataFrame(symbol_df)
        .dropna()
        .reset_index(drop=True)
        .set_index('SYMBOL')
    )
    return lookup.loc[symbol]['CUSIP']

In [5]:
## API Setup and Test
## Credentials are read from environment variables.
iex_api_key = os.getenv("IEX_API_KEY")
iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

## IEX endpoints: production (base_url) and sandbox (sandbox_url)
base_url = 'https://cloud.iexapis.com/stable/'
sandbox_url = 'https://sandbox.iexapis.com/stable/'

## Same environment variables as iex_api_key / iex_test_api_key above;
## get_chart reads these two names. Each is None when its variable is unset.
real_token = os.environ.get('IEX_API_KEY')
test_token = os.environ.get('IEX_TEST_API_KEY')

## QUANDL/NASDAQ
nsdq_api_key = os.environ.get('NASDAQ_API_KEY')
base_url_nsdq = 'https://data.nasdaq.com/api/v3/datasets/FINRA/'

## Connectivity check: GET the production 'status' endpoint (sent without a token)
test_resp = requests.get(base_url + 'status')
test_resp

<Response [200]>

In [6]:
## IEX Functions 
def get_chart(stock_ticker, chart_range=default_range,token=False):
    """Download the IEX chart for one ticker as a date-indexed DataFrame.

    Parameters
    ----------
    stock_ticker : str
        Ticker symbol placed in the request path.
    chart_range : str
        IEX chart range; defaults to ``default_range``.
    token : bool
        Falsy -> sandbox endpoint with ``test_token``;
        truthy -> production endpoint with ``real_token`` (real API calls).

    Returns
    -------
    pandas.DataFrame
        The JSON payload as a frame whose ``date`` field is renamed to
        ``Date``, parsed with ``pd.to_datetime`` and used as the index.

    Raises
    ------
    ValueError
        If the token selected by ``token`` is missing or empty.
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    KeyError
        If the payload has no ``date`` field (for example an empty list).
    """
    # Branch on truthiness so every value of `token` selects an endpoint; a
    # `== False` / `== True` pair leaves the response unbound for anything else.
    if token:
        endpoint, api_token, env_name = base_url, real_token, 'IEX_API_KEY'
    else:
        endpoint, api_token, env_name = sandbox_url, test_token, 'IEX_TEST_API_KEY'

    # Fail with a readable message instead of a str + None TypeError.
    if not api_token:
        raise ValueError('IEX token is not set; define ' + env_name + ' in the environment')

    # Hand the token to requests via `params` so it is URL-encoded.
    resp_data = requests.get(
        endpoint + 'stock/' + stock_ticker + '/chart/' + chart_range,
        params={'token': api_token},
    )
    # Stop on an HTTP error rather than building a frame from an error body.
    resp_data.raise_for_status()

    df = pd.DataFrame(resp_data.json())
    df = df.rename(columns={'date':'Date'})
    df['Date'] = pd.to_datetime(df['Date'])
    return df.set_index('Date')

def get_concise_data(stock_ticker, chart_range=default_range,token=False):
    """Fetch the IEX chart and keep only the columns and dates used here.

    Selects the close/high/low/open, symbol, volume and change columns from
    ``get_chart``, renames ``volume`` to ``volume_IEX`` and returns the rows
    between the module-level ``start_date`` and ``end_date``.
    """
    wanted_columns = ['close','high','low','open','symbol','volume','change','changePercent']
    chart = get_chart(stock_ticker, chart_range, token=token)
    trimmed = chart[wanted_columns].rename(columns={'volume':'volume_IEX'})
    ## Dates declared at start of file
    return trimmed[start_date:end_date]

In [7]:
def get_ftd_and_iex_for_2021(cusip,symbol,token=False):
    """Outer-join IEX price data with fails-to-deliver data on ``Date``.

    The ``SYMBOL`` column of the FTD data is dropped before the join, and
    dates without an FTD record get ``QUANTITY_FAILS`` set to 0.
    """
    fails = return_ftd_data_cusip(cusip)
    prices = get_concise_data(symbol, token=token)

    merged = pd.merge(
        prices,
        fails.drop(columns=['SYMBOL']),
        on='Date',
        how='outer',
    )
    return merged.assign(QUANTITY_FAILS=merged['QUANTITY_FAILS'].fillna(0))

def return_data_using_symbol(symbol,token=False):
    """Resolve ``symbol`` to its CUSIP and return the merged IEX + FTD frame."""
    return get_ftd_and_iex_for_2021(
        return_CUSIP_from_symbol(symbol),
        symbol,
        token=token,
    )

def get_short_data_QUANDL(symbol):
    """Combine short-volume data for ``symbol`` from two Quandl FINRA datasets.

    Requests ``FINRA/FNSQ_<symbol>`` and ``FINRA/FNYX_<symbol>`` for the
    module-level ``start_date``..``end_date`` window, outer-joins them on
    ``Date`` and returns three columns: summed short volume, summed total
    volume, and short volume as a percentage of total volume. A date present
    in only one dataset produces NaN in all three columns.
    """
    venues = (
        ("FINRA/FNSQ_", {'ShortVolume':'ShortVolumeNSDQ','TotalVolume':'TotalVolumeNSDQ'}),   ## Nasdaq
        ("FINRA/FNYX_", {'ShortVolume':'ShortVolumeNYSE','TotalVolume':'TotalVolumeNYSE'}),   ## NYSE
    )

    # Download both datasets first, then tidy each one.
    downloads = [
        quandl.get(prefix + symbol, start_date=start_date, end_date=end_date, authtoken=nsdq_api_key)
        for prefix, _ in venues
    ]
    nasdaq, nyse = (
        frame.rename(columns=renames).drop(columns=['ShortExemptVolume'])
        for frame, (_, renames) in zip(downloads, venues)
    )

    combined = pd.merge(nasdaq, nyse, on='Date', how='outer')
    combined['TotalShortVolume'] = combined['ShortVolumeNSDQ'] + combined['ShortVolumeNYSE']
    combined['TotalVolume'] = combined['TotalVolumeNSDQ'] + combined['TotalVolumeNYSE']
    combined['SHORToverTOTALvolume'] = combined['TotalShortVolume'] / combined['TotalVolume'] * 100
    return combined[['TotalShortVolume','TotalVolume','SHORToverTOTALvolume']]


## Should only need to use this function to get all data into a DataFrame 
def get_all_data_using_symbol(symbol,token=token_status):
    """Return IEX prices, FTD data and short-volume data for ``symbol`` in one frame.

    ``token`` is forwarded to the IEX helpers. Its default is the value
    ``token_status`` had when this ``def`` ran; reassigning ``token_status``
    afterwards does not change the default.
    """
    price_and_ftd = return_data_using_symbol(symbol, token=token)
    short_volume = get_short_data_QUANDL(symbol)
    return pd.merge(price_and_ftd, short_volume, on='Date', how='outer')

In [8]:
# test_gme_df = get_all_data_using_symbol('GME',token=False)
# test_gme_df.info()

In [9]:
# symbol_ = symbol_df.iloc[14000]
# symbol_var = symbol_['SYMBOL']
# symbol_var

In [10]:
# test_df = get_all_data_using_symbol('XRT',token=token_status)
# test_df.info()

# ## Check if any null values in returned DataFrame 
# bool_var = test_df.isnull().values.any()
# bool_var

In [11]:
# test_df.isnull().values.any()

In [12]:
# symbol_test_range = symbol_df.iloc[0:50]

In [13]:
# length_ = len(symbol_test_range)
# length_

In [14]:
## Test Iteration 

# range_var = range(length_)
# my_dict = {} ## Declare Empty dict to add Data into 
# token_status = False

# for i in range_var: 
#     symbol_vrb = symbol_test_range['SYMBOL'][i]
#     try:
#         dfsymbol = get_all_data_using_symbol(symbol_vrb,token=token_status)
#     except KeyError:
#         continue
#     except:
#         continue
    
#     bool_var = dfsymbol.isnull().values.any()
#     if bool_var == False:
#         my_dict[symbol_vrb] = dfsymbol
#     elif bool_var == True:
#         continue
        
#my_dict

In [15]:
# for i in my_dict:
#     print(i)

In [16]:
## Get Key Values in my_dict using "index" or ordering 

# Key_5 = list(my_dict.keys())[5]
# my_dict[Key_5]

In [17]:
## Use pickle module to export and save files
def save_obj(obj, path):
    """Pickle ``obj`` to the file at ``path`` using the highest pickle protocol.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(path, mode='wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [18]:
## Test variables if necessary: the first 401 rows of symbol_df
symbol_test_range = symbol_df.iloc[0:401]

## Token status for IEX (False = sandbox data).
## NOTE: get_all_data_using_symbol bound its default `token` when it was
## defined, so this reassignment only matters where token_status is passed
## explicitly.
token_status = False
## Number of rows in the full symbol list (not the 401-row test slice)
length_ = len(symbol_df)

#range_var = range(length_)
my_dict = {} ## Empty dict to add data into (the commented-out loops below store one DataFrame per symbol)

## Batch bounds - change as necessary to begin where left off in case of failure
x = 0
y = 100

# for i in range_var: 
#     symbol_vrb = symbol_test_range['SYMBOL'][i]
#     try:
#         dfsymbol = get_all_data_using_symbol(symbol_vrb,token=token_status)
#     except KeyError:
#         continue
#     except:
#         continue
    
#     bool_var = dfsymbol.isnull().values.any()
#     if bool_var == False:
#         my_dict[symbol_vrb] = dfsymbol
#     elif bool_var == True:
#         continue


### This one  
# while x <= (length_ - 200):
#     range_var = range(x,y)
#     str_symbol1 = str(x)
#     str_symbol2 = str(y)
    
#     for i in range_var: 
#         symbol_vrb = symbol_test_range['SYMBOL'][i]
#         try:
#             dfsymbol = get_all_data_using_symbol(symbol_vrb,token=token_status)
#         except KeyError:
#             continue
#         except:
#             continue

#         bool_var = dfsymbol.isnull().values.any()
#         if bool_var == False:
#             my_dict[symbol_vrb] = dfsymbol
#         elif bool_var == True:
#             continue
    
#     ## Export in blocks of 100 in-case there are errors while processing data. 
#     ## Can pick up where the function left off by changing x and y vars to
#     ## avoid repeating API calls by doing this in order to not burn 
#     ## IEX API tokens unnecessarily 
#     ##path_var = Path('../FilesExportAll/data_'+str_symbol1+'_'+str_symbol2+'.csv')

# #     with open(path_var, 'w') as f:
# #         for key in my_dict.keys():
# #             f.write("%s,%s\n"%(key,my_dict[key]))
    
#     pickle_path= Path('../FilesExportAll/data_'+str_symbol1+'_'+str_symbol2+'.pkl')
#     save_obj(my_dict,pickle_path)
    
#     x += 100
#     y += 100

In [19]:
## iexfinance client (imported in this cell rather than in the import cell at the top)
from iexfinance.stocks import Stock
#IEX_API_VERSION
# iex_api_key = os.getenv("IEX_API_KEY")
# iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

## iexfinance-related environment settings; these names are not referenced
## again in the cells shown here.
## NOTE(review): presumably iexfinance reads these variables from the
## environment itself - confirm.
iex_token = os.getenv("IEX_TOKEN")
iex_api_ver = os.getenv("IEX_API_VERSION")
iex_out_form = os.getenv("IEX_OUTPUT_FORMAT")

## Display the configured API version
iex_api_ver

'iexcloud-sandbox'

In [32]:
#gme = Stock("GME")
## Fetch GME history through iexfinance with range '10m' and chartCloseOnly=True
test_df = Stock("GME").get_historical_prices(range='10m',chartCloseOnly=True)
test_df

Unnamed: 0,close,volume,change,changePercent,changeOverTime
2020-12-29,20.24,9323113,0,0,0
2020-12-30,20.08,6114664,-0.125915,-0.0063,-0.00635
2020-12-31,19.59,7101007,-0.420674,-0.0224,-0.029139
2021-01-04,17.91,10400598,-1.617271,-0.0886,-0.113251
2021-01-05,18.06,5110444,0.124002,0.007,-0.104018
...,...,...,...,...,...
2021-10-22,171.4,2836357,-12.139332,-0.0671,7.87859
2021-10-25,180.58,1492460,4.354468,0.0256,8.108587
2021-10-26,181.11,2196690,3.996679,0.023,8.276021
2021-10-27,174.99,1132766,-4.414729,-0.0244,8.314767


In [33]:
## Turn the date index into a parsed datetime index named 'Date'
test_df = (
    test_df
    .reset_index()
    .rename(columns={'index':'Date'})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)
test_df

Unnamed: 0_level_0,close,volume,change,changePercent,changeOverTime
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-12-29,20.24,9323113,0,0,0
2020-12-30,20.08,6114664,-0.125915,-0.0063,-0.00635
2020-12-31,19.59,7101007,-0.420674,-0.0224,-0.029139
2021-01-04,17.91,10400598,-1.617271,-0.0886,-0.113251
2021-01-05,18.06,5110444,0.124002,0.007,-0.104018
...,...,...,...,...,...
2021-10-22,171.4,2836357,-12.139332,-0.0671,7.87859
2021-10-25,180.58,1492460,4.354468,0.0256,8.108587
2021-10-26,181.11,2196690,3.996679,0.023,8.276021
2021-10-27,174.99,1132766,-4.414729,-0.0244,8.314767


In [37]:
def get_data_all_actual(symbol):
    """Build the merged price / FTD / short-volume frame using iexfinance.

    Steps: resolve the CUSIP for ``symbol``, load its fails-to-deliver rows,
    fetch close-only history through ``Stock(symbol)``, restrict it to
    ``start_date``..``end_date``, then outer-join on ``Date`` with the FTD
    rows and with the output of ``get_short_data_QUANDL``.

    Parameters
    ----------
    symbol : str
        Ticker symbol; used for the CUSIP lookup, the iexfinance request and
        the short-volume request.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``Date``. ``QUANTITY_FAILS`` is 0 on dates without
        an FTD record. The FTD ``SYMBOL`` column is kept, so it is NaN on
        those dates.

    Raises
    ------
    KeyError
        Propagated from the symbol or CUSIP lookups when no match exists.
    """
    cusip_number = return_CUSIP_from_symbol(symbol)
    ftd_data = return_ftd_data_cusip(cusip_number)

    # Request the shared default range instead of a second hard-coded '10m'.
    raw_prices = Stock(symbol).get_historical_prices(range=default_range, chartCloseOnly=True)

    # Parse the index as dates BEFORE slicing so the window is applied to real
    # timestamps, and build a new frame instead of mutating a slice in place
    # (in-place edits on a slice trigger pandas' SettingWithCopy warnings).
    iex_data = (
        raw_prices
        .set_axis(pd.to_datetime(raw_prices.index), axis=0)
        .rename_axis('Date')
        .loc[start_date:end_date]
    )

    df1 = pd.merge(iex_data, ftd_data, on='Date', how='outer')
    df1['QUANTITY_FAILS'] = df1['QUANTITY_FAILS'].fillna(0)

    df2 = get_short_data_QUANDL(symbol)
    return pd.merge(df1, df2, on='Date', how='outer')

In [38]:
test_symbol='GME'
complete_dict = {}
incomplete_dict = {}

test_df = get_data_all_actual(test_symbol)
## True when any cell of the merged frame is missing
bool_var = test_df.isnull().values.any()
## Key the result by test_symbol: no top-level `symbol` variable is assigned
## in the cells shown here, so using that name raises NameError on a fresh kernel.
if bool_var:
    incomplete_dict[test_symbol] = test_df
else:
    complete_dict[test_symbol] = test_df

test_df

Unnamed: 0_level_0,close,volume,change,changePercent,changeOverTime,SYMBOL,QUANTITY_FAILS,TotalShortVolume,TotalVolume,SHORToverTOTALvolume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-01-04,17.75,10073840,-1.638393,-0.0878,-0.110053,GME,182269.0,2809441.0,4891304.0,57.437465
2021-01-05,17.79,5125437,0.120876,0.007,-0.105572,GME,490723.0,762040.0,2295237.0,33.200929
2021-01-06,18.88,6056437,1.001044,0.059,-0.05309,GME,772112.0,842893.0,2901738.0,29.047867
2021-01-07,18.55,6349803,-0.28912,-0.0153,-0.070098,GME,799328.0,1195921.0,3272151.0,36.548466
2021-01-08,18.29,6703513,-0.405459,-0.0221,-0.090586,GME,555658.0,1521646.0,3502370.0,43.446181
...,...,...,...,...,...,...,...,...,...,...
2021-09-24,187.2,1713446,-6.214616,-0.033,8.850659,GME,699.0,325601.0,520537.0,62.550981
2021-09-27,190.86,1482251,4.401893,0.0238,8.93394,GME,10819.0,290131.0,487953.0,59.458800
2021-09-28,182.6,1845747,-10.965149,-0.0597,8.32742,GME,4718.0,302892.0,473276.0,63.999020
2021-09-29,181.7,1953332,-2.723278,-0.016,8.435184,GME,12315.0,324034.0,532127.0,60.894110
