In [None]:
import pandas as pd

import glob

from pathlib import Path
import csv
import pickle

import os
import requests
import json

import quandl
from iexfinance.stocks import Stock

from dotenv import load_dotenv
load_dotenv()

In [None]:
## Notebook-wide constants

## Date window: used to slice the IEX price history and passed as the
## start/end dates of the Quandl short-volume queries.
start_date, end_date = '2021-01-01', '2021-09-30'

## Default range string for IEX functions - a longer window is not needed at the moment.
default_range = '10m'

# # Flag for IEX - set to false for Sandbox Data. Set to True for Real Data 
# token_status = False         ## Not useful anymore, could probably remove 

In [3]:
## Load the FTD file: the first CSV column becomes the index and is parsed as dates.
ftd_df = pd.read_csv(Path('../Resources/ftd_all_data.csv'), parse_dates=True, index_col=0)

## Load the symbol / CUSIP list: the first CSV column becomes the index.
symbol_df = pd.read_csv(Path('../Resources/symbol_all_list.csv'), index_col=0)

In [4]:
def return_ftd_data_cusip(cusip_number):
    """Return the rows of the module-level ``ftd_df`` for one CUSIP, indexed by 'Date'.

    Parameters
    ----------
    cusip_number
        Value to look up in the ``CUSIP`` column of ``ftd_df``.

    Returns
    -------
    pandas.DataFrame
        The matching rows with 'Date' as the index. The ``CUSIP`` column is
        consumed by the lookup and is therefore not part of the result.

    Raises
    ------
    KeyError
        If ``cusip_number`` does not occur in the ``CUSIP`` column.
    """
    by_cusip = ftd_df.reset_index().set_index("CUSIP")
    selected = by_cusip.loc[cusip_number]
    if isinstance(selected, pd.Series):
        # A CUSIP with exactly one row comes back as a Series, which has no
        # set_index(); re-select with a list so the result is a one-row frame.
        selected = by_cusip.loc[[cusip_number]]
    return selected.set_index('Date')

def return_CUSIP_from_symbol(symbol):
    """Look up the CUSIP listed for a ticker symbol.

    The lookup table is derived from the module-level ``symbol_df``: rows
    containing any missing value are dropped and the 'SYMBOL' column becomes
    the index.

    Returns the 'CUSIP' entry of the row(s) labelled ``symbol``.
    Raises KeyError when ``symbol`` is not in the table.
    """
    symbols_by_ticker = (
        symbol_df
        .dropna()
        .reset_index(drop=True)
        .set_index('SYMBOL')
    )
    matched = symbols_by_ticker.loc[symbol]
    return matched['CUSIP']

In [5]:
# iex_api_key = os.getenv("IEX_API_KEY")
# iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

# real_token = os.environ.get('IEX_API_KEY')
# test_token = os.environ.get('IEX_TEST_API_KEY')

## IEX base URL used for the status check below (sandbox URL kept for reference)
base_url_iex = 'https://cloud.iexapis.com/stable/'
# sandbox_url = 'https://sandbox.iexapis.com/stable/'

## QUANDL/NASDAQ - the key is read from the environment; it is None when NASDAQ_API_KEY is unset
nsdq_api_key = os.environ.get('NASDAQ_API_KEY')
base_url_nsdq = 'https://data.nasdaq.com/api/v3/datasets/FINRA/'

## IEX Status Test
## requests never times out by default, so bound the wait: an unreachable
## endpoint then fails this cell quickly instead of hanging the kernel.
test_resp = requests.get(base_url_iex + 'status', timeout=10)
test_resp

<Response [200]>

In [6]:
## Read the IEX settings from the environment; each is None when its variable is unset.
iex_token, iex_api_ver, iex_out_form = (
    os.environ.get(variable_name)
    for variable_name in ("IEX_TOKEN", "IEX_API_VERSION", "IEX_OUTPUT_FORMAT")
)

## Check sandbox vs stable (the token itself is intentionally not printed)
print(iex_api_ver, iex_out_form, sep='\n')

stable
pandas


In [7]:
def get_short_data_QUANDL(symbol):
    """Combine the two FINRA short-volume datasets for ``symbol`` into one table.

    Requests the ``FINRA/FNSQ_<symbol>`` and ``FINRA/FNYX_<symbol>`` datasets
    through ``quandl.get`` for the module-level ``start_date``/``end_date``
    window, outer-merges them on 'Date' and adds the per-date sums.

    Returns a DataFrame with the columns 'TotalShortVolume', 'TotalVolume'
    and 'SHORToverTOTALvolume' (short volume as a percentage of total volume).
    A date present in only one dataset yields NaN in all three columns.
    """
    query_args = dict(start_date=start_date, end_date=end_date, authtoken=nsdq_api_key)

    # Both downloads happen before any reshaping.
    nasdaq_raw = quandl.get("FINRA/FNSQ_" + symbol, **query_args)   ## Nasdaq
    nyse_raw = quandl.get("FINRA/FNYX_" + symbol, **query_args)     ## NYSE

    # Give each venue its own column names and drop the column that is not used.
    nasdaq = nasdaq_raw.rename(
        columns={'ShortVolume': 'ShortVolumeNSDQ', 'TotalVolume': 'TotalVolumeNSDQ'}
    ).drop(columns=['ShortExemptVolume'])
    nyse = nyse_raw.rename(
        columns={'ShortVolume': 'ShortVolumeNYSE', 'TotalVolume': 'TotalVolumeNYSE'}
    ).drop(columns=['ShortExemptVolume'])

    merged = pd.merge(nasdaq, nyse, on='Date', how='outer')

    short_total = merged['ShortVolumeNSDQ'] + merged['ShortVolumeNYSE']
    volume_total = merged['TotalVolumeNSDQ'] + merged['TotalVolumeNYSE']
    merged = merged.assign(
        TotalShortVolume=short_total,
        TotalVolume=volume_total,
        SHORToverTOTALvolume=short_total / volume_total * 100,
    )
    return merged[['TotalShortVolume', 'TotalVolume', 'SHORToverTOTALvolume']]

In [8]:
def get_data_all(symbol):
    """Assemble the price, FTD and short-volume data for one ticker symbol.

    Steps:
      1. Resolve the symbol's CUSIP and fetch its FTD rows (the 'SYMBOL'
         column is dropped).
      2. Download IEX historical prices for ``default_range``, cut them to
         the module-level ``start_date``/``end_date`` window and index them
         by a datetime 'Date'.
      3. Outer-merge prices with the FTD rows on 'Date'; dates without an FTD
         record get ``QUANTITY_FAILS`` = 0.
      4. Outer-merge the result with the short-volume table from
         ``get_short_data_QUANDL``.

    Returns the merged DataFrame. Any exception raised by the lookups or the
    two data providers propagates to the caller.
    """
    cusip_number = return_CUSIP_from_symbol(symbol)
    ftd_data = return_ftd_data_cusip(cusip_number).drop(columns=['SYMBOL'])

    # Use the shared default_range constant rather than repeating the literal.
    prices = Stock(symbol).get_historical_prices(range=default_range, chartCloseOnly=True)

    # reset_index()/rename() return new frames, so nothing below writes into
    # the slice taken here (avoids in-place edits on a slice of the download).
    prices = prices[start_date:end_date].reset_index().rename(columns={'index': 'Date'})
    prices['Date'] = pd.to_datetime(prices['Date'])
    prices = prices.set_index('Date')

    merged = pd.merge(prices, ftd_data, on='Date', how='outer')
    merged['QUANTITY_FAILS'] = merged['QUANTITY_FAILS'].fillna(0)

    short_data = get_short_data_QUANDL(symbol)
    return pd.merge(merged, short_data, on='Date', how='outer')

## Use pickle module to export and save files
def save_obj(obj, path ):
    """Pickle ``obj`` to ``path`` using the highest available pickle protocol.

    Parameters
    ----------
    obj
        Any picklable object.
    path : str or pathlib.Path
        Destination file. Its parent directory is created when missing, so a
        finished batch is not lost to a FileNotFoundError at save time.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, 'wb') as handle:
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)

In [9]:
# test_symbol = 'GME'
# test_df = get_data_all(test_symbol)
# test_df

In [10]:
## Test Variables if necessary 
#symbol_test_range = symbol_df.iloc[0:201]

In [None]:
## Set iteration through symbol_df
length_ = len(symbol_df)
iex_api_ver = os.getenv("IEX_API_VERSION")
sandbox = 'iexcloud-sandbox'
real = 'stable'

## Run log - change x / y as necessary to begin where left off in case of failure
## Completed up to x = 900
## Start at x = 1000 - set Max at 9900
## All data went to incomplete up to 6600/6700
## Finishing Data from 6700/6800 on
## Oct 29th - 2,415,426 (49%) API Usage
## May need to redo 0-6700
## Done up to 10,000

## Do in 1000 increments now
## Did 10000-11000, but others wrong
## Start 11000 now

## Finished 12000_13000 before API calls ran out for month
## Start again from 13000/14000 next time
x = 13000
y = 14000
increment = 1000  ## Make sure increment is right

## If testing, set IEX_API_VERSION to "iexcloud-sandbox" (sandbox) to not burn tokens, and check your .env variables
## If not testing, set it to "stable" (real) and load your secret key as a .env variable
## The loop below only runs against "stable"; with the sandbox setting it is skipped entirely,
## so there is no need to worry about wasting IEX API tokens.
##
## Data is exported one batch of `increment` symbols at a time in case there are errors while
## processing. A failed run can pick up where it left off by changing x and y, which avoids
## repeating API calls and burning IEX API tokens unnecessarily.
while (x <= length_) and (iex_api_ver == real):
    complete_dict = {}
    incomplete_dict = {}
    skipped_symbols = {}   ## symbol -> repr of the error that made us skip it

    range_var = range(x,y)
    str_symbol1 = str(x)
    str_symbol2 = str(y)

    for i in range_var:
        try:
            symbol_var = symbol_df['SYMBOL'][i]
        except (KeyError, IndexError):
            ## No row i in symbol_df (e.g. the last batch runs past the end of the list).
            ## Skip it instead of crashing before this batch's pickles are written.
            continue
        try:
            data_df = get_data_all(symbol_var)
        except Exception as exc:
            ## Best-effort: one bad symbol must not abort the batch. Catching Exception
            ## (not a bare except) keeps Ctrl-C / kernel interrupt working.
            skipped_symbols[symbol_var] = repr(exc)
            continue
        ## Check if null values, add to different dicts if null values present, or no nulls present
        bool_var = data_df.isnull().values.any()
        if bool_var:
            incomplete_dict[symbol_var] = data_df
        else:
            complete_dict[symbol_var] = data_df

    pickle_path1= Path('../FilesExportComplete/data_complete_'+str_symbol1+'_'+str_symbol2+'.pkl')
    save_obj(complete_dict,pickle_path1)
    pickle_path2= Path('../FilesExportIncomplete/data_incomplete_'+str_symbol1+'_'+str_symbol2+'.pkl')
    save_obj(incomplete_dict,pickle_path2)

    ## One summary line per batch so skipped symbols no longer vanish silently
    print(str_symbol1 + '_' + str_symbol2 + ':',
          len(complete_dict), 'complete,',
          len(incomplete_dict), 'incomplete,',
          len(skipped_symbols), 'skipped')

    ## Check before run, if incorrect, can waste a lot of API credits
    x += increment
    y += increment
    

In [None]:
# test_df = get_data_all('GME')
# test_df

In [None]:
## Batch size for the download loop; same value the loop cell assigns before it runs.
## NOTE(review): this standalone re-assignment looks redundant - confirm whether the cell is still needed.
increment = 1000 

In [None]:
#ftd_df