In [1]:
import pandas as pd

import glob

from pathlib import Path
import csv

import os
import requests
import json

import quandl
from iexfinance.stocks import Stock

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
## Load pickle for exports and imports of data  
import pickle 
def load_obj(path):
    with open(path, 'rb') as f:
        return pickle.load(f)
    
def save_obj(obj, path ):
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

In [3]:
## Constants 

## Set start date variable - dataframes will be created starting from this date
start_date = '2020-01-01'
end_date = '2021-10-14'
default_date_range = '2y' ## Default Range for IEX functions - don't need more at the moment

In [4]:
## Import FTD File and Symbol List using Pickle 
# ftd_df = load_obj('AnalysisResources/analysis_ftd_all_data.pkl')
# symbol_df = load_obj('AnalysisResources/analysis_symbol_all_list.pkl')


## Import FTD File using CSV 
ftd_df = pd.read_csv(
    Path('AnalysisResources/analysis_ftd_all_data.csv'),
    index_col=0, parse_dates=True
)

## Import Symbol and CUSIP list using CSV
symbol_df = pd.read_csv(
    Path('AnalysisResources/analysis_symbol_all_list.csv'),
    index_col=0
)

In [5]:
## QUANDL/NASDAQ 
nsdq_api_key = os.environ.get('NASDAQ_API_KEY')
base_url_nsdq = 'https://data.nasdaq.com/api/v3/datasets/FINRA/'

## Test iexcloud setup and stability 
iex_token = os.getenv("IEX_TOKEN")
iex_api_ver = os.getenv("IEX_API_VERSION")
iex_out_form = os.getenv("IEX_OUTPUT_FORMAT")

## Check sandbox vs stable 
if iex_api_ver == 'stable':
    print('stable IEX API')
elif iex_api_ver == 'iexcloud-sandbox':
    print('iexcloud-sandbox TESTING')
else:
    print('PROBLEM')

## Print output form from iexfinance library and API 
print(iex_out_form)

base_url_iex = 'https://cloud.iexapis.com/stable/'
# sandbox_url = 'https://sandbox.iexapis.com/stable/'

## IEX Status Test 
test_resp = requests.get(base_url_iex + 'status')
test_resp

iexcloud-sandbox TESTING
pandas


<Response [200]>

In [6]:
def get_sorted_short_data_QUANDL(symbol):
    string_nsdq = "FINRA/FNSQ_"+symbol
    string_nyse = "FINRA/FNYX_"+symbol
    
    df1 = quandl.get(string_nsdq,start_date=start_date,end_date=end_date,authtoken=nsdq_api_key)   ## Nasdaq
    df2 = quandl.get(string_nyse,start_date=start_date,end_date=end_date,authtoken=nsdq_api_key)   ## NYSE

    df1 = df1.rename(columns={'ShortVolume':'ShortVolumeNSDQ','TotalVolume':'TotalVolumeNSDQ'})
    df1 = df1.drop(columns={'ShortExemptVolume'})

    df2 = df2.rename(columns={'ShortVolume':'ShortVolumeNYSE','TotalVolume':'TotalVolumeNYSE'})
    df2 = df2.drop(columns={'ShortExemptVolume'})

    df3 = pd.merge(df1,df2,on='Date',how='outer')

    df3['TotalShortVolume'] = df3['ShortVolumeNSDQ'] + df3['ShortVolumeNYSE']
    df3['TotalVolume'] = df3['TotalVolumeNSDQ'] + df3['TotalVolumeNYSE']
    df3['SHORToverTOTALvolume'] = df3['TotalShortVolume'] / df3['TotalVolume'] * 100
    short_df = df3[['TotalShortVolume','TotalVolume','SHORToverTOTALvolume']]
    return short_df

def get_short_data_QUANDL(symbol):
    string_nsdq = "FINRA/FNSQ_"+symbol
    string_nyse = "FINRA/FNYX_"+symbol
    
    df1 = quandl.get(string_nsdq,start_date=start_date,end_date=end_date,authtoken=nsdq_api_key)   ## Nasdaq
    df2 = quandl.get(string_nyse,start_date=start_date,end_date=end_date,authtoken=nsdq_api_key)   ## NYSE

    df1 = df1.rename(columns={'ShortVolume':'ShortVolumeNSDQ','TotalVolume':'TotalVolumeNSDQ'})
    #df1 = df1.drop(columns={'ShortExemptVolume'})
    df1 = df1.rename(columns={'ShortExemptVolume':'ShortExemptVolumeNSDQ'})

    df2 = df2.rename(columns={'ShortVolume':'ShortVolumeNYSE','TotalVolume':'TotalVolumeNYSE'})
    #df2 = df2.drop(columns={'ShortExemptVolume'})
    df2 = df2.rename(columns={'ShortExemptVolume':'ShortExemptVolumeNYSE'})

    df3 = pd.merge(df1,df2,on='Date',how='outer')
    
    return df3

In [7]:
# test_short = get_short_data_QUANDL('GME')
# test_short

In [8]:
def get_data_all(symbol):
    cusip_number = return_CUSIP_from_symbol(symbol)
    ftd_data = return_ftd_data_cusip(cusip_number)
    ftd_data = ftd_data.drop(columns={'SYMBOL'})
    iex_data = Stock(symbol).get_historical_prices(range=default_date_range,chartCloseOnly=True)
    iex_data = iex_data[start_date:end_date]
    iex_data.reset_index(inplace=True)
    iex_data.rename(columns={'index':'Date'},inplace=True)
    iex_data['Date'] = pd.to_datetime(iex_data['Date'])
    iex_data.set_index('Date',inplace=True)
    df1 = pd.merge(iex_data, ftd_data, on='Date',how='outer')
    df1['QUANTITY_FAILS'] = df1['QUANTITY_FAILS'].fillna(0)
    df2 = get_short_data_QUANDL(symbol)
    df = pd.merge(df1,df2,on='Date',how='outer')
    return df 

def return_ftd_data_cusip(cusip_number):
    df = ftd_df.copy()
    df = df.reset_index()
    df = df.set_index("CUSIP")
    df = df.loc[cusip_number]
    df = df.set_index('Date')
    return df

def return_CUSIP_from_symbol(symbol):
    new_symbol_df = pd.DataFrame(symbol_df)
    new_symbol_df.dropna(inplace=True)
    new_symbol_df.reset_index(inplace=True,drop=True)
    new_symbol_df.set_index('SYMBOL',inplace=True)
    cusip_variable = new_symbol_df.loc[symbol]
    cusip_variable = cusip_variable['CUSIP']
    return cusip_variable

In [9]:
# test_1 = return_CUSIP_from_symbol('GME')
# test_1

In [10]:
# test_2 = return_ftd_data_cusip(test_1)
# test_2

In [14]:
test_df = get_data_all('TSLA')

In [15]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 451 entries, 2020-01-02 to 2021-10-14
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   close                  451 non-null    object 
 1   volume                 451 non-null    object 
 2   change                 451 non-null    object 
 3   changePercent          451 non-null    object 
 4   changeOverTime         451 non-null    object 
 5   QUANTITY_FAILS         451 non-null    float64
 6   ShortVolumeNSDQ        451 non-null    float64
 7   ShortExemptVolumeNSDQ  451 non-null    float64
 8   TotalVolumeNSDQ        451 non-null    float64
 9   ShortVolumeNYSE        451 non-null    float64
 10  ShortExemptVolumeNYSE  451 non-null    float64
 11  TotalVolumeNYSE        451 non-null    float64
dtypes: float64(7), object(5)
memory usage: 45.8+ KB


In [16]:
test_df

Unnamed: 0_level_0,close,volume,change,changePercent,changeOverTime,QUANTITY_FAILS,ShortVolumeNSDQ,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-01-02,86.597,47885568,2.417108,0.0293,0.258069,26769.0,1289634.0,6660.0,2152744.0,1207329.0,115.0,2072736.0
2020-01-03,90.65,93195035,2.585122,0.03,0.287183,1152.0,2731221.0,15933.0,4256279.0,2156040.0,100.0,3779597.0
2020-01-06,92.742,52394859,1.716592,0.0194,0.316278,25009.0,1576885.0,3454.0,2404140.0,1450162.0,425.0,2576385.0
2020-01-07,93.868,95277819,3.636696,0.0394,0.36225,16916.0,2806009.0,10404.0,4239258.0,2469900.0,504.0,4181320.0
2020-01-08,101.509,162294668,4.783,0.0495,0.44567,4272.0,4832879.0,32720.0,7490583.0,4090595.0,206.0,7075044.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-08,818.01,17351456,-8.189639,-0.0104,10.774641,0.0,1708713.0,16924.0,4466091.0,1011138.0,1564.0,1885826.0
2021-10-11,823.61,14772046,6.665781,0.0085,10.739116,0.0,1586746.0,24521.0,4402787.0,924216.0,1337.0,1842014.0
2021-10-12,843.22,22063044,14.026866,0.0176,10.760529,52034.0,2445515.0,24788.0,6300303.0,1646257.0,3327.0,2880190.0
2021-10-13,823.78,14491125,5.478787,0.0068,10.873055,0.0,1413386.0,22644.0,3520153.0,812099.0,1915.0,1585795.0
