In [1]:
## Read data for a specific stock and combine it with the matching SEC data
## GME is used as the control/test stock

## Gather all data from the IEX API and combine it with the FTD file data

In [2]:
import pandas as pd

import glob

from pathlib import Path
import csv

import os
import requests
import json

import quandl

from dotenv import load_dotenv
load_dotenv()

True

In [3]:
## Load pickle for exports and imports of data  
import pickle 
def load_obj(path):
    """Read one pickled object from the file at ``path`` and return it.

    Note: unpickling can execute arbitrary code, so only load files that
    were produced by a trusted source.
    """
    with open(path, mode='rb') as pickle_file:
        loaded = pickle.load(pickle_file)
    return loaded
    
def save_obj(obj, path):
    """Pickle ``obj`` into the file at ``path`` using the highest pickle protocol.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(path, mode='wb') as pickle_file:
        pickle.dump(obj, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

In [4]:
## Constants

## Date window - dataframes are built from start_date through end_date
start_date = "2020-01-01"
end_date = "2021-10-14"

## Default range passed to the IEX chart functions - more is not needed at the moment
default_date_range = "2y"

In [5]:
## QUANDL/NASDAQ - API key is read from the environment (populated by load_dotenv above)
nsdq_api_key = os.getenv('NASDAQ_API_KEY')
base_url_nsdq = 'https://data.nasdaq.com/api/v3/datasets/FINRA/'

## IEX - production and sandbox keys, also read from the environment
iex_api_key = os.getenv("IEX_API_KEY")
iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

## Aliases used by the IEX helper functions to decide which endpoint to call
real_token = iex_api_key
test_token = iex_test_api_key

base_url_iex = 'https://cloud.iexapis.com/stable/'
sandbox_url = 'https://sandbox.iexapis.com/stable/'

## IEX Status Test
## A timeout is set so a stalled connection cannot hang the cell indefinitely
test_resp = requests.get(base_url_iex + 'status', timeout=10)
test_resp

<Response [200]>

In [6]:
## Token the IEX helpers use by default: set to either real_token or test_token.
## get_iex_chart captures this value as a default argument when it is defined,
## so re-run its definition cell after changing the value here.
token_status = real_token

In [7]:
## Load the FTD table and the symbol/CUSIP list from CSV.
## Disabled alternative: load the same two tables from pickle with
##   load_obj('AnalysisResources/analysis_ftd_all_data.pkl')
##   load_obj('AnalysisResources/analysis_symbol_all_list.pkl')

## FTD file: first CSV column becomes the index and is parsed as dates
ftd_df = pd.read_csv(Path('AnalysisResources/analysis_ftd_all_data.csv'), parse_dates=True, index_col=0)

## Symbol and CUSIP list: first CSV column becomes the index
symbol_df = pd.read_csv(Path('AnalysisResources/analysis_symbol_all_list.csv'), index_col=0)

In [8]:
## IEX Calls and Functions 

## Get IEX Chart data - Close Data Only 
def get_iex_chart(stock_ticker, chart_range=default_date_range,token=token_status):
    """Fetch close-only chart data for one ticker from IEX, indexed by 'Date'.

    Parameters
    ----------
    stock_ticker : str
        Ticker symbol placed in the request path, e.g. 'GME'.
    chart_range : str, optional
        Chart range segment of the request path. Defaults to the value of
        ``default_date_range`` at the time this function was defined.
    token : str, optional
        Must equal ``test_token`` (request goes to ``sandbox_url``) or
        ``real_token`` (request goes to ``base_url_iex``). Defaults to the
        value of ``token_status`` at the time this function was defined.

    Returns
    -------
    pandas.DataFrame
        The JSON response as a DataFrame whose 'date' column has been renamed
        to 'Date', converted with ``pd.to_datetime`` and set as the index.

    Raises
    ------
    ValueError
        If ``token`` is empty or matches neither configured token. Previously
        this case failed later with an unbound local variable.
    requests.HTTPError
        If IEX responds with an error status code.
    """
    if not token:
        raise ValueError("IEX token is not set; check IEX_API_KEY / IEX_TEST_API_KEY")

    # Same precedence as before: the sandbox comparison is checked first.
    if token == test_token:
        api_root = sandbox_url
    elif token == real_token:
        api_root = base_url_iex
    else:
        raise ValueError("token must equal either real_token or test_token")

    resp_data = requests.get(
        api_root + 'stock/' + stock_ticker + '/chart/' + chart_range,
        params={'chartCloseOnly': 'true', 'token': token},
        timeout=30,  # avoid hanging the notebook on a stalled connection
    )
    # Surface HTTP errors (bad token, unknown symbol, ...) here instead of
    # failing later while parsing an error payload.
    resp_data.raise_for_status()

    df = pd.DataFrame(resp_data.json()).rename(columns={'date': 'Date'})
    df['Date'] = pd.to_datetime(df['Date'])
    return df.set_index('Date')

## Get Short "Interest" Data from Quandl 
def get_short_data_QUANDL(symbol):
    """Download FINRA short-volume data for ``symbol`` from Quandl and merge both venues.

    Two datasets are requested for the module-level ``start_date``/``end_date``
    window using ``nsdq_api_key``: "FINRA/FNSQ_<symbol>" (Nasdaq) and
    "FINRA/FNYX_<symbol>" (NYSE). Their ShortVolume, ShortExemptVolume and
    TotalVolume columns are suffixed with NSDQ / NYSE respectively, and the two
    frames are outer-merged on 'Date'.

    Returns
    -------
    pandas.DataFrame
        The merged frame holding the suffixed columns of both venues.
    """
    query_window = dict(start_date=start_date, end_date=end_date, authtoken=nsdq_api_key)

    nasdaq_short = quandl.get("FINRA/FNSQ_" + symbol, **query_window)   ## Nasdaq
    nyse_short = quandl.get("FINRA/FNYX_" + symbol, **query_window)     ## NYSE

    nasdaq_short = nasdaq_short.rename(columns={
        'ShortVolume': 'ShortVolumeNSDQ',
        'TotalVolume': 'TotalVolumeNSDQ',
        'ShortExemptVolume': 'ShortExemptVolumeNSDQ',
    })
    nyse_short = nyse_short.rename(columns={
        'ShortVolume': 'ShortVolumeNYSE',
        'TotalVolume': 'TotalVolumeNYSE',
        'ShortExemptVolume': 'ShortExemptVolumeNYSE',
    })

    return pd.merge(nasdaq_short, nyse_short, on='Date', how='outer')


## Return FTD data from the SEC FTD files, filtered by a stock's CUSIP number
def return_ftd_data_cusip(cusip_number):
    """Return the rows of ``ftd_df`` that belong to one CUSIP, indexed by 'Date'.

    Parameters
    ----------
    cusip_number : str
        CUSIP value to look up in the 'CUSIP' column of ``ftd_df``.

    Returns
    -------
    pandas.DataFrame
        Matching rows with 'Date' as the index. The 'CUSIP' column is not part
        of the result because it is used as the lookup index and then replaced.

    Raises
    ------
    KeyError
        If no row of ``ftd_df`` has the requested CUSIP.
    """
    # reset_index() already returns a new frame, so ftd_df itself is untouched.
    by_cusip = ftd_df.reset_index().set_index("CUSIP")
    # Select with a list so the result is always a DataFrame. A scalar label
    # collapses a single matching row into a Series, which has no set_index.
    matches = by_cusip.loc[[cusip_number]]
    return matches.set_index('Date')


## Return the CUSIP number for a symbol from the symbol_df symbol list
def return_CUSIP_from_symbol(symbol):
    """Look up the 'CUSIP' value stored for ``symbol`` in ``symbol_df``.

    Rows containing any missing value are dropped first, the existing index is
    discarded, and the 'SYMBOL' column is used as the lookup index.
    A KeyError is raised when ``symbol`` is not present.
    """
    lookup_table = (
        pd.DataFrame(symbol_df)
        .dropna()
        .reset_index(drop=True)
        .set_index('SYMBOL')
    )
    return lookup_table.loc[symbol]['CUSIP']

In [9]:
## Get all stock data and merge with FTD data by passing a single stock symbol 
def get_data_all(symbol, save=False):
    """Build one combined DataFrame of IEX chart, FTD and short-volume data for ``symbol``.

    Steps:
      1. Resolve the symbol's CUSIP and pull its FTD rows (without 'SYMBOL').
      2. Fetch the IEX chart and restrict it to ``start_date``..``end_date``.
      3. Outer-merge chart and FTD data on 'Date'; missing 'QUANTITY_FAILS'
         values are filled with 0.
      4. Outer-merge the result with the Quandl short-volume data on 'Date'.

    Parameters
    ----------
    symbol : str
        Ticker symbol, e.g. 'GME'.
    save : bool, optional
        When true, the combined frame is also pickled to
        'FilesExportIndividualSymbol/<symbol>_combined_df.pkl'.

    Returns
    -------
    pandas.DataFrame
        The combined frame.
    """
    cusip_number = return_CUSIP_from_symbol(symbol)
    ftd_data = return_ftd_data_cusip(cusip_number).drop(columns=['SYMBOL'])

    # get_iex_chart already returns a frame indexed by a datetime 'Date', so the
    # window can be sliced directly. The previous reset/rename/set_index
    # round-trip was a no-op that mutated a slice in place.
    iex_data = get_iex_chart(symbol).loc[start_date:end_date]

    price_and_ftd = pd.merge(iex_data, ftd_data, on='Date', how='outer')
    # Dates without an FTD record are treated as zero fails.
    price_and_ftd['QUANTITY_FAILS'] = price_and_ftd['QUANTITY_FAILS'].fillna(0)

    short_volume = get_short_data_QUANDL(symbol)
    df = pd.merge(price_and_ftd, short_volume, on='Date', how='outer')

    ## Export DF based off symbol Name, and parameter
    if save:
        pkl_path = Path('FilesExportIndividualSymbol/'+symbol+'_combined_df.pkl')
        # Create the export folder on first use so the save cannot fail on a fresh checkout.
        pkl_path.parent.mkdir(parents=True, exist_ok=True)
        save_obj(df, pkl_path)

    return df

In [10]:
## Build the combined frame for GME (save=True also pickles it) and summarize its columns
test_df = get_data_all('GME',save=True)
test_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 451 entries, 2020-01-02 to 2021-10-14
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   close                  451 non-null    float64
 1   volume                 451 non-null    int64  
 2   change                 451 non-null    float64
 3   changePercent          451 non-null    float64
 4   changeOverTime         451 non-null    float64
 5   QUANTITY_FAILS         451 non-null    float64
 6   ShortVolumeNSDQ        451 non-null    float64
 7   ShortExemptVolumeNSDQ  451 non-null    float64
 8   TotalVolumeNSDQ        451 non-null    float64
 9   ShortVolumeNYSE        451 non-null    float64
 10  ShortExemptVolumeNYSE  451 non-null    float64
 11  TotalVolumeNYSE        451 non-null    float64
dtypes: float64(11), int64(1)
memory usage: 45.8 KB


In [11]:
## Display the combined GME frame
test_df

Unnamed: 0_level_0,close,volume,change,changePercent,changeOverTime,QUANTITY_FAILS,ShortVolumeNSDQ,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-01-02,6.31,4430750,0.23,0.0378,0.122776,10609.0,872127.0,8472.0,1185904.0,276386.0,2448.0,527414.0
2020-01-03,5.88,3550961,-0.43,-0.0681,0.046263,28692.0,520156.0,11999.0,832247.0,306258.0,2.0,506189.0
2020-01-06,5.85,3394777,-0.03,-0.0051,0.040925,28665.0,498788.0,20569.0,859426.0,198834.0,15.0,319375.0
2020-01-07,5.52,5235519,-0.33,-0.0564,-0.017794,0.0,604854.0,6372.0,1908324.0,351972.0,991.0,523321.0
2020-01-08,5.72,5629445,0.20,0.0362,0.017794,7039.0,746989.0,26168.0,1480915.0,529867.0,28.0,852908.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-08,172.68,818207,0.56,0.0033,29.725979,3369.0,127261.0,1889.0,207559.0,22569.0,83.0,49483.0
2021-10-11,178.10,2684855,5.42,0.0314,30.690391,0.0,442600.0,4696.0,711928.0,100482.0,163.0,280789.0
2021-10-12,175.82,1233652,-2.28,-0.0128,30.284698,43995.0,201231.0,428.0,331727.0,47431.0,5.0,100709.0
2021-10-13,184.06,1930239,8.24,0.0469,31.750890,213285.0,324851.0,5530.0,534297.0,76019.0,641.0,171661.0
