In [1]:
## Read SEC data and append data from APIs 

In [2]:
import pandas as pd

from pathlib import Path
import csv

import os
import requests
import json

import quandl

from dotenv import load_dotenv
load_dotenv()

True

In [3]:
## Constants shared by the data-fetching helpers defined further down

## Date window: used as the default from/to range of the FMP price request
## and as the start/end range of the Quandl short-volume requests
start_date = '2016-01-01'
end_date = '2021-10-29'
default_date_range = '71m' ## Default chart range handed to the IEX chart helpers (presumably 71 months - confirm against the IEX docs)

In [4]:
## QUANDL/NASDAQ
## Keys are read from the environment (populated by load_dotenv above);
## each lookup yields None when the variable is not set.
nsdq_api_key = os.environ.get('NASDAQ_API_KEY')
## NOTE(review): base_url_nsdq is not referenced by any code visible in this notebook
base_url_nsdq = 'https://data.nasdaq.com/api/v3/datasets/FINRA/'

iex_api_key = os.getenv("IEX_API_KEY")
iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

## Redundant assignment, but improves readability throughout the code:
## the IEX helpers compare their `token` argument against these two names
real_token = iex_api_key
test_token = iex_test_api_key

## Production and sandbox base URLs for the IEX API
base_url_iex = 'https://cloud.iexapis.com/stable/'
sandbox_url = 'https://sandbox.iexapis.com/stable/'

## IEX status test: unauthenticated GET against the production 'status' endpoint;
## the bare expression below displays the Response object as the cell output
test_resp = requests.get(base_url_iex + 'status')
test_resp

<Response [200]>

In [21]:
## Get Short "Interest" Data from Quandl 
def get_short_data_QUANDL(symbol):
    """Download the two FINRA short-volume datasets for ``symbol`` and merge them.

    The ``FINRA/FNSQ_<symbol>`` and ``FINRA/FNYX_<symbol>`` datasets are requested
    from Quandl for the module-level ``start_date``..``end_date`` window using
    ``nsdq_api_key``.  Their ``ShortVolume``, ``ShortExemptVolume`` and
    ``TotalVolume`` columns are suffixed with ``NSDQ`` and ``NYSE`` respectively
    so the two feeds stay distinguishable after the merge.

    Returns the outer merge of both frames on ``Date``; missing values are left
    untouched (no fill is applied here).
    """
    nasdaq_columns = {
        'ShortVolume': 'ShortVolumeNSDQ',
        'TotalVolume': 'TotalVolumeNSDQ',
        'ShortExemptVolume': 'ShortExemptVolumeNSDQ',
    }
    nyse_columns = {
        'ShortVolume': 'ShortVolumeNYSE',
        'TotalVolume': 'TotalVolumeNYSE',
        'ShortExemptVolume': 'ShortExemptVolumeNYSE',
    }
    nasdaq_code = "FINRA/FNSQ_"+symbol
    nyse_code = "FINRA/FNYX_"+symbol

    # Same date window and credentials for both requests
    query = dict(start_date=start_date, end_date=end_date, authtoken=nsdq_api_key)
    nasdaq_frame = quandl.get(nasdaq_code, **query)   ## Nasdaq
    nyse_frame = quandl.get(nyse_code, **query)       ## NYSE

    return pd.merge(
        nasdaq_frame.rename(columns=nasdaq_columns),
        nyse_frame.rename(columns=nyse_columns),
        on='Date',
        how='outer',
    )


## Return FTD Data from SEC FTD files using a Stock's CUSIP number to sort 
def return_ftd_data_cusip(cusip_number, ftd_source=None):
    """Return the fails-to-deliver rows for one CUSIP (or several), indexed by date.

    Parameters
    ----------
    cusip_number : scalar or list-like
        CUSIP to select.  A list-like (for example the Series that
        ``return_CUSIP_from_symbol`` yields when a symbol maps to several
        CUSIPs) selects the rows of every listed CUSIP, in source order.
    ftd_source : pandas.DataFrame, optional
        Frame holding at least the ``CUSIP`` and ``Date`` columns.  Defaults to
        the module-level ``ftd_df``.  The frame is never modified.

    Returns
    -------
    pandas.DataFrame
        The matching rows without the ``CUSIP`` column, with ``Date`` converted
        to datetimes and used as the index.  The result is always a DataFrame,
        including when exactly one row matches.

    Raises
    ------
    KeyError
        If no row matches ``cusip_number``.
    """
    source = ftd_df if ftd_source is None else ftd_source

    # A boolean mask avoids copying and re-indexing the whole source frame on
    # every call, and (unlike a label lookup on a CUSIP index) never collapses
    # a single matching row into a Series.
    if pd.api.types.is_list_like(cusip_number):
        mask = source['CUSIP'].isin(list(cusip_number))
    else:
        mask = source['CUSIP'] == cusip_number

    matches = source.loc[mask]
    if matches.empty:
        raise KeyError(cusip_number)

    # drop/assign both return new frames, so the source is left untouched
    result = matches.drop(columns='CUSIP').assign(
        Date=lambda frame: pd.to_datetime(frame['Date'])
    )
    return result.set_index('Date')

## Return the CUSIP symbol from the symbol_df symbol list 
def return_CUSIP_from_symbol(symbol):
    """Look up the ``CUSIP`` entry recorded for ``symbol`` in ``symbol_df``.

    A re-indexed copy of ``symbol_df`` (keyed by its ``SYMBOL`` column) is used
    for the lookup, so the module-level frame is left unmodified.  The result
    is whatever pandas returns for that label: a single value when one row
    carries the symbol, a Series when several rows share it.
    """
    by_symbol = symbol_df.set_index('SYMBOL')
    matching_rows = by_symbol.loc[symbol]
    return matching_rows['CUSIP']

def return_ftd_data_symbol(symbol):
    """Return the FTD rows for ``symbol`` by resolving its CUSIP first.

    Thin composition of ``return_CUSIP_from_symbol`` and
    ``return_ftd_data_cusip``; the result is passed through unchanged.
    """
    return return_ftd_data_cusip(return_CUSIP_from_symbol(symbol))

In [6]:
## FMP Constants
## Base URLs of the v3 and v4 FMP Cloud API; the FMP helpers below append the endpoint path
fmpbase_urlv3 = 'https://fmpcloud.io/api/v3/'
fmpbase_urlv4 = 'https://fmpcloud.io/api/v4/'
## API key read from the environment; None when FMP_CLOUD_API_KEY is not set
api_key = os.getenv("FMP_CLOUD_API_KEY")

## FMP Functions 
def get_FMP_historical_data(symbol, startDate=start_date, endDate=end_date, apiKey=api_key, timeout=30):
    """Download the FMP ``historical-price-full`` series for ``symbol``.

    Parameters
    ----------
    symbol : str
        Ticker appended to the endpoint path.
    startDate, endDate : str
        Sent as the ``from`` / ``to`` query parameters.  The defaults are the
        values of ``start_date`` / ``end_date`` at the time this function was
        defined.
    apiKey : str
        Sent as the ``apikey`` query parameter (default: ``api_key`` at
        definition time).
    timeout : float
        Seconds to wait for the server before ``requests`` gives up, so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        The records under the response's ``historical`` key, indexed by
        ``Date`` (datetime), with the row order of the response reversed and
        the ``label`` column removed when present.

    Raises
    ------
    requests.HTTPError
        If the response status is not a success status.
    KeyError
        If the response carries no ``historical`` records.
    """
    resp_data = requests.get(
        fmpbase_urlv3+'historical-price-full/'+symbol,
        # Let requests build and encode the query string
        params={'from': startDate, 'to': endDate, 'apikey': apiKey},
        timeout=timeout,
    )
    if not resp_data.ok:
        # Not raise_for_status(): its message embeds the full URL, which would
        # print the API key into the notebook output.
        raise requests.HTTPError(
            'FMP historical-price request for '+str(symbol)
            +' failed with HTTP status '+str(resp_data.status_code),
            response=resp_data,
        )

    json_ = resp_data.json()
    data = json_.get('historical') if isinstance(json_, dict) else None
    if not data:
        raise KeyError("FMP response for "+str(symbol)+" contains no 'historical' records")

    df = pd.DataFrame(data).rename(columns={'date':'Date'})
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.iloc[::-1].set_index('Date')  ## Reverse the DataFrame
    # 'label' is only dropped when the API actually sent it
    return df.drop(columns='label', errors='ignore')

api_key = os.getenv("FMP_CLOUD_API_KEY")
def get_float_data_FMP(symbol, timeout=30):
    """Return the first record of the FMP v4 ``shares_float`` response for ``symbol``.

    Parameters
    ----------
    symbol : str
        Sent as the ``symbol`` query parameter.
    timeout : float
        Seconds to wait for the server before ``requests`` gives up.

    Raises
    ------
    requests.HTTPError
        If the response status is not a success status.
    IndexError
        If the response is an empty list.
    """
    resp_data = requests.get(
        fmpbase_urlv4+'shares_float',
        # Let requests build and encode the query string
        params={'symbol': symbol, 'apikey': api_key},
        timeout=timeout,
    )
    if not resp_data.ok:
        # Not raise_for_status(): its message embeds the full URL, which would
        # print the API key into the notebook output.
        raise requests.HTTPError(
            'FMP shares_float request for '+str(symbol)
            +' failed with HTTP status '+str(resp_data.status_code),
            response=resp_data,
        )

    json_ = resp_data.json()
    if isinstance(json_, list) and not json_:
        raise IndexError('FMP returned no shares_float record for '+str(symbol))
    return json_[0]

def get_company_profile_FMP_json(symbol, timeout=30):
    """Return the first record of the FMP v3 ``profile`` response for ``symbol``.

    Request shape: https://fmpcloud.io/api/v3/profile/AAPL?apikey='yourkeyhere'

    Parameters
    ----------
    symbol : str
        Ticker appended to the endpoint path.
    timeout : float
        Seconds to wait for the server before ``requests`` gives up.

    Raises
    ------
    requests.HTTPError
        If the response status is not a success status.
    IndexError
        If the response is an empty list.
    """
    resp_data = requests.get(
        fmpbase_urlv3+'profile/'+symbol,
        # Let requests build and encode the query string
        params={'apikey': api_key},
        timeout=timeout,
    )
    if not resp_data.ok:
        # Not raise_for_status(): its message embeds the full URL, which would
        # print the API key into the notebook output.
        raise requests.HTTPError(
            'FMP profile request for '+str(symbol)
            +' failed with HTTP status '+str(resp_data.status_code),
            response=resp_data,
        )

    json_response = resp_data.json()
    if isinstance(json_response, list) and not json_response:
        raise IndexError('FMP returned no profile record for '+str(symbol))
    return json_response[0]

def save_and_export_raw_df_csv(data, symbol):
    """Export ``data`` via its ``to_csv`` method to the per-symbol CSV file.

    The target is ``../FilesExportIndividualStockDFs/<symbol>_combined_df.csv``,
    relative to the current working directory.  Returns nothing.
    """
    target = ''.join(['../FilesExportIndividualStockDFs/', symbol, '_combined_df.csv'])
    data.to_csv(target)

In [7]:
## Load IEX to get ETF statistics 

token_status = test_token ## Set to either real token or test token for IEX

def _iex_base_url(token):
    """Return the IEX base URL that belongs to ``token``.

    ``test_token`` selects the sandbox, ``real_token`` the production API.
    Any other value raises ValueError instead of falling through both branches.
    """
    if token == test_token:
        return sandbox_url
    if token == real_token:
        return base_url_iex
    raise ValueError('token must be either test_token (sandbox) or real_token (production)')


def _iex_get_json(path, token, query=None, timeout=30):
    """GET ``path`` from the IEX endpoint selected by ``token`` and return the decoded JSON."""
    params = dict(query or {})
    params['token'] = token
    resp_data = requests.get(_iex_base_url(token)+path, params=params, timeout=timeout)
    if not resp_data.ok:
        # Not raise_for_status(): its message embeds the full URL, which would
        # print the token into the notebook output.
        raise requests.HTTPError(
            'IEX request for '+path+' failed with HTTP status '+str(resp_data.status_code),
            response=resp_data,
        )
    return resp_data.json()


def _iex_chart_frame(stock_ticker, chart_range, token, close_only):
    """Download one IEX chart and return it as a DataFrame indexed by ``Date``."""
    query = {'chartCloseOnly': 'true'} if close_only else None
    records = _iex_get_json('stock/'+stock_ticker+'/chart/'+chart_range, token, query)
    df = pd.DataFrame(records).rename(columns={'date':'Date'})
    df['Date'] = pd.to_datetime(df['Date'])
    return df.set_index('Date')


def get_IEX_statistics(stock_ticker, token=token_status):
    """Return the decoded JSON of the IEX ``stats`` endpoint for ``stock_ticker``.

    ``token`` must equal ``test_token`` (sandbox) or ``real_token`` (production);
    the default is the value of ``token_status`` when this function was defined.

    Raises ValueError for any other token and requests.HTTPError when the
    response status is not a success status.
    """
    return _iex_get_json('stock/'+stock_ticker+'/stats/', token)

## Get IEX Chart data - Close Data Only 
def get_IEX_historical_data(stock_ticker, chart_range=default_date_range,token=token_status):
    """Return the IEX chart for ``stock_ticker``, requested with ``chartCloseOnly=true``.

    ``chart_range`` is appended to the chart path; ``token`` selects sandbox or
    production as in ``get_IEX_statistics``.  The result is a DataFrame whose
    ``date`` column has been renamed to ``Date``, converted to datetimes and
    set as the index.

    Raises ValueError for an unknown token and requests.HTTPError when the
    response status is not a success status.
    """
    return _iex_chart_frame(stock_ticker, chart_range, token, close_only=True)

## Get IEX Chart data - All data 
def get_IEX_historical_data_all(stock_ticker, chart_range=default_date_range,token=token_status):
    """Return the IEX chart for ``stock_ticker`` without the ``chartCloseOnly`` flag.

    Same contract as ``get_IEX_historical_data`` apart from the missing flag.
    """
    return _iex_chart_frame(stock_ticker, chart_range, token, close_only=False)

In [8]:
## Use pickle module to import and export and save files
import pickle
def load_obj(path):
    """Read the file at ``path`` in binary mode and return the unpickled object.

    Unpickling can execute arbitrary code, so only use this on files that come
    from a trusted source.
    """
    with open(path, mode='rb') as handle:
        restored = pickle.Unpickler(handle).load()
    return restored
def save_obj(obj, path ):
    """Pickle ``obj`` into the file at ``path`` using the highest available protocol.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(path, mode='wb') as handle:
        writer = pickle.Pickler(handle, pickle.HIGHEST_PROTOCOL)
        writer.dump(obj)

In [9]:
## Import FTD File using CSV 
## The first CSV column becomes the index (index_col=0).
## NOTE(review): parse_dates=True asks read_csv to date-parse the index, not the
## 'Date' column; return_ftd_data_cusip converts 'Date' itself with pd.to_datetime.
## Presumably parse_dates=['Date'] was intended - confirm before changing.
ftd_df = pd.read_csv(
    Path('../Resources/ftd_all_data.csv'),
    index_col=0, parse_dates=True
)

In [10]:
## Import Symbol and CUSIP list using CSV
## The first CSV column becomes the index (index_col=0); the remaining
## SYMBOL / CUSIP columns are what return_CUSIP_from_symbol looks up.
symbol_df = pd.read_csv(
    Path('../Resources/symbol_all_list.csv'),
    index_col=0
)

In [11]:
ftd_df

Unnamed: 0,Date,CUSIP,SYMBOL,QUANTITY_FAILS
0,2017-09-11,Y21990034,0034RIGHTS,103745.0
1,2017-09-12,Y21990034,0034RIGHTS,103745.0
2,2018-10-17,812350122,0122PIK,828.0
3,2018-10-18,812350122,0122PIK,828.0
4,2019-05-02,812350122,0122PIK,828.0
...,...,...,...,...
7022993,2021-05-25,98959W203,ZZZOD,324.0
7022994,2021-05-26,98959W203,ZZZOD,354.0
7022995,2021-05-27,98959W203,ZZZOD,976.0
7022996,2021-05-28,98959W203,ZZZOD,2.0


In [12]:
symbol_df

Unnamed: 0,SYMBOL,CUSIP
0,0034RIGHTS,Y21990034
1,0122PIK,812350122
2,0297RTS,G72990297
3,0329REORG,G33990329
4,0888RTSPYMNT,529900888
...,...,...
33911,ZZLL,98880P103
33912,ZZLL,98880P202
33913,ZZLLD,98880P202
33914,ZZZOD,98959W203


In [13]:
test_symbol = 'GME'
symbol = test_symbol
symbol

'GME'

In [14]:
ftd_data = return_ftd_data_symbol(symbol)
ftd_data = ftd_data.drop(columns={'SYMBOL'})
ftd_data

Unnamed: 0_level_0,QUANTITY_FAILS
Date,Unnamed: 1_level_1
2016-01-04,9.0
2016-01-05,2709.0
2016-01-06,129.0
2016-01-07,1733.0
2016-01-08,187.0
...,...
2021-10-22,5680.0
2021-10-25,11251.0
2021-10-27,67280.0
2021-10-28,11310.0


In [15]:
fmp_data = get_FMP_historical_data(symbol)
fmp_data

Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,changeOverTime
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-04,27.040001,28.459999,27.010000,28.309999,21.848804,4330400.0,4330400.0,1.27,4.697,27.92667,0.04697
2016-01-05,28.590000,29.010000,28.059999,28.770000,22.203817,3416600.0,3416600.0,0.18,0.630,28.61333,0.00630
2016-01-06,28.230000,28.740000,27.440001,28.370001,21.895107,3756400.0,3756400.0,0.14,0.496,28.18333,0.00496
2016-01-07,28.120001,28.770000,27.740000,28.450001,21.956852,2662800.0,2662800.0,0.33,1.174,28.32000,0.01174
2016-01-08,28.700001,28.790001,28.040001,28.370001,21.895107,2699200.0,2699200.0,-0.33,-1.150,28.40000,-0.01150
...,...,...,...,...,...,...,...,...,...,...,...
2021-10-25,169.420000,174.800000,167.260000,173.970000,173.970000,1449036.0,1449036.0,4.55,2.686,172.01000,0.02686
2021-10-26,173.360000,185.000000,172.500000,177.840000,177.840000,2171711.0,2171711.0,4.48,2.584,178.44667,0.02584
2021-10-27,180.000000,183.000000,172.340000,173.510000,173.510000,1113458.0,1113458.0,-6.49,-3.606,176.28333,-0.03606
2021-10-28,175.160000,183.140000,175.000000,182.850000,182.850000,1687577.0,1687577.0,7.69,4.390,180.33000,0.04390


In [16]:
## Smoke test of the IEX chart helper: token=test_token routes the request to the sandbox URL.
## Note the ticker is hard-coded to 'TSLA' here rather than using `symbol`.
## NOTE(review): sandbox responses are presumably not real market data - confirm before relying on them.
iex_data = get_IEX_historical_data_all('TSLA',chart_range='71m',token=test_token)
iex_data

Unnamed: 0_level_0,close,high,low,open,symbol,volume,id,key,subkey,updated,...,uLow,uVolume,fOpen,fClose,fHigh,fLow,fVolume,label,change,changePercent
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-12-29,49.672,49.6720,46.0948,47.256,TSLA,12142400,CHCSTI_RPLRSIIAOE,TSAL,,1619505154360,...,230.432,2474269,47.570,48.891,49.0490,46.8444,12269540,"Dec 29, 15",0.000000,0.0000
2015-12-30,49.499,50.7557,47.6400,47.640,TSLA,18996610,CR_ETIORSHIPSLAIC,LSAT,,1611903993839,...,243.700,3850236,47.690,49.311,49.6989,47.2522,19311864,"Dec 30, 15",0.180512,0.0038
2015-12-31,48.169,50.2900,48.1690,49.943,TSLA,13747309,RAS_OTIESIHRPCCIL,TALS,,1645745899333,...,244.830,2850270,47.801,48.958,49.4200,48.9010,14094357,"Dec 31, 15",0.397084,0.0084
2016-01-04,45.489,47.1100,44.6000,47.110,TSLA,35530715,ESACIPISC_TIRROLH,TLAS,,1638673021645,...,226.000,7005426,47.027,46.229,47.2100,44.8000,35298919,"Jan 4, 16",-3.365590,-0.0719
2016-01-05,45.207,47.5130,45.0000,47.513,TSLA,16151356,IERSRSLTOICAP_CHI,LTAS,,1660116929805,...,223.000,3233395,46.613,45.901,45.6900,45.0000,16075283,"Jan 5, 16",0.004157,0.0001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-19,1162.500,1185.0158,1093.8000,1134.760,TSLA,22235775,LIORPIR_ICSHCAEST,LAST,,1669654751182,...,1118.500,21867617,1134.030,1155.320,1190.2839,1113.1000,22152078,"Nov 19, 21",41.703805,0.0385
2021-11-22,1200.870,1251.7700,1135.1500,1188.960,TSLA,34503191,PI_ITCLRSHCERSAIO,LATS,,1703498179789,...,1164.860,33606792,1189.440,1193.830,1215.7900,1149.3800,34440812,"Nov 22, 21",20.477858,0.0179
2021-11-23,1148.060,1204.6400,1071.9000,1204.640,TSLA,36350581,PCIRO_HSRELICAITS,STAL,,1657451673774,...,1107.400,37607359,1208.450,1127.410,1182.2885,1063.1000,36451760,"Nov 23, 21",-49.035334,-0.0432
2021-11-24,1148.000,1156.7700,1108.0000,1129.700,TSLA,23490206,TI_IICCAELRSOHRPS,ASTL,,1646996550053,...,1082.000,22703070,1105.840,1158.000,1150.7300,1096.0000,22695843,"Nov 24, 21",7.073838,0.0064


In [17]:
iex_data.to_csv('../Resources/test_csv.csv')

In [18]:
# url_hist_price = fmpbase_urlv3+'historical-price-full/'
# url_hist_query_with_date = url_hist_price+symbol+'?from='+startDate+'&to='+endDate+'&apikey='+apiKey
# resp_data = requests.get(url_hist_query_with_date)
# json_ = resp_data.json()
# data = json_['historical']
# df = pd.DataFrame(data)
# df.rename(columns={'date':'Date'},inplace=True)
# df['Date'] = pd.to_datetime(df['Date'])
# df = df.reindex(index=df.index[::-1]) ## Reverse the DataFrame 
# df.set_index('Date',inplace=True)
# df.drop(columns='label',inplace=True)

In [24]:
## Step-by-step build of the combined frame for `symbol`
## (the same steps are wrapped up in get_time_series_data further down).

## 1. FTD rows for the symbol; SYMBOL is dropped so only QUANTITY_FAILS remains
ftd_data = return_ftd_data_symbol(symbol)
ftd_data = ftd_data.drop(columns={'SYMBOL'})

## 2. Daily prices from FMP, outer-merged with the FTD rows on Date
fmp_data = get_FMP_historical_data(symbol)
df1 = pd.merge(fmp_data,ftd_data, on='Date',how='outer')
## Dates missing from one side get 0 in the fail-count / volume / vwap columns
df1['QUANTITY_FAILS'] = df1['QUANTITY_FAILS'].fillna(0)
df1['volume'] = df1['volume'].fillna(0)
df1['unadjustedVolume'] = df1['unadjustedVolume'].fillna(0)
df1['vwap'] = df1['vwap'].fillna(0)
## 3. Short-volume data from Quandl, outer-merged on Date
df2 = get_short_data_QUANDL(symbol)
df = pd.merge(df1,df2,on='Date',how='outer')
## NOTE(review): this fills every remaining NaN with 0, including the price
## columns (open/high/low/close/...) on dates absent from the FMP data -
## confirm that zero prices are intended.
df = df.fillna(0)
df

Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,changeOverTime,QUANTITY_FAILS,ShortVolumeNSDQ,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2016-01-04,27.040001,28.459999,27.010000,28.309999,21.848804,4330400.0,4330400.0,1.27,4.697,27.92667,0.04697,9.0,734359.0,250.0,1091239.0,369005.0,0.0,411261.0
2016-01-05,28.590000,29.010000,28.059999,28.770000,22.203817,3416600.0,3416600.0,0.18,0.630,28.61333,0.00630,2709.0,324822.0,0.0,766181.0,45542.0,0.0,108705.0
2016-01-06,28.230000,28.740000,27.440001,28.370001,21.895107,3756400.0,3756400.0,0.14,0.496,28.18333,0.00496,129.0,470222.0,1300.0,818619.0,54449.0,0.0,136915.0
2016-01-07,28.120001,28.770000,27.740000,28.450001,21.956852,2662800.0,2662800.0,0.33,1.174,28.32000,0.01174,1733.0,255510.0,100.0,483840.0,27639.0,0.0,83420.0
2016-01-08,28.700001,28.790001,28.040001,28.370001,21.895107,2699200.0,2699200.0,-0.33,-1.150,28.40000,-0.01150,187.0,297805.0,268.0,441419.0,17055.0,0.0,32738.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-25,169.420000,174.800000,167.260000,173.970000,173.970000,1449036.0,1449036.0,4.55,2.686,172.01000,0.02686,11251.0,224603.0,6814.0,371983.0,55031.0,748.0,114748.0
2021-10-26,173.360000,185.000000,172.500000,177.840000,177.840000,2171711.0,2171711.0,4.48,2.584,178.44667,0.02584,0.0,338831.0,4819.0,600800.0,75546.0,966.0,167756.0
2021-10-27,180.000000,183.000000,172.340000,173.510000,173.510000,1113458.0,1113458.0,-6.49,-3.606,176.28333,-0.03606,67280.0,164158.0,3233.0,292965.0,34396.0,238.0,84514.0
2021-10-28,175.160000,183.140000,175.000000,182.850000,182.850000,1687577.0,1687577.0,7.69,4.390,180.33000,0.04390,11310.0,250043.0,4653.0,433029.0,40388.0,304.0,126039.0


In [25]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1468 entries, 2016-01-04 to 2021-10-29
Data columns (total 18 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   open                   1468 non-null   float64
 1   high                   1468 non-null   float64
 2   low                    1468 non-null   float64
 3   close                  1468 non-null   float64
 4   adjClose               1468 non-null   float64
 5   volume                 1468 non-null   float64
 6   unadjustedVolume       1468 non-null   float64
 7   change                 1468 non-null   float64
 8   changePercent          1468 non-null   float64
 9   vwap                   1468 non-null   float64
 10  changeOverTime         1468 non-null   float64
 11  QUANTITY_FAILS         1468 non-null   float64
 12  ShortVolumeNSDQ        1468 non-null   float64
 13  ShortExemptVolumeNSDQ  1468 non-null   float64
 14  TotalVolumeNSDQ        1468 non-null  

In [26]:
def get_time_series_data(symbol):
    """Assemble the combined daily frame (prices, FTDs, short volume) for ``symbol``.

    Steps, in order:
      1. FTD rows via ``return_ftd_data_symbol`` with the ``SYMBOL`` column dropped.
      2. FMP prices via ``get_FMP_historical_data``, outer-merged with the FTD rows
         on ``Date``; ``QUANTITY_FAILS``, ``volume``, ``unadjustedVolume`` and
         ``vwap`` are zero-filled.
      3. Short-volume data via ``get_short_data_QUANDL``, outer-merged on ``Date``.
      4. Every remaining missing value in the merged frame is replaced by 0.

    Returns the resulting DataFrame.
    """
    fails = return_ftd_data_symbol(symbol).drop(columns={'SYMBOL'})

    prices = get_FMP_historical_data(symbol)
    prices_and_fails = pd.merge(prices, fails, on='Date', how='outer')
    for column in ('QUANTITY_FAILS', 'volume', 'unadjustedVolume', 'vwap'):
        prices_and_fails[column] = prices_and_fails[column].fillna(0)

    short_volume = get_short_data_QUANDL(symbol)
    combined = pd.merge(prices_and_fails, short_volume, on='Date', how='outer')
    return combined.fillna(0)