In [1]:
import pandas as pd

from pathlib import Path
import csv

import os
import requests
import json

import quandl

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
## Use pickle module to import and export and save files
import pickle
def load_obj(path):
    """Read the pickle file at ``path`` and return the object stored in it.

    Unpickling can execute arbitrary code, so this should only be pointed at
    files produced by this project (for example via ``save_obj``).
    """
    with open(path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded
def save_obj(obj, path):
    """Pickle ``obj`` to ``path`` using the highest protocol available.

    An existing file at ``path`` is overwritten.
    """
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [3]:
## Constants shared by the data-source helpers defined in later cells

## Date window (ISO YYYY-MM-DD strings) requested from the price and short-volume APIs;
## the per-symbol dataframes are built for this range
start_date = '2016-01-01'
end_date = '2022-01-31'
default_date_range = '71m' ## Default range for IEX functions - don't need more at the moment (not referenced by the code visible in this notebook)

In [4]:
## QUANDL/NASDAQ 
## API key is read from the environment (populated from .env by load_dotenv above); None when unset
nsdq_api_key = os.environ.get('NASDAQ_API_KEY')
## REST base URL for the FINRA datasets; the helpers visible in this notebook go through the quandl client and do not reference it
base_url_nsdq = 'https://data.nasdaq.com/api/v3/datasets/FINRA/'

## Get Short "Interest" Data from Quandl 
def get_short_data_QUANDL(symbol):
    """Fetch the two FINRA short-volume datasets for ``symbol`` and join them.

    The ``FINRA/FNSQ_<symbol>`` and ``FINRA/FNYX_<symbol>`` datasets are
    requested for the module-level ``start_date``..``end_date`` window. Their
    volume columns are suffixed with ``NSDQ`` / ``NYSE`` respectively and the
    two frames are outer-merged on ``Date``, so a date present in only one
    dataset keeps NaN in the other dataset's columns.
    """
    nasdaq_raw = quandl.get("FINRA/FNSQ_"+symbol, start_date=start_date, end_date=end_date, authtoken=nsdq_api_key)
    nyse_raw = quandl.get("FINRA/FNYX_"+symbol, start_date=start_date, end_date=end_date, authtoken=nsdq_api_key)

    nasdaq_short = nasdaq_raw.rename(columns={
        'ShortVolume': 'ShortVolumeNSDQ',
        'TotalVolume': 'TotalVolumeNSDQ',
        'ShortExemptVolume': 'ShortExemptVolumeNSDQ',
    })
    nyse_short = nyse_raw.rename(columns={
        'ShortVolume': 'ShortVolumeNYSE',
        'TotalVolume': 'TotalVolumeNYSE',
        'ShortExemptVolume': 'ShortExemptVolumeNYSE',
    })

    return pd.merge(nasdaq_short, nyse_short, on='Date', how='outer')


## Return FTD Data from SEC FTD files using a Stock's CUSIP number to sort 
def return_ftd_data_cusip(cusip_number):
    """Return the FTD rows for one or more CUSIPs, indexed by ``Date``.

    Rows are selected from the module-level ``ftd_df`` with a boolean mask, so
    the result is always a DataFrame - including when exactly one row matches
    (label-based ``.loc`` on a CUSIP index returns a Series in that case, which
    has no ``set_index``). The full FTD table is neither copied nor modified.

    Parameters
    ----------
    cusip_number : str or list-like of str
        A single CUSIP, or several (e.g. the Series returned by
        ``return_CUSIP_from_symbol`` when a symbol maps to multiple CUSIPs).

    Returns
    -------
    pandas.DataFrame
        The matching rows in their original order, without the ``CUSIP``
        column, with ``Date`` converted to datetime and used as the index.

    Raises
    ------
    KeyError
        If no row of ``ftd_df`` matches any requested CUSIP.
    """
    if pd.api.types.is_list_like(cusip_number):
        wanted = list(cusip_number)
    else:
        wanted = [cusip_number]

    matches = ftd_df.loc[ftd_df['CUSIP'].isin(wanted)]
    if matches.empty:
        # Keep the failure mode callers already handle for unknown CUSIPs.
        raise KeyError(cusip_number)

    # drop() returns a new frame, so the Date assignment below never touches ftd_df.
    result = matches.drop(columns='CUSIP')
    result['Date'] = pd.to_datetime(result['Date'])
    return result.set_index('Date')

## Return the CUSIP symbol from the symbol_df symbol list 
def return_CUSIP_from_symbol(symbol):
    """Look up the ``CUSIP`` value(s) listed for ``symbol`` in ``symbol_df``.

    Returns a scalar when the symbol appears once and a Series when it appears
    on several rows. Raises KeyError when the symbol is not listed. The
    module-level ``symbol_df`` is left untouched.
    """
    by_symbol = symbol_df.set_index('SYMBOL')
    matched = by_symbol.loc[symbol]
    return matched['CUSIP']

def return_ftd_data_symbol(symbol):
    """Return the FTD rows for a ticker by resolving its CUSIP first."""
    return return_ftd_data_cusip(return_CUSIP_from_symbol(symbol))

In [5]:
## FMP Constants 
## Base URLs for the two FMP Cloud API versions used by the helpers below (both end with '/')
fmpbase_urlv3 = 'https://fmpcloud.io/api/v3/'
fmpbase_urlv4 = 'https://fmpcloud.io/api/v4/'
## API key is read from the environment; None when FMP_CLOUD_API_KEY is not set
api_key = os.getenv("FMP_CLOUD_API_KEY")

## FMP Functions 
def get_FMP_historical_data(symbol, startDate=start_date, endDate=end_date, apiKey=api_key, timeout=30):
    """Download FMP's daily price history for ``symbol`` as a Date-indexed frame.

    Parameters
    ----------
    symbol : str
        Ticker appended to the ``historical-price-full/`` endpoint.
    startDate, endDate : str
        Sent as the ``from`` / ``to`` query parameters.
    apiKey : str
        Sent as the ``apikey`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up, so that one stalled
        connection cannot hang a long batch run.

    Returns
    -------
    pandas.DataFrame
        The ``historical`` records in reversed row order, indexed by ``Date``
        (datetime), without the ``label`` column.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    KeyError
        If the response carries no ``historical`` entry.
    """
    url_hist_price = fmpbase_urlv3+'historical-price-full/'+symbol
    # Let requests build and encode the query string instead of concatenating it by hand.
    query = {'from': startDate, 'to': endDate, 'apikey': apiKey}
    resp_data = requests.get(url_hist_price, params=query, timeout=timeout)
    resp_data.raise_for_status()
    json_ = resp_data.json()
    if 'historical' not in json_:
        raise KeyError("no 'historical' data in FMP response for " + str(symbol))

    prices = pd.DataFrame(json_['historical']).rename(columns={'date': 'Date'})
    prices['Date'] = pd.to_datetime(prices['Date'])
    # Reverse the row order - presumably the API returns newest first; TODO confirm.
    prices = prices.iloc[::-1].set_index('Date')
    # errors='ignore' keeps the function working if the payload has no 'label' field.
    return prices.drop(columns='label', errors='ignore')

def get_float_data_FMP(symbol):
    """Return the first record of FMP's ``shares_float`` response for ``symbol``.

    Raises ``requests.HTTPError`` on an error status and ``IndexError`` /
    ``KeyError`` when the payload is not a non-empty list.
    """
    payload = _fmp_get_json(fmpbase_urlv4+'shares_float', {'symbol': symbol, 'apikey': api_key})
    return payload[0]

def get_company_profile_FMP_json(symbol):
    """Return the first record of FMP's company ``profile`` response for ``symbol``.

    Raises ``requests.HTTPError`` on an error status and ``IndexError`` /
    ``KeyError`` when the payload is not a non-empty list.
    """
    ## https://fmpcloud.io/api/v3/profile/AAPL?apikey='yourkeyhere'
    payload = _fmp_get_json(fmpbase_urlv3+'profile/'+symbol, {'apikey': api_key})
    return payload[0]

def _fmp_get_json(url, params, timeout=30):
    """GET ``url`` with ``params`` and return the decoded JSON body.

    A timeout is always passed so a stalled connection cannot block a batch
    run, and HTTP error statuses raise instead of being parsed as data.
    """
    resp_data = requests.get(url, params=params, timeout=timeout)
    resp_data.raise_for_status()
    return resp_data.json()

# def save_and_export_raw_df_csv(data, symbol, path='None'):
#     ## Can set custom path (useful for testing) otherwise will default to below path
#     if path=='None':
#         path = ('../FilesExport_IndividualStockDFs_Big/'+symbol+'_combined_df.csv')
#     data.to_csv(path)

def save_and_export_raw_df_pkl(data, symbol, path='None'):
    """Pickle ``data`` with ``save_obj``.

    ``path`` may be set explicitly (useful for testing). When it is left at
    the default - the string ``'None'`` - or passed as ``None``, the file goes
    to ``../FilesExport_TimeSeries_DFs/<symbol>_combined_df.pkl``.
    """
    if path is None or path == 'None':
        path = ('../FilesExport_TimeSeries_DFs/'+symbol+'_combined_df.pkl')
    save_obj(data,path)

In [6]:
def get_time_series_data(symbol):
    """Build the combined daily time series for ``symbol``.

    FMP prices are outer-merged on ``Date`` with the SEC fails-to-deliver rows
    (minus their ``SYMBOL`` column) and then with the FINRA short-volume data.
    Every gap left by the outer merges is filled with 0 and the result is
    returned sorted by, and indexed on, ``Date``.
    """
    fails = return_ftd_data_symbol(symbol).drop(columns={'SYMBOL'})
    prices = get_FMP_historical_data(symbol)

    prices_and_fails = pd.merge(prices, fails, on='Date', how='outer')
    for column in ('QUANTITY_FAILS', 'volume', 'unadjustedVolume', 'vwap'):
        prices_and_fails[column] = prices_and_fails[column].fillna(0)

    short_volume = get_short_data_QUANDL(symbol)
    combined = pd.merge(prices_and_fails, short_volume, on='Date', how='outer').fillna(0)
    return combined.reset_index().sort_values('Date').set_index('Date')

In [7]:
## Load the symbol table pickled by a previous notebook; return_CUSIP_from_symbol reads
## its SYMBOL and CUSIP columns, so it must be loaded before any per-symbol call
import_path = Path('../Resources/symbol_all_list_01_2022.pkl')
symbol_df = load_obj(import_path)
len(symbol_df)

35140

In [8]:
## Only update symbols that exist in this list (this is what the batch loop iterates over)
import_path = Path('../Resources/02_symbol_success_list.pkl')
symbol_list = load_obj(import_path)
len(symbol_list)

6620

In [9]:
## Check for duplicates: a set drops repeated symbols, so a length mismatch means symbol_list has repeats
symbol_list_set = set(symbol_list) 
contains_duplicates = len(symbol_list) != len(symbol_list_set)
print(contains_duplicates)

True


In [10]:
if contains_duplicates: ## Remove duplicates; dict keys keep the first-seen order of the symbols
    symbol_list = list(dict.fromkeys(symbol_list))
    print(len(symbol_list))

5493


In [11]:
## Load ftd_df, the table of SEC fails-to-deliver records that return_ftd_data_cusip filters by CUSIP
ftd_df = load_obj('../Resources/ftd_all_data_01_2022.pkl')

In [12]:
## Add fundamental data 

In [13]:
## IEX Constants
## Both tokens are read from the environment; either is None when its variable is unset
iex_api_key = os.getenv("IEX_API_KEY")
iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

## Redundant Assignment but improves Readability throughout code 
real_token = iex_api_key
test_token = iex_test_api_key

## The production API and the sandbox live on different hosts
base_url_iex = 'https://cloud.iexapis.com/stable/'
sandbox_url = 'https://sandbox.iexapis.com/stable/'

## IEX Status Test: GET on the production status endpoint, sent without a token.
## Runs at cell execution time and has no timeout.
test_resp = requests.get(base_url_iex + 'status')
test_resp

<Response [200]>

In [14]:
## As committed this selects the sandbox token, so the IEX statistics gathered by get_fundamentals
## come from sandbox_url. NOTE(review): confirm sandbox values are acceptable for the exported fundamentals.
token_status = test_token ## Set to either real token or test token for IEX
iex_token = token_status

In [15]:
## Load IEX to get ETF statistics 
def get_IEX_statistics(stock_ticker, token=None):
    """Fetch the IEX ``stats`` payload for ``stock_ticker``.

    Parameters
    ----------
    stock_ticker : str
        Ticker inserted into the ``stock/<ticker>/stats/`` endpoint.
    token : str, optional
        IEX token to use. Defaults to the module-level ``token_status``, read
        when the function is called so that re-running the token cell takes
        effect without redefining this function. The sandbox host is used when
        the token equals ``test_token``, the production host when it equals
        ``real_token``.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    ValueError
        If no token is configured, or the token is neither ``test_token`` nor
        ``real_token`` (there is no host to send it to).
    requests.HTTPError
        If the server answers with an error status.
    """
    if token is None:
        token = token_status
    if not token:
        raise ValueError("IEX token is not set; check IEX_API_KEY / IEX_TEST_API_KEY")

    if token == test_token:
        base_url = sandbox_url
    elif token == real_token:
        base_url = base_url_iex
    else:
        raise ValueError("token matches neither the IEX test token nor the real token")

    # The timeout keeps one stalled request from blocking a long batch run.
    resp_data = requests.get(base_url+'stock/'+stock_ticker+'/stats/', params={'token': token}, timeout=30)
    resp_data.raise_for_status()
    return resp_data.json()

In [16]:
def get_fundamentals(symbol,dataframe,b_export=False):
    """Bundle a symbol's time series with fundamentals from IEX and FMP.

    The three lookups (IEX stats, FMP profile, FMP float) are independent.
    When one of them fails for any ordinary reason - network error, missing
    key, unexpected payload - every value sourced from it falls back to 0, so
    one unavailable source does not discard the symbol. Only ``Exception`` is
    caught: KeyboardInterrupt and SystemExit still stop a batch run.

    Parameters
    ----------
    symbol : str
        Ticker; also the single top-level key of the returned dict.
    dataframe : pandas.DataFrame
        Time series for the symbol. It is copied, and the ``close`` value of
        its last row becomes ``final_close_price``.
    b_export : bool, optional
        When true, the result is also pickled to
        ``../FilesExport_DataDicts_01_2022/<symbol>_data_dict.pkl``.

    Returns
    -------
    dict
        ``{symbol: {'Fundamentals': {...}, 'dataFrame': ..., 'companyProfile': ...,
        'floatData': ..., 'iex_statistics': ...}}``

    Raises
    ------
    IndexError, KeyError
        If ``dataframe`` is empty or has no ``close`` column.
    """
    time_series_df = dataframe.copy()

    ## IEX
    iex_fields = ('sharesOutstanding', 'peRatio', 'beta', 'week52high', 'week52low',
                  'week52change', 'avg10Volume', 'avg30Volume')
    try:
        iex_stats = get_IEX_statistics(stock_ticker=symbol,token=iex_token)
        iex_values = {field: iex_stats[field] for field in iex_fields}
        marketcap_IEX = iex_stats['marketcap']
    except Exception:
        # All-or-nothing: a single missing key zeroes every IEX-sourced value.
        iex_stats = 0
        iex_values = dict.fromkeys(iex_fields, 0)
        marketcap_IEX = 0

    ## FMP
    try:
        fmp_profile = get_company_profile_FMP_json(symbol)
        exchange = fmp_profile['exchangeShortName']
        marketcap_FMP = fmp_profile['mktCap']
    except Exception:
        fmp_profile = 0
        exchange = 0
        marketcap_FMP = 0
    try:
        float_data = get_float_data_FMP(symbol)
        floatShares = float_data['floatShares']
    except Exception:
        float_data = 0
        floatShares = 0

    ## Create data_dict:
    data = {symbol : {
    'Fundamentals':                                 ## Fundamental data about the stock
    {
        'sharesOutstanding': iex_values['sharesOutstanding'],   ## Sourced from iex_statistics
        'floatShares': floatShares,                 ## Sourced from FMP float data, 0 when unavailable
        'exchange': exchange,                       ## Sourced from FMP profile
        'final_close_price': time_series_df.iloc[-1]['close'],  ## Last row of time_series_df
        'peRatio': iex_values['peRatio'],           ## Sourced from iex_statistics
        'beta': iex_values['beta'],                 ## Sourced from iex_statistics
        'week52high': iex_values['week52high'],     ## Sourced from iex_statistics
        'week52low': iex_values['week52low'],       ## Sourced from iex_statistics
        'week52change': iex_values['week52change'], ## Sourced from iex_statistics
        'avg10Volume': iex_values['avg10Volume'],   ## Sourced from iex_statistics
        'avg30Volume': iex_values['avg30Volume'],   ## Sourced from iex_statistics
        'marketcap_IEX': marketcap_IEX,             ## Sourced from iex_statistics
        'marketcap_FMP': marketcap_FMP              ## Sourced from FMP profile
    },
    'dataFrame':time_series_df,                     ## Copy of the time series passed in
    'companyProfile':fmp_profile,                   ## Sourced from FMP, otherwise =0
    'floatData':float_data,                         ## Sourced from FMP, otherwise =0
    #'textNews':['article1','article2','article3'], ## Not sourced
    #'returns':'returns_data',                      ## Not calculated
    'iex_statistics': iex_stats                     ## Raw IEX stats payload, otherwise =0
    }}

    if b_export:
        export_path = Path('../FilesExport_DataDicts_01_2022/'+symbol+'_data_dict.pkl')
        save_obj(data,export_path)

    return data

In [17]:
## Now do FTD calculations
## Both values are used by do_ftd_calculations as open-ended slice starts on the Date index
## (df.loc[value:]), i.e. each window runs from that month to the end of the data
ytd_variable = '2021-02' ## Set to year ago Feb 2021 - Jan 2022 for 12 months of data 
last_month_variable = '2022-01'

def do_ftd_calculations(symbol,data):
    """Compute fails-to-deliver and short-volume statistics for ``symbol``.

    Reads ``data[symbol]['dataFrame']`` (Date-indexed, with ``QUANTITY_FAILS``,
    ``volume`` and the six ``*NSDQ`` / ``*NYSE`` short-volume columns) and
    ``data[symbol]['Fundamentals']``, then stores the results under
    ``data[symbol]['ftd_stats']``.

    Windows, all open-ended slices that run to the end of the frame:
    ``all`` is every row, ``2021`` is rows from 2021 onward (later years
    included), ``ytd`` starts at the module-level ``ytd_variable`` and
    ``month`` starts at ``last_month_variable``.

    Every percentage is 0 when its denominator is 0 or cannot be divided by
    (for example shares outstanding reported as ``None``).

    Returns
    -------
    dict
        The same ``data`` object that was passed in, modified in place.
    """
    df = data[symbol]['dataFrame']
    fundamentals = data[symbol]['Fundamentals']
    outstandingShares = fundamentals['sharesOutstanding']
    float_shares = fundamentals['floatShares']

    ytd_frame = df.loc[ytd_variable:]

    total_ftds_all = df.QUANTITY_FAILS.sum()
    total_ftds_2021 = df.loc['2021':].QUANTITY_FAILS.sum()
    total_ftds_ytd = ytd_frame.QUANTITY_FAILS.sum()
    total_ftds_month = df.loc[last_month_variable:].QUANTITY_FAILS.sum()

    (short_volume_FMPpct_all_data, short_volume_NSDQpct_all_data,
     shortexempt_volume_FMPpct_all_data, shortexempt_volume_NSDQpct_all_data) = _short_volume_pcts(df)
    (short_volume_FMPpct_ytd, short_volume_NSDQpct_ytd,
     shortexempt_volume_FMPpct_ytd, shortexempt_volume_NSDQpct_ytd) = _short_volume_pcts(ytd_frame)

    ftd_stats = {
        'total_ftds_all':total_ftds_all,
        'total_ftds_2021':total_ftds_2021,
        'total_ftds_ytd':total_ftds_ytd,
        'total_ftds_month':total_ftds_month,
        'os_ftd_pct_all':_ftd_pct_of(total_ftds_all, outstandingShares),
        'os_ftd_pct_2021':_ftd_pct_of(total_ftds_2021, outstandingShares),
        'os_ftd_pct_ytd':_ftd_pct_of(total_ftds_ytd, outstandingShares),
        'os_ftd_pct_month':_ftd_pct_of(total_ftds_month, outstandingShares),
        'float_ftd_pct_all':_ftd_pct_of(total_ftds_all, float_shares),
        'float_ftd_pct_2021':_ftd_pct_of(total_ftds_2021, float_shares),
        'float_ftd_pct_ytd':_ftd_pct_of(total_ftds_ytd, float_shares),
        'float_ftd_pct_month':_ftd_pct_of(total_ftds_month, float_shares),
        'short_volume_NSDQpct_all_data':short_volume_NSDQpct_all_data,
        'shortexempt_volume_NSDQpct_all_data':shortexempt_volume_NSDQpct_all_data,
        'short_volume_NSDQpct_ytd':short_volume_NSDQpct_ytd,
        'shortexempt_volume_NSDQpct_ytd':shortexempt_volume_NSDQpct_ytd,
        'short_volume_FMPpct_all_data':short_volume_FMPpct_all_data,
        'shortexempt_volume_FMPpct_all_data':shortexempt_volume_FMPpct_all_data,
        'short_volume_FMPpct_ytd':short_volume_FMPpct_ytd,
        'shortexempt_volume_FMPpct_ytd':shortexempt_volume_FMPpct_ytd
    }

    ## Add to data passed to function, then return
    data[symbol]['ftd_stats'] = ftd_stats
    return data

def _ftd_pct_of(part, whole):
    """Return ``part / whole * 100``, or 0 when ``whole`` is 0 or cannot be divided by."""
    if whole == 0:
        return 0
    try:
        return part / whole * 100
    except Exception:
        # e.g. a fundamentals value reported as None
        return 0

def _short_volume_pcts(frame):
    """Return four short-volume percentages for the rows of ``frame``.

    Order: short vs FMP volume, short vs FINRA total volume, short-exempt vs
    FMP volume, short-exempt vs FINRA total volume. The NSDQ and NYSE columns
    are summed together before dividing.
    """
    short_volume = frame.ShortVolumeNSDQ.sum() + frame.ShortVolumeNYSE.sum()
    short_exempt_volume = frame.ShortExemptVolumeNSDQ.sum() + frame.ShortExemptVolumeNYSE.sum()
    finra_total_volume = frame.TotalVolumeNSDQ.sum() + frame.TotalVolumeNYSE.sum()
    fmp_total_volume = frame.volume.sum()
    return (
        _ftd_pct_of(short_volume, fmp_total_volume),
        _ftd_pct_of(short_volume, finra_total_volume),
        _ftd_pct_of(short_exempt_volume, fmp_total_volume),
        _ftd_pct_of(short_exempt_volume, finra_total_volume),
    )

In [18]:
# symbol = 'XRT'
# test_df = get_time_series_data(symbol)
# test_data_dict = get_fundamentals(symbol,test_df)
# test_data_dict_2 = do_ftd_calculations(symbol,test_data_dict)
# test_data_dict_2[symbol]['ftd_stats']

In [19]:
## Build and export the complete data dict for every symbol.
## A symbol that fails at any step is recorded together with the reason and the loop
## moves on. Only Exception is caught, so Ctrl-C still interrupts the run.
symbol_success_list = []
symbol_error_list = []
symbol_error_details = {}  ## symbol -> repr() of the exception that stopped it

for symbol in symbol_list:

    try:
        df = get_time_series_data(symbol)
        data = get_fundamentals(symbol,df)
        data2 = do_ftd_calculations(symbol,data)

        export_path = Path('../FilesExport_Complete_DataDicts_2/'+symbol+'_updated_data_dict.pkl')
        save_obj(data2,export_path)
    except Exception as exc:
        symbol_error_list.append(symbol)
        symbol_error_details[symbol] = repr(exc)
    else:
        ## Only reached when every step above, including the export, succeeded
        symbol_success_list.append(symbol)

export_path = Path('../Resources/symbol_success_list_new.pkl')
save_obj(symbol_success_list,export_path)

In [20]:
## Display every symbol whose data dict was built and exported by the loop above
symbol_success_list

['A',
 'AA',
 'AAA',
 'AACG',
 'AADR',
 'AAIC',
 'AAL',
 'AAMC',
 'AAME',
 'AAN',
 'AAOI',
 'AAON',
 'AAP',
 'AAPL',
 'AAT',
 'AATC',
 'AAU',
 'AAWW',
 'AAXJ',
 'AB',
 'ABB',
 'ABBV',
 'ABC',
 'ABCB',
 'ABEO',
 'ABEV',
 'ABG',
 'ABIO',
 'ABM',
 'ABMD',
 'ABR',
 'ABST',
 'ABT',
 'ABTX',
 'ABUS',
 'AC',
 'ACAD',
 'ACB',
 'ACBI',
 'ACC',
 'ACCO',
 'ACER',
 'ACET',
 'ACFN',
 'ACGL',
 'ACH',
 'ACHC',
 'ACHV',
 'ACIW',
 'ACLS',
 'ACM',
 'ACN',
 'ACNB',
 'ACOR',
 'ACP',
 'ACR',
 'ACRE',
 'ACRS',
 'ACRX',
 'ACST',
 'ACTG',
 'ACU',
 'ACUR',
 'ACV',
 'ACWF',
 'ACWI',
 'ACWV',
 'ACWX',
 'ACY',
 'ADAP',
 'ADBE',
 'ADC',
 'ADES',
 'ADI',
 'ADM',
 'ADMA',
 'ADMP',
 'ADP',
 'ADRE',
 'ADS',
 'ADSK',
 'ADTN',
 'ADUS',
 'ADVM',
 'ADX',
 'ADXS',
 'AE',
 'AEE',
 'AEF',
 'AEG',
 'AEHL',
 'AEHR',
 'AEIS',
 'AEL',
 'AEM',
 'AEMD',
 'AEO',
 'AEP',
 'AER',
 'AERI',
 'AES',
 'AEY',
 'AEYE',
 'AEZS',
 'AFB',
 'AFG',
 'AFK',
 'AFL',
 'AFMD',
 'AFT',
 'AFTY',
 'AG',
 'AGCO',
 'AGD',
 'AGEN',
 'AGFS',
 'AGG',
 'AGG

In [22]:
## Display the symbols for which any step of the loop raised an exception
symbol_error_list

[]