In [1]:
## FMP is currently used to source historical price data - it exports the historical data to an individual CSV file for each symbol.
## In theory, it should be possible to parse those CSV files for historical data and append whatever FMP is missing via IEX to build
## more complete data frames. FMP cannot source fundamentals data (market cap, shares outstanding) for ETFs; IEX can.
## This notebook needs code that fills the gaps in the FMP data with IEX data. Unlike FMP, IEX has limited API calls, so always
## source from FMP first and only call IEX to fill in the gaps.

## This is a rewrite of the earlier code: cleaner, without test code, and focused on adding more data fields to the "Fundamentals" field.

In [2]:
import pandas as pd

import glob

from pathlib import Path
import csv

import os
import requests
import json

#import quandl

from dotenv import load_dotenv
load_dotenv()

True

In [3]:
## Constants

## Date window for historical data - dataframes will be created starting from start_date.
## Both values are the default from/to dates of get_FMP_historical_data and are sent as text in the request URL.
start_date = '2020-01-01'
end_date = '2021-10-29'
default_date_range = '2y' ## Default range for IEX functions - don't need more at the moment

In [4]:
## IEX Constants
## os.getenv returns None when the variable is not set, so both tokens can be None
## if the .env file loaded above does not define them.
iex_api_key = os.getenv("IEX_API_KEY")
iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

## Redundant assignment, but improves readability throughout the code
real_token = iex_api_key
test_token = iex_test_api_key

## Root URLs: production API and sandbox API (get_IEX_statistics picks one based on the token)
base_url_iex = 'https://cloud.iexapis.com/stable/'
sandbox_url = 'https://sandbox.iexapis.com/stable/'

## IEX status test - sent without a token; the bare expression below displays the response object
test_resp = requests.get(base_url_iex + 'status')
test_resp

<Response [200]>

In [5]:
## Must be set before the cell that defines get_IEX_statistics runs: that function uses
## token_status as the default value of its `token` parameter.
token_status = test_token ## Set to either real token or test token for IEX

In [6]:
## Load IEX to get ETF statistics
def get_IEX_statistics(stock_ticker, token=token_status):
    """Fetch the IEX ``stock/<ticker>/stats`` payload for one symbol.

    Parameters
    ----------
    stock_ticker : str
        Symbol to query; it is inserted into the request path as-is.
    token : str
        Either ``test_token`` (the request goes to ``sandbox_url``) or
        ``real_token`` (the request goes to ``base_url_iex``).
        NOTE: the default is evaluated once, when this ``def`` is executed.
        Re-assigning ``token_status`` afterwards does not change it - re-run
        this cell or pass ``token=`` explicitly.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    ValueError
        If ``token`` is empty/None, or is neither ``test_token`` nor
        ``real_token``.
    requests.exceptions.HTTPError
        If IEX answers with a 4xx/5xx status code.
    """
    ## A missing environment variable leaves the token as None; fail with a clear message
    ## instead of a "can only concatenate str" TypeError while building the URL.
    if not token:
        raise ValueError(
            "IEX token is missing - check IEX_API_KEY / IEX_TEST_API_KEY in the environment"
        )

    if token == test_token:
        endpoint_root = sandbox_url
    elif token == real_token:
        endpoint_root = base_url_iex
    else:
        ## Previously this case fell through both branches and crashed on an unbound variable.
        raise ValueError("token must be either test_token or real_token")

    resp_data = requests.get(endpoint_root+'stock/'+stock_ticker+'/stats/?token='+token)
    ## Surface HTTP errors here rather than as a JSON decoding error or a KeyError further downstream.
    resp_data.raise_for_status()
    return resp_data.json()

In [7]:
## FMP Constants
## Root URLs of the v3 and v4 FMP Cloud APIs; endpoint paths are appended by the functions below.
fmpbase_urlv3 = 'https://fmpcloud.io/api/v3/'
fmpbase_urlv4 = 'https://fmpcloud.io/api/v4/'
## os.getenv returns None when FMP_CLOUD_API_KEY is not set; the functions below concatenate
## this value into the request URL, which fails for None.
api_key = os.getenv("FMP_CLOUD_API_KEY")

## FMP Functions - this file should not need to call the historical data function. 
##                 This file should import the CSV files with historical data and then append IEX testing data
##                 to any data FMP cannot source. 
def get_FMP_historical_data(symbol, startDate=start_date, endDate=end_date, apiKey=api_key):
    """Download FMP ``historical-price-full`` data for ``symbol`` as a DataFrame.

    Parameters
    ----------
    symbol : str
        Ticker appended to the endpoint path.
    startDate, endDate : str
        Sent as the ``from`` / ``to`` query parameters.
    apiKey : str
        Sent as the ``apikey`` query parameter.

    Returns
    -------
    pandas.DataFrame
        Built from the ``'historical'`` entry of the JSON response, with the
        ``date`` column converted to datetime and used as the ``Date`` index,
        the ``label`` column removed, and the rows in the reverse of the order
        returned by the API.
    """
    query_url = (
        fmpbase_urlv3 + 'historical-price-full/' + symbol
        + '?from=' + startDate + '&to=' + endDate + '&apikey=' + apiKey
    )
    payload = requests.get(query_url).json()

    prices = pd.DataFrame(payload['historical']).rename(columns={'date': 'Date'})
    prices['Date'] = pd.to_datetime(prices['Date'])
    ## Reverse the row order first, then promote 'Date' to the index and drop the text label
    return prices.iloc[::-1].set_index('Date').drop(columns='label')

def get_float_data_FMP(symbol):
    """Return the first record of the FMP v4 ``shares_float`` response for ``symbol``.

    The request uses the module-level ``api_key``. The JSON body is indexed
    with ``[0]``, so an empty list response raises ``IndexError``.
    """
    query_url = fmpbase_urlv4 + 'shares_float?symbol=' + symbol + '&apikey=' + api_key
    records = requests.get(query_url).json()
    return records[0]

def get_company_profile_FMP_json(symbol):
    """Return the first record of the FMP v3 ``profile`` response for ``symbol``.

    Example request: https://fmpcloud.io/api/v3/profile/AAPL?apikey='yourkeyhere'

    The request uses the module-level ``api_key``. The JSON body is indexed
    with ``[0]``, so an empty list response raises ``IndexError``.
    """
    profile_url = fmpbase_urlv3 + 'profile/' + symbol + '?apikey=' + api_key
    profiles = requests.get(profile_url).json()
    return profiles[0]

# def save_and_export_raw_df_csv(data, symbol):
#     path = ('../FilesExportIndividualStockDFs/'+symbol+'_combined_df.csv')
#     data.to_csv(path)

In [8]:
## Use pickle module to import and export and save files
import pickle
def load_obj(path):
    """Unpickle and return the object stored in the file at ``path``.

    Unpickling can execute arbitrary code, so only use this on files this
    project wrote itself.
    """
    with open(path, mode='rb') as pickle_file:
        loaded = pickle.load(pickle_file)
    return loaded
def save_obj(obj, path):
    """Pickle ``obj`` into the file at ``path`` using the highest available protocol.

    An existing file at ``path`` is overwritten.
    """
    with open(path, mode='wb') as pickle_file:
        pickle.dump(obj, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

In [9]:
# # ## Import FTD File using CSV 
# # ftd_df = pd.read_csv(
# #     Path('../Resources/ftd_all_data.csv'),
# #     index_col=0, parse_dates=True
# # )

# ## Don't need FTD file at the moment. Import symbol_df as a way to get symbols to iterate through saved CSVs. 
# ## Some symbols will fail, need to create function that skips missed indexes 

# ## Import Symbol and CUSIP list using CSV
# symbol_df = pd.read_csv(
#     Path('../Resources/symbol_all_list.csv'),
#     index_col=0
# )


## Do not seem to be needed for this file, but code here anyway if either file is needed 

In [10]:
# ## How to find index value of specific stock 
# index_value = 0
# for i in complete_key_list:
#     if i == 'ACP':
#         print(index_value)
#         break
#     index_value +=1

In [11]:
## Batch files are named data_complete_<first>_<last>.pkl and cover `increment` indexes each
x = 0
y = 49
increment = 50
imported_data_dict = {}

test_length = 1500
## Current last file is 8450
while x < test_length:
    pkl_path = Path('../FilesExportCompleteFMP/data_complete_'+str(x)+'_'+str(y)+'.pkl')
    data_obj = load_obj(pkl_path)
    ## Merge every symbol of this batch into the combined dictionary
    imported_data_dict.update(data_obj)

    ## Advance both ends of the file-name window to the next batch
    x += increment
    y += increment

## Symbols in the order they were loaded
imported_key_list = list(imported_data_dict.keys())

In [12]:
## Number of symbols loaded from the pickle batches.
## The index-lookup cells below compare their loop counter against this value.
length_ = len(imported_data_dict)
length_

267

In [13]:
## Find index value of specific symbol
## Prints "<index> <symbol>" when the symbol is present, otherwise "No".
for index_value, i in enumerate(imported_key_list):
    if i == 'ARKQ':
        print(index_value,i)
        break
else:
    ## for/else: this branch runs only when the loop ended without `break`, i.e. the symbol
    ## was not found. It does not rely on `length_` from another cell and also covers an empty list.
    print("No")

#complete_data_dict['ARKQ']

## Trying to use ARKQ as a 'control' ETF. Any info applied to it should also apply to other valid ETFs for data gathering
## Capping list at 250 because don't need all symbols 
## See test_length variable above as well to extend data collection 

241 ARKQ


In [14]:
## Control ETF symbol used for spot checks
test_symbol = 'ARKQ'

In [16]:
## Keep only the first 250 symbols; the last expression displays the resulting length.
## Note: this overwrites imported_key_list, so the full list is only recoverable by re-running the import cell.
new_key_list = imported_key_list[:250]
imported_key_list = new_key_list
len(imported_key_list)

250

In [17]:
## Split the imported symbols into ETFs, equities and symbols without a company profile
etf_data_dict = {}
equity_data_dict = {}
no_profile_data_dict = {}
## Number of symbols processed so far: if the loop raises, this is the index
## in imported_key_list of the symbol that failed.
error_count = 0
for i in imported_key_list:
    data = imported_data_dict[i]
    profile = data['companyProfile']
    if profile != 0:
        if profile['isEtf'] == True:
            etf_data_dict[i] = data          ## ETF
        else:
            equity_data_dict[i] = data       ## Equity
    else:
        ## A companyProfile of 0 marks a symbol with no profile data
        no_profile_data_dict[i] = data
    error_count += 1

etf_key_list = list(etf_data_dict.keys())
print(len(etf_data_dict))

equity_key_list = list(equity_data_dict.keys())
print(len(equity_data_dict))

20
226


In [18]:
## Shorten equity_key_list for testing
## Find index value of specific symbol
## Prints "<index> <symbol>" when the symbol is present, otherwise "No".
for index_value, i in enumerate(equity_key_list):
    if i == 'AAPL':
        print(index_value,i)
        break
else:
    ## for/else: runs only when the loop ended without `break`, i.e. the symbol was not found.
    ## The earlier version compared its counter with `length_` (the size of imported_data_dict),
    ## which is larger than equity_key_list, so "No" could never be printed.
    print("No")

6 AAPL


In [19]:
## Keep only the first 50 equity symbols and print how many remain.
## This overwrites equity_key_list in place, so re-running the cell keeps at most 50 entries.
equity_key_list = equity_key_list[0:50]
print(len(equity_key_list))

50


In [20]:
## NOTE: get_IEX_statistics bound the default of its `token` parameter when its `def` cell ran,
## so re-assigning token_status here does not change that default. Re-run the function cell
## or call get_IEX_statistics(symbol, token=token_status) for this setting to take effect.
token_status = test_token ## Set to either real token or test token for IEX

# ## Loop to get ETF stats 
# for i in etf_key_list:
#     symbol = i 
#     iex_etf_stats = get_IEX_statistics(symbol)
#     #sharesOutstanding = iex_etf_stats['sharesOutstanding']   
#     etf_data_dict[i]['Fundamentals']['sharesOutstanding'] = iex_etf_stats['sharesOutstanding']
#     etf_data_dict[i]['Fundamentals']['peRatio'] = iex_etf_stats['peRatio']
#     etf_data_dict[i]['Fundamentals']['beta'] = iex_etf_stats['beta']
#     etf_data_dict[i]['Fundamentals']['week52high'] = iex_etf_stats['week52high']
#     etf_data_dict[i]['Fundamentals']['week52low'] = iex_etf_stats['week52low']
#     etf_data_dict[i]['Fundamentals']['week52change'] = iex_etf_stats['week52change']
#     etf_data_dict[i]['Fundamentals']['avg10Volume'] = iex_etf_stats['avg10Volume']
#     etf_data_dict[i]['Fundamentals']['avg30Volume'] = iex_etf_stats['avg30Volume']
#     etf_data_dict[i]['Fundamentals']['marketcap_IEX'] = iex_etf_stats['marketcap']    
    
#     ## Replace 'marketCap' and put into 'marketcap_FMP', then delete 'marketCap' to seperate sources
#     etf_data_dict[i]['Fundamentals']['marketcap_FMP'] = etf_data_dict[i]['Fundamentals']['marketCap']
#     del etf_data_dict[i]['Fundamentals']['marketCap']
#     del etf_data_dict[i]['Fundamentals']['price_earnings']
    
#     etf_data_dict[i]['iex_statistics'] = iex_etf_stats
    
# ## Loop to get Equity stats 
# for i in equity_key_list:
#     symbol = i 
#     iex_etf_stats = get_IEX_statistics(symbol)
#     #sharesOutstanding = iex_etf_stats['sharesOutstanding']   
#     equity_data_dict[i]['Fundamentals']['sharesOutstanding'] = iex_etf_stats['sharesOutstanding']
#     equity_data_dict[i]['Fundamentals']['peRatio'] = iex_etf_stats['peRatio']
#     equity_data_dict[i]['Fundamentals']['beta'] = iex_etf_stats['beta']
#     equity_data_dict[i]['Fundamentals']['week52high'] = iex_etf_stats['week52high']
#     equity_data_dict[i]['Fundamentals']['week52low'] = iex_etf_stats['week52low']
#     equity_data_dict[i]['Fundamentals']['week52change'] = iex_etf_stats['week52change']
#     equity_data_dict[i]['Fundamentals']['avg10Volume'] = iex_etf_stats['avg10Volume']
#     equity_data_dict[i]['Fundamentals']['avg30Volume'] = iex_etf_stats['avg30Volume']
#     equity_data_dict[i]['Fundamentals']['marketcap_IEX'] = iex_etf_stats['marketcap']    
    
#     ## Replace 'marketCap' and put into 'marketcap_FMP', then delete 'marketCap' to seperate sources
#     equity_data_dict[i]['Fundamentals']['marketcap_FMP'] = equity_data_dict[i]['Fundamentals']['marketCap']
#     del equity_data_dict[i]['Fundamentals']['marketCap']
#     del equity_data_dict[i]['Fundamentals']['price_earnings']
    
#     equity_data_dict[i]['iex_statistics'] = iex_etf_stats
    

# ## Export equity/etf data 
# pkl_path_etf = Path('../Resources/complete_etf_data_dict.pkl')
# pkl_path_equity = Path('../Resources/complete_equity_data_dict.pkl')
# save_obj(etf_data_dict,pkl_path_etf)
# save_obj(equity_data_dict,pkl_path_equity)

In [28]:
## Try doing in one loop
## "full" = symbols that have a company profile; "notfull" = symbols whose companyProfile is 0
full_key_list, notfull_key_list = [], []
full_key_data_dict, notfull_key_data_dict = {}, {}
for i in imported_key_list:
    profile = imported_data_dict[i]['companyProfile']
    if profile != 0:
        data = imported_data_dict[i]
        full_key_list.append(i)
        full_key_data_dict[i] = data
    elif profile == 0:
        data = imported_data_dict[i]
        notfull_key_list.append(i)
        notfull_key_data_dict[i] = data

In [None]:
## Add IEX statistics to the 'Fundamentals' record of every symbol that has a company profile.
## One IEX request is made per symbol.
## The records are the same dict objects held by imported_data_dict, so the changes persist across
## re-runs of this cell. The FMP clean-up below is therefore written to be safe to run more than once.
for i in full_key_list:
    iex_etf_stats = get_IEX_statistics(i)
    fundamentals = full_key_data_dict[i]['Fundamentals']

    ## Fields copied under the same name as in the IEX stats payload
    for field in (
        'sharesOutstanding',
        'peRatio',
        'beta',
        'week52high',
        'week52low',
        'week52change',
        'avg10Volume',
        'avg30Volume',
    ):
        fundamentals[field] = iex_etf_stats[field]
    fundamentals['marketcap_IEX'] = iex_etf_stats['marketcap']

    ## Replace 'marketCap' and put into 'marketcap_FMP', then delete 'marketCap' to separate sources.
    ## On a re-run 'marketCap' is already gone; keep the stored 'marketcap_FMP' instead of raising KeyError.
    if 'marketCap' in fundamentals:
        fundamentals['marketcap_FMP'] = fundamentals.pop('marketCap')
    fundamentals.pop('price_earnings', None)

    ## Keep the full IEX payload next to the selected fields
    full_key_data_dict[i]['iex_statistics'] = iex_etf_stats

In [27]:
## Inspect the 'Fundamentals' record of ARKQ (the control ETF)
full_key_data_dict['ARKQ']['Fundamentals']
#full_key_data_dict[test_symbol]['companyProfile']

{'sharesOutstanding': 30626902,
 'floatShares': 0,
 'debt_ratio': 'debt_ratioValue',
 'exchange': 'New York Stock Exchange Arca',
 'final_close_price': 84.800003,
 'peRatio': 0,
 'beta': 0,
 'week52high': 104.64,
 'week52low': 70.33,
 'week52change': 0.2356534775048127,
 'avg10Volume': 394915,
 'avg30Volume': 386220,
 'marketcap_IEX': 2680620430,
 'marketcap_FMP': 184056034}

In [30]:
## Display every symbol that has a company profile (the whole list, not a preview)
full_key_list

['A',
 'AA',
 'AAAU',
 'AAL',
 'AAOI',
 'AAON',
 'AAP',
 'AAPL',
 'AAU',
 'AAWW',
 'AB',
 'ABB',
 'ABBV',
 'ABC',
 'ABCL',
 'ABEO',
 'ABEV',
 'ABG',
 'ABM',
 'ABMD',
 'ABNB',
 'ABOS',
 'ABR',
 'ABSI',
 'ABT',
 'ABUS',
 'ACAD',
 'ACB',
 'ACC',
 'ACCO',
 'ACES',
 'ACGL',
 'ACHC',
 'ACHV',
 'ACI',
 'ACIU',
 'ACIW',
 'ACLS',
 'ACM',
 'ACMR',
 'ACN',
 'ACOR',
 'ACRE',
 'ACRS',
 'ACRX',
 'ACST',
 'ACVA',
 'ACWI',
 'ACXP',
 'ADAP',
 'ADBE',
 'ADC',
 'ADCT',
 'ADES',
 'ADGI',
 'ADI',
 'ADM',
 'ADMA',
 'ADMP',
 'ADMS',
 'ADNT',
 'ADP',
 'ADPT',
 'ADS',
 'ADSK',
 'ADT',
 'ADTX',
 'ADUS',
 'ADVM',
 'ADX',
 'ADXS',
 'AEE',
 'AEG',
 'AEIS',
 'AEL',
 'AEM',
 'AEO',
 'AEP',
 'AER',
 'AERI',
 'AES',
 'AESE',
 'AEZS',
 'AFCG',
 'AFG',
 'AFIB',
 'AFIN',
 'AFL',
 'AFMD',
 'AFRM',
 'AG',
 'AGCO',
 'AGE',
 'AGEN',
 'AGFY',
 'AGG',
 'AGI',
 'AGIO',
 'AGL',
 'AGNC',
 'AGQ',
 'AGR',
 'AGRI',
 'AGRO',
 'AGRX',
 'AGS',
 'AGTC',
 'AGZ',
 'AHH',
 'AHPI',
 'AHT',
 'AIG',
 'AIHS',
 'AIM',
 'AIMC',
 'AIN',
 'AIR',
 