In [1]:
## FMP is currently used to source historical price data; it exports the historical data to an individual CSV file for each symbol.
## In theory, it should be possible to parse those CSV files for historical data and use IEX to supply whatever FMP is missing, producing more
## complete data frames. FMP cannot source fundamentals data (market cap, shares outstanding) for ETFs; IEX can.
## This notebook needs code that fills the gaps in the FMP data with IEX data. Unlike FMP, IEX has a limited number of API calls, so we should
## source from FMP first and only then call IEX to fill in the gaps.

## This is a cleaner rewrite of the earlier code: it contains no test code and focuses on adding more fields of data to the "Fundamentals" field.

In [2]:
import pandas as pd

import glob

from pathlib import Path
import csv

import os
import requests
import json

#import quandl

from dotenv import load_dotenv
load_dotenv()

True

In [3]:
## Date-range constants

## Data frames are created for the window start_date .. end_date ('YYYY-MM-DD' strings)
start_date, end_date = '2020-01-01', '2021-10-29'
## Default range for the IEX functions - more history is not needed at the moment
default_date_range = '2y'

In [4]:
## IEX Constants
## API tokens are read from the environment (os.getenv returns None when a variable is not set)
iex_api_key = os.getenv("IEX_API_KEY")
iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

## Redundant assignment, but the shorter names improve readability throughout the code
real_token = iex_api_key
test_token = iex_test_api_key

## Base URLs: get_IEX_statistics pairs base_url_iex with real_token and sandbox_url with test_token
base_url_iex = 'https://cloud.iexapis.com/stable/'
sandbox_url = 'https://sandbox.iexapis.com/stable/'

## IEX status test: request the 'status' endpoint and display the response object as the cell output
test_resp = requests.get(base_url_iex + 'status')
test_resp

<Response [200]>

In [5]:
## NOTE: get_IEX_statistics uses `token=token_status` as a default argument, so it captures the value
## this variable holds when that function is defined; later reassignments do not change the default.
token_status = test_token ## Set to either real token or test token for IEX

In [6]:
## Load IEX to get ETF statistics
def get_IEX_statistics(stock_ticker, token=token_status):
    """Fetch the IEX `stats` payload for one ticker and return the decoded JSON.

    Parameters
    ----------
    stock_ticker : str
        Symbol inserted into the `stock/<ticker>/stats/` request path.
    token : str
        Must equal either `test_token` (the request goes to `sandbox_url`) or
        `real_token` (the request goes to `base_url_iex`).
        NOTE: the default is the value `token_status` held when this function
        was defined; reassigning `token_status` afterwards does not change it,
        so pass `token=` explicitly to switch environments.

    Returns
    -------
    The object produced by `Response.json()` for the request.

    Raises
    ------
    ValueError
        If `token` matches neither `test_token` nor `real_token`. Previously
        this case fell through both branches and failed with an
        UnboundLocalError on `return`.
    """
    ## Same precedence as before: the test token is checked first.
    if token == test_token:
        api_root = sandbox_url
    elif token == real_token:
        api_root = base_url_iex
    else:
        ## Do not echo the token itself - it is a credential.
        raise ValueError(
            'token must be either test_token or real_token; '
            'check IEX_TEST_API_KEY / IEX_API_KEY in the environment'
        )

    resp_data = requests.get(api_root + 'stock/' + stock_ticker + '/stats/?token=' + token)
    return resp_data.json()

In [7]:
## FMP Constants
## Base URLs for the v3 and v4 routes used by the FMP helper functions
fmpbase_urlv3 = 'https://fmpcloud.io/api/v3/'
fmpbase_urlv4 = 'https://fmpcloud.io/api/v4/'
## FMP API key read from the environment (None when FMP_CLOUD_API_KEY is not set)
api_key = os.getenv("FMP_CLOUD_API_KEY")

## FMP Functions - this file should not need to call the historical data function. 
##                 This file should import the CSV files with historical data and then append IEX testing data
##                 to any data FMP cannot source. 
def get_FMP_historical_data(symbol, startDate=start_date, endDate=end_date, apiKey=api_key):
    """Download FMP `historical-price-full` data for one symbol as a DataFrame.

    Parameters
    ----------
    symbol : str
        Ticker appended to the endpoint path.
    startDate, endDate : str
        Sent as the `from` / `to` query parameters.
    apiKey : str
        Sent as the `apikey` query parameter.

    Returns
    -------
    pandas.DataFrame
        The records under the response's 'historical' key, with the row order
        reversed relative to the response (presumably oldest-first afterwards -
        TODO confirm the API's ordering), indexed by a datetime 'Date' column,
        and without the 'label' column.

    Raises
    ------
    KeyError
        If the response has no 'historical' key, or the records lack a
        'date' or 'label' field.
    """
    query_url = (
        fmpbase_urlv3 + 'historical-price-full/' + symbol
        + '?from=' + startDate + '&to=' + endDate + '&apikey=' + apiKey
    )
    payload = requests.get(query_url).json()

    prices = pd.DataFrame(payload['historical']).rename(columns={'date': 'Date'})
    prices['Date'] = pd.to_datetime(prices['Date'])
    prices = prices.iloc[::-1]  ## Reverse the row order of the DataFrame
    return prices.set_index('Date').drop(columns='label')

def get_float_data_FMP(symbol):
    """Query the FMP v4 `shares_float` endpoint and return its first record.

    `symbol` is sent as the `symbol` query parameter and the module-level
    `api_key` as `apikey`. The decoded JSON response is indexed with `[0]`,
    so an empty list response raises IndexError.
    """
    query_url = fmpbase_urlv4 + 'shares_float?symbol=' + symbol + '&apikey=' + api_key
    records = requests.get(query_url).json()
    return records[0]

def get_company_profile_FMP_json(symbol):
    """Query the FMP v3 `profile` endpoint and return its first record.

    Example request: https://fmpcloud.io/api/v3/profile/AAPL?apikey='yourkeyhere'

    The decoded JSON response is indexed with `[0]`, so an empty list
    response raises IndexError.
    """
    profile_url = fmpbase_urlv3 + 'profile/' + symbol + '?apikey=' + api_key
    profiles = requests.get(profile_url).json()
    return profiles[0]

# def save_and_export_raw_df_csv(data, symbol):
#     path = ('../FilesExportIndividualStockDFs/'+symbol+'_combined_df.csv')
#     data.to_csv(path)

In [8]:
## Use pickle module to import and export and save files
import pickle
def load_obj(path):
    """Read the file at `path` in binary mode and return the unpickled object.

    SECURITY: unpickling can execute arbitrary code - only load files that
    this project wrote itself.
    """
    with open(path, mode='rb') as pickle_file:
        restored = pickle.load(pickle_file)
    return restored
def save_obj(obj, path):
    """Pickle `obj` to the file at `path` (opened in binary write mode, so an
    existing file is overwritten) using pickle.HIGHEST_PROTOCOL."""
    with open(path, mode='wb') as pickle_file:
        pickle.dump(obj, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

In [9]:
# # ## Import FTD File using CSV 
# # ftd_df = pd.read_csv(
# #     Path('../Resources/ftd_all_data.csv'),
# #     index_col=0, parse_dates=True
# # )

# ## Don't need FTD file at the moment. Import symbol_df as a way to get symbols to iterate through saved CSVs. 
# ## Some symbols will fail, need to create function that skips missed indexes 

# ## Import Symbol and CUSIP list using CSV
# symbol_df = pd.read_csv(
#     Path('../Resources/symbol_all_list.csv'),
#     index_col=0
# )


## Do not seem to be needed for this file, but code here anyway if either file is needed 

In [10]:
# ## How to find index value of specific stock 
# index_value = 0
# for i in complete_key_list:
#     if i == 'ACP':
#         print(index_value)
#         break
#     index_value +=1

In [11]:
## Merge the pickled chunk files into one dictionary keyed by symbol.
## Each file is named data_complete_<x>_<y>.pkl, with x and y both advancing by `increment`.
x = 0
y = 49
increment = 50
complete_data_dict = {}

test_length = 1500
## Current last file is 8450
while x < test_length:
    pkl_path = Path('../FilesExportCompleteFMP/data_complete_' + str(x) + '_' + str(y) + '.pkl')
    data_obj = load_obj(pkl_path)
    ## Copy every symbol -> data entry; a symbol seen again replaces the earlier entry
    complete_data_dict.update(data_obj)

    x += increment
    y += increment

## Symbols in insertion order
complete_key_list = list(complete_data_dict)

In [12]:
## Number of symbols loaded into complete_data_dict (displayed as the cell output)
length_ = len(complete_data_dict)
length_

267

In [22]:
## Find index value of specific symbol
## for/else: the else branch runs only when the loop ends without `break`, i.e. the symbol
## is not in complete_key_list. The previous check compared the counter against length_
## (the size of complete_data_dict), so "No" was never printed once the key list was
## shortened, nor for an empty list.
for index_value, i in enumerate(complete_key_list):
    if i == 'ARKQ':
        print(index_value, i)
        break
else:
    index_value = len(complete_key_list)  ## same final value the manual counter reached
    print("No")

#complete_data_dict['ARKQ']

## ARKQ is used as a 'control' ETF: any info applied to it should also apply to the other valid ETFs when gathering data.
## The key list is capped at 250 (in a later cell) because not all symbols are needed.
## See also the test_length variable above, which can be raised to extend data collection.

241 ARKQ


In [14]:
## Symbol used to look up a single entry of etf_data_dict for inspection
test_symbol = 'ARKQ'

In [15]:
## Cap the key list at its first 250 symbols; the final len() is the cell's displayed output
new_key_list = complete_key_list[:250]
complete_key_list = new_key_list
len(complete_key_list)

250

In [19]:
## Split the symbols in complete_key_list into ETFs, equities, and symbols without a company profile
etf_data_dict = {}
equity_data_dict = {}
no_profile_data_dict = {}
error_count = 0                  ## Symbols processed so far - gives the index in complete_key_list when the loop fails
for i in complete_key_list:
    data = complete_data_dict[i]
    profile = data['companyProfile']
    if profile != 0:                           ## A profile of 0 is routed to no_profile_data_dict
        bucket = etf_data_dict if profile['isEtf'] == True else equity_data_dict
    else:
        bucket = no_profile_data_dict
    bucket[i] = data
    error_count += 1

etf_key_list = list(etf_data_dict)
print(len(etf_data_dict))

equity_key_list = list(equity_data_dict)
print(len(equity_data_dict))

20
226


In [23]:
## Shorten equity_key_list for testing
## Find index value of specific symbol
## for/else: the else branch runs only when the loop ends without `break`, i.e. the symbol
## is not in equity_key_list. The previous check compared the counter against length_
## (the size of complete_data_dict, not of equity_key_list), so "No" could not be printed
## for a missing symbol.
for index_value, i in enumerate(equity_key_list):
    if i == 'AAPL':
        print(index_value, i)
        break
else:
    index_value = len(equity_key_list)  ## same final value the manual counter reached
    print("No")

6 AAPL


In [29]:
## Keep only the first 50 equity symbols (re-running this cell slices the already-shortened list again)
equity_key_list = equity_key_list[0:50]
print(len(equity_key_list))

50


In [30]:
token_status = test_token ## Set to either real token or test token for IEX

def _add_iex_statistics(data_dict, symbols, token):
    """Fetch IEX statistics for each symbol and store them in `data_dict`.

    For every symbol in `symbols`, the 'sharesOutstanding' value of the IEX
    response is written to data_dict[symbol]['Fundamentals'] and the full
    response is stored under data_dict[symbol]['iex_statistics'].
    `data_dict` is modified in place. One IEX request is made per symbol.
    """
    for symbol in symbols:
        iex_stats = get_IEX_statistics(symbol, token=token)
        data_dict[symbol]['Fundamentals']['sharesOutstanding'] = iex_stats['sharesOutstanding']
        data_dict[symbol]['iex_statistics'] = iex_stats

## The token is passed explicitly: get_IEX_statistics captured its default token when it was
## defined, so reassigning token_status above would otherwise have no effect on these calls.

## Loop to get ETF stats
_add_iex_statistics(etf_data_dict, etf_key_list, token_status)

## Loop to get Equity stats
## NOTE: only the symbols in equity_key_list are updated; the whole equity_data_dict is exported below.
_add_iex_statistics(equity_data_dict, equity_key_list, token_status)


## Export equity/etf data
pkl_path_etf = Path('../Resources/complete_etf_data_dict.pkl')
pkl_path_equity = Path('../Resources/complete_equity_data_dict.pkl')
save_obj(etf_data_dict, pkl_path_etf)
save_obj(equity_data_dict, pkl_path_equity)

In [18]:
## Display the stored entry for test_symbol (the whole dictionary entry is shown as the cell output)
etf_data_dict[test_symbol]

{'Fundamentals': {'sharesOutstanding': 31234935,
  'floatShares': 0,
  'marketCap': 184056034,
  'debt_ratio': 'debt_ratioValue',
  'price_earnings': 'price_earningsValue',
  'exchange': 'New York Stock Exchange Arca',
  'final_close_price': 84.800003},
 'dataFrame':                  open       high        low      close   adjClose    volume  \
 Date                                                                          
 2020-01-02  37.580002  37.891998  37.516998  37.891998  37.564457   18100.0   
 2020-01-03  37.570000  37.990002  37.570000  37.797001  37.470280   28300.0   
 2020-01-06  37.639999  37.859001  37.435001  37.842999  37.515881   27400.0   
 2020-01-07  38.009998  38.375000  37.860001  38.310001  37.978848   23800.0   
 2020-01-08  38.410000  39.000000  38.330002  38.869999  38.534004   43900.0   
 ...               ...        ...        ...        ...        ...       ...   
 2021-10-25  82.490000  84.755000  82.210000  84.390000  84.390000  330479.0   
 2021-10-26  