In [1]:
import pandas as pd

from pathlib import Path
import csv

import os
import requests
import json

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
## IEX credentials, read from the process environment
iex_api_key = os.environ.get("IEX_API_KEY")
iex_test_api_key = os.environ.get("IEX_TEST_API_KEY")

## Aliases for the two keys above; redundant, but they read better at call sites
real_token, test_token = iex_api_key, iex_test_api_key

## Production and sandbox roots of the IEX Cloud API
base_url_iex = 'https://cloud.iexapis.com/stable/'
sandbox_url = 'https://sandbox.iexapis.com/stable/'

## Probe the production `status` endpoint; the response is displayed as the cell output
test_resp = requests.get(f"{base_url_iex}status")
test_resp

<Response [200]>

In [3]:
token_status = test_token ## Default token for get_IEX_statistics: set to either real_token or test_token (IEX)

In [4]:
## Load IEX to get ETF statistics 
def get_IEX_statistics(stock_ticker, token=token_status):
    """Fetch the IEX ``stats`` payload for a single ticker.

    Parameters
    ----------
    stock_ticker : str
        Symbol inserted into the ``stock/<ticker>/stats/`` endpoint path.
    token : str, optional
        IEX API token. A token equal to ``test_token`` is sent to
        ``sandbox_url``; a token equal to ``real_token`` is sent to
        ``base_url_iex``. The default is the value ``token_status`` held when
        this function was defined.

    Returns
    -------
    The decoded JSON body of the HTTP response.

    Raises
    ------
    ValueError
        If ``token`` is empty/None or matches neither configured token.
        Previously these cases surfaced as an opaque ``TypeError`` (string
        concatenation with None) or ``UnboundLocalError`` (no branch taken).
    """
    if not token:
        raise ValueError(
            "IEX token is missing; check IEX_API_KEY / IEX_TEST_API_KEY in the environment"
        )

    ## Same precedence as before: the sandbox comparison is evaluated first
    if token == test_token:
        api_root = sandbox_url
    elif token == real_token:
        api_root = base_url_iex
    else:
        raise ValueError("token must be equal to either test_token or real_token")

    resp_data = requests.get(api_root+'stock/'+stock_ticker+'/stats/?token='+token)
    return resp_data.json()

In [5]:
## FMP Constants
## API key is read from the environment; None when FMP_CLOUD_API_KEY is not set
api_key = os.environ.get("FMP_CLOUD_API_KEY")
## Roots of the v3 and v4 FMP Cloud REST APIs
fmpbase_urlv3 = 'https://fmpcloud.io/api/v3/'
fmpbase_urlv4 = 'https://fmpcloud.io/api/v4/'

## FMP Functions 
# def get_FMP_historical_data(symbol, startDate=start_date, endDate=end_date, apiKey=api_key):
#     url_hist_price = fmpbase_urlv3+'historical-price-full/'
#     url_hist_query_with_date = url_hist_price+symbol+'?from='+startDate+'&to='+endDate+'&apikey='+apiKey
#     resp_data = requests.get(url_hist_query_with_date)
#     json_ = resp_data.json()
#     data = json_['historical']
#     df = pd.DataFrame(data)
#     df.rename(columns={'date':'Date'},inplace=True)
#     df['Date'] = pd.to_datetime(df['Date'])
#     df = df.reindex(index=df.index[::-1]) ## Reverse the DataFrame 
#     df.set_index('Date',inplace=True)
#     df.drop(columns='label',inplace=True)
#     return df

def get_float_data_FMP(symbol):
    """Return the first record of the FMP v4 ``shares_float`` response for ``symbol``.

    The query URL is assembled from the module-level ``fmpbase_urlv4`` and
    ``api_key``; the response body is decoded as JSON and its element at
    index 0 is returned.
    """
    endpoint = ''.join((fmpbase_urlv4, 'shares_float?symbol=', symbol, '&apikey=', api_key))
    records = requests.get(endpoint).json()
    return records[0]

def get_company_profile_FMP_json(symbol):
    """Return the first record of the FMP v3 ``profile`` response for ``symbol``.

    Example request: https://fmpcloud.io/api/v3/profile/AAPL?apikey='yourkeyhere'
    The URL is assembled from the module-level ``fmpbase_urlv3`` and ``api_key``.
    """
    endpoint = ''.join((fmpbase_urlv3, 'profile/', symbol, '?apikey=', api_key))
    profiles = requests.get(endpoint).json()
    return profiles[0]

def save_and_export_raw_df_csv(data, symbol):
    """Write ``data`` to ``../FilesExportIndividualStockDFs/<symbol>_combined_df.csv``.

    ``data`` must expose a ``to_csv(path)`` method; ``symbol`` must be a string.
    """
    destination = ''.join(('../FilesExportIndividualStockDFs/', symbol, '_combined_df.csv'))
    data.to_csv(destination)

In [6]:
## Use pickle module to import and export and save files
import pickle
def load_obj(path):
    """Unpickle and return the object stored in the file at ``path``."""
    with open(path, 'rb') as stream:
        restored = pickle.load(stream)
    return restored
def save_obj(obj, path ):
    """Pickle ``obj`` into the file at ``path`` using the highest available protocol.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(path, 'wb') as stream:
        pickle.dump(obj, stream, protocol=pickle.HIGHEST_PROTOCOL)

In [7]:
## Per-symbol result containers (three independent dicts)
data_dict_all, data_dict_full, data_dict_uneven = {}, {}, {}

## Matching symbol lists (three independent lists)
symbol_list_all, symbol_list_full, symbol_list_uneven = [], [], []

test_length = 199  ## Presumably a shorter bound for trial runs - not referenced in the cells shown here
total_length = 33915  ## Number of possible symbols
loop_length = total_length


full_length_ = 1469  ## Length of a full data frame (2016-01 to 2021-10)

iex_token = real_token  ## IEX Token Status

In [8]:

x = 0
y = 49
increment = 50

## Walk the chunked pickle files (data_complete_<x>_<y>.pkl), enrich every symbol
## with IEX / FMP fundamentals, and file the result under the "all" container
## plus either the "full" or the "uneven" container.
while x < loop_length:
    str_symbol1 = str(x)
    str_symbol2 = str(y)

    ## Load time series data - Historical(FMP), FTD(SEC), and Short Interest (FINRA)
    pkl_path = Path('../FilesExportCompleteFMP_big/data_complete_'+str_symbol1+'_'+str_symbol2+'.pkl')
    data_obj = load_obj(pkl_path)
    data_obj_key_list = list(data_obj.keys())
    for symbol in data_obj_key_list:
        ## Presumably kept so the failing symbol can be inspected after a crash
        err_symbol = symbol
        ## Get time series data
        time_series_df = data_obj[symbol].sort_index() ## Create dataframe from pkl object

        ## IEX - best effort: any request or lookup failure zeroes every IEX field.
        ## `except Exception` (rather than a bare `except`) so that a kernel
        ## interrupt / KeyboardInterrupt still stops this long-running loop.
        try:
            iex_stats = get_IEX_statistics(stock_ticker=symbol,token=iex_token)
            sharesOutstanding = iex_stats['sharesOutstanding']
            peRatio = iex_stats['peRatio']
            beta = iex_stats['beta']
            week52high = iex_stats['week52high']
            week52low = iex_stats['week52low']
            week52change = iex_stats['week52change']
            avg10Volume = iex_stats['avg10Volume']
            avg30Volume = iex_stats['avg30Volume']
            marketcap_IEX = iex_stats['marketcap']
        except Exception:
            iex_stats = 0
            sharesOutstanding = 0
            peRatio = 0
            beta = 0
            week52high = 0
            week52low = 0
            week52change = 0
            avg10Volume = 0
            avg30Volume = 0
            marketcap_IEX = 0

        ## FMP company profile - same best-effort policy, zeros on failure
        try:
            fmp_profile = get_company_profile_FMP_json(symbol)
            exchange = fmp_profile['exchangeShortName']
            marketcap_FMP = fmp_profile['mktCap']
        except Exception:
            fmp_profile = 0
            exchange = 0
            marketcap_FMP = 0

        ## FMP float data - same best-effort policy, zeros on failure
        try:
            float_data = get_float_data_FMP(symbol)
            floatShares = float_data['floatShares']
        except Exception:
            float_data = 0
            floatShares = 0

        ## Create data_dict:
        data = {
            'Fundamentals': {
                'sharesOutstanding': sharesOutstanding,  ## From iex_statistics, 0 on failure
                'floatShares': floatShares,              ## From FMP float data, 0 on failure
                #'debt_ratio': 'debt_ratioValue',        ## Blank - no source yet
                'exchange': exchange,                    ## From FMP company profile, 0 on failure
                'final_close_price': time_series_df.iloc[-1]['close'],  ## Last row of the sorted time series
                'peRatio': peRatio,                      ## From iex_statistics, 0 on failure
                'beta': beta,                            ## From iex_statistics, 0 on failure
                'week52high': week52high,                ## From iex_statistics, 0 on failure
                'week52low': week52low,                  ## From iex_statistics, 0 on failure
                'week52change': week52change,            ## From iex_statistics, 0 on failure
                'avg10Volume': avg10Volume,              ## From iex_statistics, 0 on failure
                'avg30Volume': avg30Volume,              ## From iex_statistics, 0 on failure
                'marketcap_IEX': marketcap_IEX,          ## From iex_statistics, 0 on failure
                'marketcap_FMP': marketcap_FMP           ## From FMP company profile, 0 on failure
            },
            'dataFrame': time_series_df,                     ## FMP historical merged with Nasdaq Short Data and SEC FTD Data
            'companyProfile': fmp_profile,                   ## Sourced from FMP, otherwise =0
            'floatData': float_data,                         ## Sourced from FMP, otherwise =0
            'textNews': ['article1','article2','article3'],  ## Placeholder - not sourced
            'returns': 'returns_data',                       ## Placeholder - not calculated
            'iex_statistics': iex_stats                      ## Raw IEX stats payload, otherwise =0
        }

        ## Export individual data_dict
        export_path = Path('../FilesExport_IndividualDataDict/'+symbol+'data_dict.pkl')
        save_obj(data,export_path)

        ## A frame with exactly full_length_ rows counts as "full"; anything else is "uneven"
        if len(time_series_df) == full_length_:
            data_dict_full[symbol] = data
            symbol_list_full.append(symbol)
        else:
            data_dict_uneven[symbol] = data
            symbol_list_uneven.append(symbol)

        ## Append all data
        data_dict_all[symbol] = data
        ## Append symbol to list
        symbol_list_all.append(symbol)

    x += increment
    y += increment
    ## NOTE(review): the last chunk's upper bound is clamped to loop_length -
    ## confirm this matches the name of the final chunk file on disk.
    if y > loop_length:
        y = loop_length

## If while loop finishes - export all data
## Pickle each container under ../Resources/
for pkl_path, dict_to_save in (
    (Path('../Resources/data_dict_all.pkl'), data_dict_all),
    (Path('../Resources/data_dict_full.pkl'), data_dict_full),
    (Path('../Resources/data_dict_uneven.pkl'), data_dict_uneven),
):
    save_obj(dict_to_save, pkl_path)


## One single-column frame per symbol list
symbol_list_alldf = pd.DataFrame(symbol_list_all)
symbol_list_fulldf = pd.DataFrame(symbol_list_full)
symbol_list_unevendf = pd.DataFrame(symbol_list_uneven)

## Export list
for list_path, symbol_frame in (
    ('../Resources/all_symbol_list.csv', symbol_list_alldf),
    ('../Resources/symbol_list_full.csv', symbol_list_fulldf),
    ('../Resources/symbol_list_uneven.csv', symbol_list_unevendf),
):
    symbol_frame.to_csv(list_path)

In [9]:
len(data_dict_all)

12316

In [10]:
len(data_dict_full)

2878

In [11]:
len(data_dict_uneven)

9438