In [1]:
## File for searching and adding IEX stock data 
## Will search and append all historical data to all stocks 
## Should only be run once, then saved, and not run again in order to conserve API Calls 

## Watch difference between real and sandbox keys for testing purposes 

In [2]:
import pandas as pd

import glob

from pathlib import Path
import csv

import os
import requests
import json

import quandl

from dotenv import load_dotenv
load_dotenv()

True

In [3]:
## Constants 

## Set start date variable - dataframes will be created starting from this date
start_date = '2020-01-01'
end_date = '2021-10-29'
#default_date_range = '2y' ## Default Range for IEX functions - don't need more at the moment

In [4]:
## QUANDL/NASDAQ 
nsdq_api_key = os.environ.get('NASDAQ_API_KEY')
base_url_nsdq = 'https://data.nasdaq.com/api/v3/datasets/FINRA/'

# iex_api_key = os.getenv("IEX_API_KEY")
# iex_test_api_key = os.getenv("IEX_TEST_API_KEY")

# ## Redundant Assignment but improves Readability throughout code 
# real_token = iex_api_key
# test_token = iex_test_api_key

# base_url_iex = 'https://cloud.iexapis.com/stable/'
# sandbox_url = 'https://sandbox.iexapis.com/stable/'

# ## IEX Status Test 
# test_resp = requests.get(base_url_iex + 'status')
# test_resp

In [5]:
## Import FTD File and Symbol List using Pickle 
# ftd_df = load_obj('AnalysisResources/analysis_ftd_all_data.pkl')
# symbol_df = load_obj('AnalysisResources/analysis_symbol_all_list.pkl')


## Import FTD File using CSV 
ftd_df = pd.read_csv(
    Path('../Resources/ftd_all_data.csv'),
    index_col=0, parse_dates=True
)

## Import Symbol and CUSIP list using CSV
symbol_df = pd.read_csv(
    Path('../Resources/symbol_all_list.csv'),
    index_col=0
)

In [6]:
## Get Short "Interest" Data from Quandl 
def get_short_data_QUANDL(symbol):
    string_nsdq = "FINRA/FNSQ_"+symbol
    string_nyse = "FINRA/FNYX_"+symbol
    
    df1 = quandl.get(string_nsdq,start_date=start_date,end_date=end_date,authtoken=nsdq_api_key)   ## Nasdaq
    df2 = quandl.get(string_nyse,start_date=start_date,end_date=end_date,authtoken=nsdq_api_key)   ## NYSE

    df1 = df1.rename(columns={'ShortVolume':'ShortVolumeNSDQ','TotalVolume':'TotalVolumeNSDQ'})
    #df1 = df1.drop(columns={'ShortExemptVolume'})
    df1 = df1.rename(columns={'ShortExemptVolume':'ShortExemptVolumeNSDQ'})

    df2 = df2.rename(columns={'ShortVolume':'ShortVolumeNYSE','TotalVolume':'TotalVolumeNYSE'})
    #df2 = df2.drop(columns={'ShortExemptVolume'})
    df2 = df2.rename(columns={'ShortExemptVolume':'ShortExemptVolumeNYSE'})

    df3 = pd.merge(df1,df2,on='Date',how='outer')
    
    return df3


## Return FTD Data from SEC FTD files using a Stock's CUSIP number to sort 
def return_ftd_data_cusip(cusip_number):
    df = ftd_df.copy()
    df = df.reset_index()
    df = df.set_index("CUSIP")
    df = df.loc[cusip_number]
    df = df.set_index('Date')
    return df


## Return the CUSIP symbol from the symbol_df symbol list 
def return_CUSIP_from_symbol(symbol):
    new_symbol_df = pd.DataFrame(symbol_df)
    new_symbol_df.dropna(inplace=True)
    new_symbol_df.reset_index(inplace=True,drop=True)
    new_symbol_df.set_index('SYMBOL',inplace=True)
    cusip_variable = new_symbol_df.loc[symbol]
    cusip_variable = cusip_variable['CUSIP']
    return cusip_variable

In [7]:
## FMP Constants 
fmpbase_urlv3 = 'https://fmpcloud.io/api/v3/'
fmpbase_urlv4 = 'https://fmpcloud.io/api/v4/'
api_key = os.getenv("FMP_CLOUD_API_KEY")

## FMP Functions 
def get_FMP_historical_data(symbol, startDate=start_date, endDate=end_date, apiKey=api_key):
    url_hist_price = fmpbase_urlv3+'historical-price-full/'
    url_hist_query_with_date = url_hist_price+symbol+'?from='+startDate+'&to='+endDate+'&apikey='+apiKey
    resp_data = requests.get(url_hist_query_with_date)
    json_ = resp_data.json()
    data = json_['historical']
    df = pd.DataFrame(data)
    df.rename(columns={'date':'Date'},inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.reindex(index=df.index[::-1]) ## Reverse the DataFrame 
    df.set_index('Date',inplace=True)
    df.drop(columns='label',inplace=True)
    return df

api_key = os.getenv("FMP_CLOUD_API_KEY")
def get_float_data_FMP(symbol):
    url_float_shares = fmpbase_urlv4+'shares_float?symbol='
    url_query_float_data = url_float_shares+symbol+'&apikey='+api_key
    resp_data = requests.get(url_query_float_data)
    #df = pd.DataFrame(resp_data.json())
    json_ = resp_data.json()
    return json_[0]

def get_company_profile_FMP_json(symbol):
    ## https://fmpcloud.io/api/v3/profile/AAPL?apikey='yourkeyhere'
    url_company_profile_url = fmpbase_urlv3+'profile/'+symbol+'?apikey='+api_key
    resp_data = requests.get(url_company_profile_url)
    json_response = resp_data.json()
    return json_response[0]

def save_and_export_raw_df_csv(data, symbol):
    path = ('../FilesExport/'+symbol+'_combined_df.pkl')
    data.to_csv(path)

In [8]:
## Test get_all 
test_symbol = 'GME'
symbol = test_symbol
cusip_number = return_CUSIP_from_symbol(symbol)
ftd_data = return_ftd_data_cusip(cusip_number)
ftd_data = ftd_data.drop(columns={'SYMBOL'})

fmp_data = get_FMP_historical_data(symbol)
df1 = pd.merge(fmp_data,ftd_data, on='Date',how='outer')
df1['QUANTITY_FAILS'] = df1['QUANTITY_FAILS'].fillna(0)
df2 = get_short_data_QUANDL(symbol)
df = pd.merge(df1,df2,on='Date',how='outer')

df

Unnamed: 0_level_0,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,vwap,changeOverTime,QUANTITY_FAILS,ShortVolumeNSDQ,ShortExemptVolumeNSDQ,TotalVolumeNSDQ,ShortVolumeNYSE,ShortExemptVolumeNYSE,TotalVolumeNYSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020-01-02,6.14,6.47,6.07,6.31,6.31,4453598.0,4453598.0,0.17,2.769,6.28333,0.02769,10609.0,872127.0,8472.0,1185904.0,276386.0,2448.0,527414.0
2020-01-03,6.21,6.25,5.84,5.88,5.88,3543933.0,3543933.0,-0.33,-5.314,5.99000,-0.05314,28692.0,520156.0,11999.0,832247.0,306258.0,2.0,506189.0
2020-01-06,5.80,5.91,5.60,5.85,5.85,3394774.0,3394774.0,0.05,0.862,5.78667,0.00862,28665.0,498788.0,20569.0,859426.0,198834.0,15.0,319375.0
2020-01-07,5.77,5.83,5.44,5.52,5.52,5230265.0,5230265.0,-0.25,-4.333,5.59667,-0.04333,0.0,604854.0,6372.0,1908324.0,351972.0,991.0,523321.0
2020-01-08,5.49,5.85,5.41,5.72,5.72,5629442.0,5629442.0,0.23,4.189,5.66000,0.04189,7039.0,746989.0,26168.0,1480915.0,529867.0,28.0,852908.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-25,169.42,174.80,167.26,173.97,173.97,1442806.0,1442806.0,4.55,2.686,172.01000,0.02686,11251.0,224603.0,6814.0,371983.0,55031.0,748.0,114748.0
2021-10-26,173.36,185.00,172.50,177.84,177.84,2176749.0,2176749.0,4.48,2.584,178.44667,0.02584,0.0,338831.0,4819.0,600800.0,75546.0,966.0,167756.0
2021-10-27,180.00,183.09,172.33,173.51,173.51,1106998.0,1106998.0,-6.49,-3.606,176.31000,-0.03606,67280.0,164158.0,3233.0,292965.0,34396.0,238.0,84514.0
2021-10-28,175.16,183.14,175.00,182.85,182.85,1696206.0,1696206.0,7.69,4.390,180.33000,0.04390,11310.0,250043.0,4653.0,433029.0,40388.0,304.0,126039.0


In [9]:
make "error"

SyntaxError: invalid syntax (<ipython-input-9-d4183dc90d71>, line 1)

In [None]:
## Get all time series (iex data and short interest data) and merge with FTD data by passing a single stock symbol 
def get_data_all_time_series(symbol, save=False):
    cusip_number = return_CUSIP_from_symbol(symbol)
    ftd_data = return_ftd_data_cusip(cusip_number)
    ftd_data = ftd_data.drop(columns={'SYMBOL'})
    
    fmp_data = get_FMP_historical_data(symbol)
    df1 = pd.merge(fmp_data,ftd_data, on='Date',how='outer')
    
#     iex_data = get_iex_chart(symbol,token=token_value)
#     iex_data = iex_data[start_date:end_date]
#     iex_data.reset_index(inplace=True)
#     iex_data.rename(columns={'index':'Date'},inplace=True)
#     iex_data['Date'] = pd.to_datetime(iex_data['Date'])
#     iex_data.set_index('Date',inplace=True)
#     ## Merge data
#     df1 = pd.merge(iex_data, ftd_data, on='Date',how='outer')
#     df1['QUANTITY_FAILS'] = df1['QUANTITY_FAILS'].fillna(0)
#     df2 = get_short_data_QUANDL(symbol)
#     df = pd.merge(df1,df2,on='Date',how='outer')
    
    ## Export DF based off symbol Name, and parameter
    if save==True:
        pkl_path = Path('FilesExportIndividualSymbol/'+symbol+'_combined_df.pkl')
        save_obj(df, pkl_path)
    
    
    return df 

In [None]:
## Use pickle module to import and export and save files
import pickle
def load_obj(path):
    with open(path, 'rb') as f:
        return pickle.load(f)
def save_obj(obj, path ):
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

In [None]:
test_df = get_data_all_time_series('GME',save=False)
test_df.info()

In [None]:
test_df

In [None]:
## Practice making list of dict
## Also a preview of how the upcoming while loop will be setup. 

## Finished variable placeholders 
cusip_number_var = 0
time_series_df = 'dfgoeshere'
FMP_company_profile_json = 0
FMP_float_data_df = 0
market_cap = 0
exchange = 0
   
database_y_test = {'StockSymbol_1':
{
    'Fundamentals':                                             ##
    {
        'sharesOutstanding':'sharesOutstandingValue',          ## Can source for equity but not ETF     
        #'floatShares':'floatSharesValue',                     ## Can't source for ETFs at the moment 
        'marketCap':market_cap,                               ## Sourced FMP
        'debt_ratio':'debt_ratioValue',                        ## Need to source
        'price_earnings':'price_earningsValue',                 ## Need to source
        'exchange':exchange,                                  ## Sourced FMP
        'close_price_value':'last_days_close_price_maybe',        ## Need to source
    },
    'dataFrame':time_series_df,
    'companyProfile':FMP_company_profile_json,
    'floatData':FMP_float_data_df,
    'textNews':['article1','article2','article3'],
    'returns':'returns_data'
}
                  
}

In [None]:
#database_y_test['StockSymbol_1']

In [None]:
database_y_test['StockSymbol_1']['dataFrame']

In [None]:
## Append database_y_test

data = {'Fundamentals':{
            'sharesOutstanding':'sharesOutstandingValue',
            'floatShares':'floatSharesValue',
            'marketCap':'marketCapValue',
            'debt_ratio':'debt_ratioValue',
            'price_earnings':'price_earningsValue'
        },
        'dataFrame':'dfGoesHere_3',
        'textNews':['article1','article2','article3']  
    }
database_y_test['StockSymbol_2'] = data
database_y_test['StockSymbol_2']['dataFrame']

In [None]:
#fix next thing "make error here"
## Error to stop code from running - need to review next code block before continuing 

In [None]:
## Set iteration through symbol_df 
length_ = len(symbol_df)
loop_token = test_token ## Set to either real_token or test_token 

complete_dict = {}            ## Empty dicts in order to create
incomplete_dict = {}          ## two big files at end of while loop 
stock_dict = {}            ## Dict with completed stock data 
etf_dict = {}              ## Dict with completed ETF data 


x = 0    
y = 50
increment = 50  ## Make sure increment is right
test_length = 50 

## Setup in 50 increments to find error in case the loop fails. 

## If not testing make x <= test_length
while (x < test_length):
    ## Initialize temp dictionaries 
    complete_dict_temp = {}
    incomplete_dict_temp = {}
    ## Initialize temp dictionaries 
    complete_dict_temp_stock = {}
    complete_dict_temp_etf = {}
    
    range_var = range(x,y)
    str_symbol1 = str(x)
    y2 = y - 1 
    str_symbol2 = str(y2)
    
    for i in range_var: 
        ## Iterate through symbol list and create data by symbol
        symbol_var = symbol_df['SYMBOL'][i]
        
        # Use while running loop, if error is made, can see what index it happened on 
        # can also check current value of symbol_var as well 
        error_var = i
        
        ## Get Time Series Data 
        try:
            time_series_df = get_data_all_time_series(symbol_var, token_value=loop_token)
        except KeyError:     ## Some symbols fail and are unreadable. Unreadable symbols are unimportant and okay to be discarded
            continue
        except:
            continue
            
        ## Get FMP Data 
        try: 
            FMP_company_profile_json = get_company_profile_FMP_json(symbol_var)
            if FMP_company_profile_json['isEtf'] == False:
                FMP_float_data_df = get_float_data_FMP(symbol_var)
            else: FMP_float_data_df = 0
            market_cap = FMP_company_profile_json['mktCap']
            exchange = FMP_company_profile_json['exchange']
        except IndexError:
            FMP_company_profile_json = 0
            FMP_float_data_df = 0
            market_cap = 0
            exchange = 0          
        
        
        ## Get Data to add into dict     
        data = {
            'Fundamentals':                                             ##
            {
                'sharesOutstanding':'sharesOutstandingValue',          ## Need to source     
                #'floatShares':'floatSharesValue',                     ## Can't source for ETFs at the moment 
                'marketCap':market_cap,                               ## Sourced FMP
                'debt_ratio':'debt_ratioValue',                        ## Need to source
                'price_earnings':'price_earningsValue',                 ## Need to source
                'exchange':exchange,                                  ## Sourced FMP
                'close_price_value':'last_days_close_price_maybe',        ## Need to source
            },
            'dataFrame':time_series_df,
            'companyProfile':FMP_company_profile_json,
            'floatData':FMP_float_data_df,
            'textNews':['article1','article2','article3'],
            'returns':'returns_data'
        }


            
            
        ## Check if null values, add to different dicts if null values present, or no nulls present
        bool_var = time_series_df.isnull().values.any()
        if bool_var == False:
            complete_dict[symbol_var] = data
            complete_dict_temp[symbol_var] = data
            ## Seperate by Stocks and Equities
            if FMP_company_profile_json != 0:    ## Make sure no error with FMP_company_profile_json
                if FMP_company_profile_json['isEtf'] == False:
                    ## Export stock 
                    stock_dict[symbol_var] = data
                else: ## Export ETF
                    etf_dict[symbol_var] = data  
        elif bool_var == True:
            incomplete_dict[symbol_var] = data
            incomplete_dict_temp[symbol_var] = data

    
    ## Exporting in batches is useful to catch errors, and also pick up where you left off 
    pickle_path1= Path('../FilesExportComplete/data_complete_'+str_symbol1+'_'+str_symbol2+'.pkl')
    save_obj(complete_dict_temp,pickle_path1)
    pickle_path2= Path('../FilesExportIncomplete/data_incomplete_'+str_symbol1+'_'+str_symbol2+'.pkl')
    save_obj(incomplete_dict_temp,pickle_path2)
    
    ## Check before run, if incorrect, can waste a lot of API credits  
    x += increment
    y += increment
    
    ## Export in blocks of {increment} in-case there are errors while processing data. 
    ## Can pick up where the function left off by changing x and y vars to
    ## avoid repeating API calls by doing this in order to not burn 
    ## IEX API tokens unnecessarily 
    
    
## If while loop finishes - export all data 
pkl_path_complete= Path('../Resources/data_complete_all.pkl')
save_obj(complete_dict,pkl_path_complete)
pkl_path_incomplete= Path('../Resources/data_incomplete_all.pkl')
save_obj(incomplete_dict,pkl_path_incomplete)
pkl_path_stocks = Path('../Resources/data_stock_list.pkl')
save_obj(stock_dict,pkl_path_stocks)
pkl_path_etf = Path('../Resources/data_etf_list.pkl')
save_obj(etf_dict,pkl_path_etf)

In [None]:
error_var    #27 


In [None]:

symbol_var

In [None]:
# json_test = get_company_profile_FMP_json(symbol_var)
# json_test

In [None]:
## Test to see what was exported 
pkl_path_1 = Path('../FilesExportComplete/data_complete_0_49.pkl')
import_data  = load_obj(pkl_path_1)
import_data['A']

## Was successful. Created what was wanted, no errors during process.  
## Can add to and adapt above while loop much further 

In [None]:
key_list1 = []
for key in complete_dict.keys():
    key_list1.append(key)  
len(key_list1)

In [None]:
key_list1

In [None]:
key_list = []
for key in import_data.keys():
    key_list.append(key)  
len(key_list)

In [None]:
key_list

In [None]:
pkl_path_last = Path('../Resources/data_stock_list.pkl')
import_data_last  = load_obj(pkl_path_last)
key_list = []
for key in import_data_last.keys():
    key_list.append(key)  
len(key_list)

In [None]:
key_list

In [None]:
import_data_last['A']['floatData']