### PreProcessing - Append Prices
Append the adjusted closing price to each ticker's fundamentals CSV file for every filing date (or, when no price exists for that date, the closest available earlier date).

In [None]:
import operator
import math
import random
import pandas as pd
import datetime as dt
import numpy as np
import os
import urllib
import json
import requests
from io import StringIO

from pathlib import Path
from eod import EodHistoricalData

from functools import partial

from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp

from sklearn.preprocessing import StandardScaler

#display every row of a DataFrame/Series in notebook output instead of truncating it
pd.set_option('display.max_rows', None)

In [3]:
#paths for data - set prefix to location of Data folder
#path_prefix is the machine-specific root; every other path_* value is a sub-folder
#that gets appended to it by plain string concatenation (so each starts with a separator)
path_prefix = r'C:\Users\OEM\GDrive\WQU'
path_att = r'\Data\fundamentals_by_attribute'
path_fun = r'\Data\fundamentals_by_ticker'
path_std = r'\Data\standardised_fundamentals'
path_eda = r'\Data\exploratory_data_analysis'
#folder holding the df_rob_<ticker>.csv files that the price-append loop reads and rewrites
path_rob = r'\Data\robust_scaling'

In [4]:
#Instantiate datasource client
#The API key is read from the EOD_API_KEY environment variable when it is set, so the
#credential does not have to be stored in the notebook. The literal is kept only as a
#backward-compatible fallback.
#NOTE(review): a key committed to source should be rotated and this fallback removed.
api_key = os.environ.get("EOD_API_KEY", "618f834b7bfe27.18132752")
client = EodHistoricalData(api_key)

In [5]:
#get ticker symbols from exchange
#the result is consumed below as a sequence of records that each carry 'Type' and 'Code' keys
resp = client.get_exchange_symbols(exchange='NYSE')

In [6]:
#build the list of ticker codes, keeping only common and preferred stock listings
tickers = [
    listing['Code']
    for listing in resp
    if listing['Type'] in ('Common Stock', 'Preferred Stock')
]

In [None]:
#submission dates for financial statements are typically last day of the month which may be a
#weekend with no trading - so gaps need to be filled from the closest earlier day that has a price
def _closest_prior_close(close_by_date, filing_date, max_days_back=3):
    """Return the adjusted close from the nearest earlier calendar day.

    Steps back from ``filing_date`` one calendar day at a time (1, 2, ...
    ``max_days_back`` days) and returns the first price found, so the closest
    available day always wins.

    Args:
        close_by_date: Series of adjusted close prices indexed by date.
        filing_date: Timestamp of the filing row that has no price.
        max_days_back: How many calendar days to look back at most.

    Returns:
        The price found, or ``None`` when none of the candidate days has one.
    """
    for days_back in range(1, max_days_back + 1):
        candidate = filing_date - dt.timedelta(days=days_back)
        try:
            return close_by_date.loc[candidate]
        except KeyError:
            #no quote on that day (weekend/holiday) - try one day further back
            continue
    return None


#for each ticker get prices and append the adjusted close to its fundamentals file
for ticker in tickers:
    filepath = Path(path_prefix + path_rob + "/" + 'df_rob_{}.csv'.format(ticker))
    print(ticker)

    #tickers without a fundamentals file are skipped entirely; nothing is read or written
    if not os.path.isfile(filepath):
        continue

    prices = client.get_prices_eod(ticker)
    df_prices = pd.DataFrame(prices)
    #an empty response yields a frame without the expected columns - skip rather than crash
    if 'date' not in df_prices.columns or 'adjusted_close' not in df_prices.columns:
        print('No prices returned for {}'.format(ticker))
        continue
    df_prices.set_index('date', inplace=True)
    df_prices.index = pd.to_datetime(df_prices.index)

    #read standardised fundamentals
    df_std = pd.read_csv(filepath, index_col=0)
    df_std.index = pd.to_datetime(df_std.index)

    #if adj close already appended - remove, so re-running the cell does not clash on join
    if 'adjusted_close' in df_std.columns:
        df_std.drop('adjusted_close', axis=1, inplace=True)

    #join close prices to fundamentals on date
    df_std = df_std.join(df_prices['adjusted_close'], how='left')

    #for each row in fundamentals where no price was joined, use the price from the closest
    #of the 1, 2 or 3 previous days; positions are used because the date index may repeat
    loc_ac = df_std.columns.get_loc('adjusted_close')
    for i in np.flatnonzero(df_std['adjusted_close'].isna().to_numpy()):
        filing_date = df_std.index[i]
        prior_close = _closest_prior_close(df_prices['adjusted_close'], filing_date)
        if prior_close is None:
            print('No price for {}'.format(filing_date))
        else:
            df_std.iloc[i, loc_ac] = prior_close

    #written only for tickers processed above, so one ticker's data never lands in another's file
    df_std.to_csv(filepath)