In [129]:
import pandas as pd
import numpy as np
import seaborn as sns
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the pickled returns table and discard its first row in one step.
# NOTE(review): the first row is presumably the empty row left over from
# computing returns out of prices -- confirm against the data-prep code.
# NOTE(review): unpickling executes arbitrary code; only load trusted files.
returns = pd.read_pickle("./Data/returns.pkl").iloc[1:]

In [3]:
# Drop every column that holds no data at all (all entries NaN).
# `dropna(how="all")` performs the same test as looping over the columns and
# checking `isnull().all()`, but vectorised. Rebinding the result (rather than
# dropping in place) keeps the cell idempotent and avoids mutating a frame
# that was itself obtained by slicing.
returns = returns.dropna(axis="columns", how="all")

In [4]:
def get_investable(t, n_rows):
    """Find the stocks in the investable universe at time t.

    The investable universe is every column of the module-level ``returns``
    frame that has no missing value in the ``n_rows`` rows immediately
    preceding ``t`` (``t`` itself is excluded).

    Parameters
    ----------
    t : index label
        Label looked up in the index of ``returns``; it must be present.
    n_rows : int
        Number of rows of history, strictly before ``t``, to keep.

    Returns
    -------
    pandas.DataFrame
        The selected window restricted to the fully-populated columns. Rows
        are ordered most-recent-first (descending index). If fewer than
        ``n_rows`` rows precede ``t`` the window is correspondingly shorter.

    Raises
    ------
    KeyError
        If ``t`` is not a label of the index.
    """
    # sort_index already returns a new frame, so no defensive deep copy is needed
    newest_first = returns.sort_index(ascending=False)

    t_index = newest_first.index.get_loc(t)

    # take n_rows worth of data up to (but excluding) the time specified;
    # in descending order those rows sit right after t's position
    window = newest_first.iloc[t_index + 1:t_index + n_rows + 1]

    # keep only the columns that are fully populated over the window.
    # A vectorised boolean mask is used instead of `~series.isna().any()`:
    # `~` on a scalar is only a logical NOT for numpy booleans, on a plain
    # Python bool it yields -1/-2, which are both truthy.
    fully_populated = window.notna().all(axis=0)

    return window.loc[:, fully_populated]

In [100]:
def format_investable(df, n_in=1, n_out=1, dropnan = True):
    """Frame a returns table as lagged features and forecast targets.

    Takes an investable dataframe and lays out, side by side, every column's
    value at times t-n_in ... t-1 (the features) and at times t ... t+n_out-1
    (the targets).

    The rows are first sorted by index in ascending order. ``shift(i)`` pulls
    the value from i rows *above*, so that only means "i observations earlier"
    when the rows are in chronological order; a frame sorted newest-first
    would otherwise have future values labelled as "(t-1)".

    Parameters
    ----------
    df : pandas.DataFrame
        One column per stock. Assumed to be indexed by observation time (any
        order); the input is not modified.
    n_in : int, default 1
        Number of lagged observations to use as features.
    n_out : int, default 1
        Number of forecast steps to use as targets (the first one is time t).
    dropnan : bool, default True
        Drop every row that contains a NaN, which includes the edge rows that
        shifting leaves incomplete.

    Returns
    -------
    pandas.DataFrame
        Columns are named '<col> (t-k)' for the lags, '<col> (t)' for the
        current step and '<col> (t+k)' for later steps, in that order. Rows
        are in ascending index order.
    """
    # guarantee chronological order so positive shifts look into the past
    df = df.sort_index(ascending=True)

    cols = []

    # input sequence (t-n, ... t-1); each lag gets its own suffix so any n_in works
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i).add_suffix(' (t-{})'.format(i)))

    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        suffix = ' (t)' if i == 0 else ' (t+{})'.format(i)
        cols.append(df.shift(-i).add_suffix(suffix))

    # put it all together
    agg = pd.concat(cols, axis=1)

    # drop the rows that contain NaNs (at least the ones created by shifting)
    if dropnan:
        agg = agg.dropna()

    return agg

In [126]:
def reshape_investable(df):
    """Split a formatted investable frame into model inputs and targets.

    Columns whose name ends with ' (t-1)' are the features and columns whose
    name ends with ' (t)' are the targets. The features are returned as a 3D
    array [samples, timesteps, features] with a single timestep; the targets
    are returned as a 2D array [samples, targets].
    """
    feature_names = [name for name in df.columns if name.endswith(' (t-1)')]
    target_names = [name for name in df.columns if name.endswith(' (t)')]

    features = df[feature_names].values
    targets = df[target_names].values

    # insert a length-1 timestep axis between samples and features
    n_samples, n_features = features.shape
    return features.reshape(n_samples, 1, n_features), targets

In [71]:
# Reference date, and the 360 rows of history that precede it restricted to
# the columns with no gaps over that window.
t = pd.to_datetime('2020-05-22')
inv = get_investable(t=t, n_rows=360)

In [102]:
# Pair each row's lagged returns (features) with the returns at t (targets),
# using the function's defaults.
f_inv = format_investable(df=inv)

In [131]:
# Split into a 3D feature array [samples, timesteps, features] and a 2D target array.
reshaped_X, y = reshape_investable(df=f_inv)

In [132]:
# One LSTM layer with 50 units reading (timesteps, features) sequences,
# followed by a dense layer with one output per target column.
model = Sequential([
    LSTM(50, input_shape=reshaped_X.shape[1:]),
    Dense(y.shape[1]),
])

In [133]:
# Train against mean absolute error using the Adam optimizer.
model.compile(optimizer='adam', loss='mae')

In [134]:
# Fit on the full sample for 50 epochs in mini-batches of 16; the returned
# History object is kept for later inspection.
history = model.fit(x=reshaped_X, y=y, batch_size=16, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
