In [None]:
import pandas as pd
import numpy as np
from collections import deque
from sklearn.preprocessing import scale
import random
import time

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

# --- Reproducibility ---------------------------------------------------------
# The notebook shuffles samples with `random` and trains a network with
# randomly initialised weights, so seed every RNG involved.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Data / labelling ---------------------------------------------------------
LOOK_AHEAD = 1         # how many rows ahead the SPX value used for the label lies
TRAIN_RATIO = 0.8      # leading fraction of rows used for training
SEQUENCE_LENGTH = 50   # number of consecutive rows per model input window

# --- Training -----------------------------------------------------------------
EPOCHS = 100
# The model ends with BatchNormalization applied to a (batch, units) tensor.
# With a batch of one sample the batch variance is zero, the layer outputs only
# its bias during training and no gradient reaches the LSTM stack, so the batch
# size must be greater than 1 (it was 1).
BATCH_SIZE = 32

# Unique run name; used as the TensorBoard log sub-directory.
MODEL_NAME = f'LA-{LOOK_AHEAD}--SL-{SEQUENCE_LENGTH}--{int(time.time())}'

# Load the dataset, indexed by its parsed 'Date' column.
df_main = pd.read_csv('dataset.csv', index_col='Date', parse_dates=['Date'])
df_main

In [None]:
# Row count of the dataset as loaded from disk.
n = df_main.shape[0]
n

In [None]:
# Pair every row with the SPX value LOOK_AHEAD rows later, then discard all
# rows containing a missing value in any column (this includes the trailing
# LOOK_AHEAD rows, which have no future value).
df_main['SPX Future Price'] = df_main['SPX'].shift(-LOOK_AHEAD)
df_main = df_main.dropna()

In [None]:
def label(curr_price, future_price):
    """Return 1 when the future price is strictly above the current one, else 0.

    Equal prices (and any comparison that evaluates false) yield 0.
    """
    rises = future_price > curr_price
    return int(bool(rises))

# Label each row by comparing its SPX value with the future one; the helper
# column is no longer needed once the label exists.
df_main['Signal'] = [
    label(current, future)
    for current, future in zip(df_main['SPX'], df_main['SPX Future Price'])
]
df_main = df_main.drop(columns=['SPX Future Price'])
df_main

In [None]:
# Class balance check: how many rows carry each Signal value (1 or 0).
df_main['Signal'].value_counts()

In [None]:
def make_stationary(df):
    """Return a transformed copy of ``df`` restricted to the model's columns.

    Steps, applied to a copy so the caller's frame is left untouched:

    1. Every column listed in ``ln_ratio_cols`` that exists in ``df`` is
       replaced by ``log1p(pct_change)``, i.e. the log of the ratio between
       consecutive rows. The first row of each such column becomes NaN.
    2. Every column listed in ``diff_cols`` that exists in ``df`` is replaced
       by its first difference (the list is currently empty).
    3. 'GEX' is divided by 1000.
    4. Only the columns in ``alt_features`` are kept, in that order, with
       'Signal' last.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column named in ``alt_features``.

    Returns
    -------
    pandas.DataFrame
        New frame with the ``alt_features`` columns.

    Raises
    ------
    KeyError
        If 'GEX' or any other ``alt_features`` column is missing.
    """
    # Columns converted to log-ratios of consecutive rows.
    ln_ratio_cols = [
        'SPX',
        'SPY Volume',
        'USO Price',
        'GLD Price',
        #'VIX P/C Ratio',
        'VIX Options Volume',
        #'SPX P/C Ratio',
        'SPX Options Volume'
    ]

    # Columns converted to first differences (all currently disabled;
    # columns whose name contains 'TY' were also candidates for this list).
    diff_cols = [
        #'DIX',
        #'GEX'
    ]

    # Columns handed to the model; 'Signal' must stay last because the
    # sequence builder treats the final column as the label.
    alt_features = [
        'SPX',
        'DIX',
        'GEX',
        'VIX P/C Ratio',
        'SPX P/C Ratio',
        'Signal'
    ]

    # Work on a copy: assigning into the argument would silently rewrite the
    # caller's frame and make a second call transform already-transformed data.
    out = df.copy()

    for col in ln_ratio_cols:
        if col in out.columns:
            out[col] = np.log1p(out[col].pct_change())

    for col in diff_cols:
        if col in out.columns:
            out[col] = out[col].diff()

    out['GEX'] = out['GEX'] / 1000

    return out[alt_features]

# Keep a copy of the labelled frame as it was before the transformation,
# then replace df_main with the transformed, column-restricted version.
df_old = df_main.copy()
df_main = make_stationary(df_main)
df_main

In [None]:
# Chronological split: the first TRAIN_RATIO share of rows trains the model,
# the remainder is held out. The cut point is derived from the frame's current
# length; the earlier `n` was measured before rows were dropped, so using it
# would push the boundary past the intended fraction.
train_index = int(len(df_main) * TRAIN_RATIO)

# dropna() removes rows left incomplete by the transformation step
# (e.g. the first row, which has no previous row to form a ratio with).
df_train = df_main.iloc[:train_index, :].dropna()
df_test = df_main.iloc[train_index:, :].dropna()

def preprocess_data(df):
    """Build shuffled, class-balanced sliding-window samples from ``df``.

    The last column of ``df`` is treated as the label and every other column
    as a feature. A window of SEQUENCE_LENGTH consecutive rows slides over the
    frame; each full window is passed through ``scale`` on its own and paired
    with the label of the window's most recent row.

    The samples are shuffled, split by label (1 versus everything else), both
    groups are cut to the size of the smaller one, and the merged result is
    shuffled again.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` holding the scaled windows and ``y`` holding their labels.
    """
    window = deque(maxlen=SEQUENCE_LENGTH)
    samples = []

    for row in df.values:
        # Features only; the trailing label column is excluded.
        window.append(list(row[:-1]))
        if len(window) != SEQUENCE_LENGTH:
            continue  # not enough history yet
        samples.append([scale(np.array(window)), row[-1]])

    random.shuffle(samples)

    # Partition by label, keeping the shuffled order within each group.
    buys = [[seq, signal] for seq, signal in samples if signal == 1]
    sells = [[seq, signal] for seq, signal in samples if not (signal == 1)]

    # Undersample the larger class so both are equally represented.
    per_class = min(len(buys), len(sells))
    balanced = buys[:per_class] + sells[:per_class]

    random.shuffle(balanced)

    X = [seq for seq, _signal in balanced]
    y = [signal for _seq, signal in balanced]

    return np.array(X), np.array(y)

# Build model inputs for both splits. preprocess_data shuffles and
# class-balances whatever it receives, so that applies to the test split too.
X_train, y_train = preprocess_data(df_train)
X_test, y_test = preprocess_data(df_test)

In [None]:
# Sanity check: show the first training window followed by its label.
print(X_train[0], y_train[0], sep='\n')

In [None]:
# Stacked-LSTM binary classifier: four LSTM(96) layers, each followed by
# BatchNormalization, and a single sigmoid unit that outputs the probability
# of Signal == 1.
model = Sequential()

# The first layer declares the per-sample input shape taken from X_train,
# i.e. everything after the batch axis.
model.add(LSTM(96, input_shape=X_train.shape[1:], return_sequences=True, activation='tanh'))
#model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(96, return_sequences=True, activation='tanh'))
#model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(96, return_sequences=True, activation='tanh'))
#model.add(Dropout(0.1))
model.add(BatchNormalization())

# The last recurrent layer returns only its final output (no return_sequences),
# which feeds the classification head.
model.add(LSTM(96, activation='tanh'))
#model.add(Dropout(0.1))
model.add(BatchNormalization())

# Disabled variant: two further LSTM(128) + BatchNormalization layers followed
# by Dense(64, relu) and Dense(32, relu) ahead of the output layer.

model.add(Dense(1, activation='sigmoid'))

# `lr` and `decay` are no longer accepted by current Keras optimizers.
# InverseTimeDecay with decay_steps=1 reproduces the old `decay` behaviour:
# lr_t = 0.01 / (1 + 1e-6 * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.01,
    decay_steps=1,
    decay_rate=1e-6)
opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

tensorboard = TensorBoard(log_dir=f'logs/{MODEL_NAME}')

#filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"  # unique file name that will include the epoch and the validation acc for that epoch
#checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor="val_acc", verbose=1, save_best_only=True, mode='max')) # saves only the best ones

# The TensorBoard callback was created but never handed to fit(), so nothing
# was ever logged; pass it explicitly.
history = model.fit(X_train,
                    y_train,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=(X_test, y_test),
                    callbacks=[tensorboard])