CSE543 Algorithms for Nonlinear Optimization Course Project: 
RNN/LSTM Notebook

After adding AAPL.csv to the notebook's directory, all cells can be rerun in the current order to reproduce the results described in our paper for the RNN/LSTM model. 

In [None]:
# import libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, precision_score, recall_score, confusion_matrix
from tensorflow import keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.utils import to_categorical

In [None]:
# Functions to calculate RSI and MACD
def rsi(data, periods=14):
    """Relative Strength Index of the ``Close`` column.

    Average gains and losses are plain rolling means over ``periods`` rows
    (no exponential / Wilder smoothing).

    Args:
        data: Table with a ``Close`` column (e.g. a pandas DataFrame).
        periods: Length of the rolling window, in rows.

    Returns:
        A Series aligned with ``data``. The first ``periods`` entries are NaN:
        the first difference is NaN and every window needs ``periods`` valid
        values.
    """
    change = data['Close'].diff()

    # Split each move into its upward and downward part; NaN stays NaN.
    ups = change.mask(change < 0, 0)
    downs = change.mask(change > 0, 0)

    mean_up = ups.rolling(window=periods).mean()
    # Downward moves are <= 0, so negate to get a non-negative average loss.
    mean_down = -downs.rolling(window=periods).mean()

    strength = mean_up / mean_down
    return 100 - (100 / (1 + strength))

def macd(data, short=12, long=26, signal=9):
    """MACD line and its signal line for the ``Close`` column.

    Args:
        data: Table with a ``Close`` column (e.g. a pandas DataFrame).
        short: Span of the fast exponential moving average.
        long: Span of the slow exponential moving average.
        signal: Span of the exponential moving average applied to the MACD line.

    Returns:
        Tuple ``(macd_line, signal_line)`` of Series aligned with ``data``.
    """
    close = data['Close']
    fast_ema = close.ewm(span=short, adjust=False).mean()
    slow_ema = close.ewm(span=long, adjust=False).mean()

    macd_line = fast_ema - slow_ema
    return macd_line, macd_line.ewm(span=signal, adjust=False).mean()

In [None]:
# Load data and preprocess
# Add AAPL.csv to the same directory as the notebook.
# Alternative dataset: data = pd.read_csv('spy.csv')
# Relative path: resolved against the notebook's working directory, as the
# setup instructions describe ('/AAPL.csv' pointed at the filesystem root).
data = pd.read_csv('AAPL.csv')

# Feature engineering: per-row price summaries, relative ranges and indicators.
data['Average'] = (data['Open'] + data['Close'] + data['High'] + data['Low']) / 4
data['HL_PCT'] = (data['High'] - data['Low']) / data['Low']
data['PCT_change'] = (data['Close'] - data['Open']) / data['Open']
data['Volume_pct_change'] = data['Volume'].pct_change()
data['RSI'] = rsi(data)
data['MACD'], data['Signal'] = macd(data)

# Target: the closing price `forecast_out` rows ahead.
forecast_out = 5
data['Target'] = data['Close'].shift(-forecast_out)

# Price direction (1 if the future close is higher than today's, else 0).
# Rows without a target compare as False here; they are removed just below.
data['Price_Direction'] = (data['Target'] > data['Close']).astype(int)

# A zero denominator (e.g. a zero-volume row) yields +/-inf, which dropna()
# would keep and StandardScaler would reject; treat such values as missing.
data = data.replace([np.inf, -np.inf], np.nan)

# Drop every row with a missing value in any column. This removes the last
# `forecast_out` rows (no target) and also the leading rows where the
# indicators are still undefined (RSI warm-up, first Volume_pct_change).
data = data.dropna()

# Split the data into features (X1) and target (y1) arrays.
X1 = data.drop(['Date', 'Target', 'Price_Direction'], axis=1).to_numpy()
y1 = data['Price_Direction'].to_numpy()

# NOTE(review): the scaler is fitted on the entire series, so later
# (test-period) rows influence how earlier rows are scaled. Fitting it inside
# each cross-validation fold on the training rows only would avoid this
# look-ahead.
scaler = StandardScaler()
X1 = scaler.fit_transform(X1)

# Expanding-window splitter for train/test folds. gap=forecast_out leaves out
# the last `forecast_out` training samples before each test fold: their labels
# are computed from closes `forecast_out` rows ahead, i.e. from prices that
# already belong to the test period.
tscv = TimeSeriesSplit(n_splits=5, gap=forecast_out)

In [None]:
# Build the LSTM samples: each sample stacks the `timesteps` feature rows that
# immediately precede row `end` and is labelled with the direction of row `end`.
timesteps = 5
sample_ends = range(timesteps, len(X1))

X = np.array([X1[end - timesteps:end, :] for end in sample_ends])
y = np.array([y1[end] for end in sample_ends])

In [None]:
#ADAM with 0.001 initial learning rate
# For every expanding-window fold: train a fresh 5-layer LSTM classifier on the
# training windows and report classification metrics on the test windows.
seed = 42  # fixed so weight init, dropout masks and batch shuffling repeat across runs
for i, (train_index, test_index) in enumerate(tscv.split(X)):
  X_train = X[train_index, :]
  X_test = X[test_index, :]
  y_train = y[train_index]
  y_test = y[test_index]

  # Scale the features with statistics of the training fold only. X was
  # standardized upstream using the whole series; because standardization is
  # unaffected by an earlier per-feature affine rescaling, refitting on the
  # training windows gives the same values as scaling the raw features per
  # fold, so no test-period statistics leak into training.
  n_features = X_train.shape[-1]
  fold_scaler = StandardScaler().fit(X_train.reshape(-1, n_features))
  X_train = fold_scaler.transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)
  X_test = fold_scaler.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)

  # Seed Python, NumPy and TensorFlow so each fold's run is repeatable.
  tf.keras.utils.set_random_seed(seed)

  # Five stacked 50-unit LSTM layers, each followed by 50% dropout; only the
  # last LSTM collapses the sequence before the sigmoid output unit.
  model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=False),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
  ])

  # Compile the model
  model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

  # Train the model
  model.fit(X_train, y_train, epochs=50)

  # Printed per fold, in order: fold index, Keras accuracy, number of
  # predicted 0s, number of predicted 1s, then sklearn accuracy, ROC AUC, F1,
  # precision, recall and the confusion matrix.
  loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
  print(i)
  print(accuracy)
  # predict() returns one column per output unit; flatten it so the metrics
  # receive 1-D arrays matching y_test.
  yprob = model.predict(X_test).ravel()
  ypred = np.where(yprob > 0.5, 1, 0)
  print(np.count_nonzero(ypred < 0.5))
  print(np.count_nonzero(ypred > 0.5))
  accuracy = accuracy_score(y_test, ypred)
  roc_auc = roc_auc_score(y_test, yprob)
  # zero_division=0 keeps the reported 0.0 when a fold has no positive
  # predictions, without raising an undefined-metric warning.
  f1 = f1_score(y_test, ypred, zero_division=0)
  precision = precision_score(y_test, ypred, zero_division=0)
  recall = recall_score(y_test, ypred, zero_division=0)
  conf_matrix = confusion_matrix(y_test, ypred)
  print(accuracy)
  print(roc_auc)
  print(f1)
  print(precision)
  print(recall)
  print(conf_matrix)

In [None]:
#ADAM with 0.0001 initial learning rate
# For every expanding-window fold: train a fresh 5-layer LSTM classifier on the
# training windows and report classification metrics on the test windows.
seed = 42  # fixed so weight init, dropout masks and batch shuffling repeat across runs
for i, (train_index, test_index) in enumerate(tscv.split(X)):
  X_train = X[train_index, :]
  X_test = X[test_index, :]
  y_train = y[train_index]
  y_test = y[test_index]

  # Scale the features with statistics of the training fold only. X was
  # standardized upstream using the whole series; because standardization is
  # unaffected by an earlier per-feature affine rescaling, refitting on the
  # training windows gives the same values as scaling the raw features per
  # fold, so no test-period statistics leak into training.
  n_features = X_train.shape[-1]
  fold_scaler = StandardScaler().fit(X_train.reshape(-1, n_features))
  X_train = fold_scaler.transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)
  X_test = fold_scaler.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)

  # Seed Python, NumPy and TensorFlow so each fold's run is repeatable.
  tf.keras.utils.set_random_seed(seed)

  # Five stacked 50-unit LSTM layers, each followed by 50% dropout; only the
  # last LSTM collapses the sequence before the sigmoid output unit.
  model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=False),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
  ])

  # Compile the model
  model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

  # Train the model
  model.fit(X_train, y_train, epochs=50)

  # Printed per fold, in order: fold index, Keras accuracy, number of
  # predicted 0s, number of predicted 1s, then sklearn accuracy, ROC AUC, F1,
  # precision, recall and the confusion matrix.
  loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
  print(i)
  print(accuracy)
  # predict() returns one column per output unit; flatten it so the metrics
  # receive 1-D arrays matching y_test.
  yprob = model.predict(X_test).ravel()
  ypred = np.where(yprob > 0.5, 1, 0)
  print(np.count_nonzero(ypred < 0.5))
  print(np.count_nonzero(ypred > 0.5))
  accuracy = accuracy_score(y_test, ypred)
  roc_auc = roc_auc_score(y_test, yprob)
  # zero_division=0 keeps the reported 0.0 when a fold has no positive
  # predictions, without raising an undefined-metric warning.
  f1 = f1_score(y_test, ypred, zero_division=0)
  precision = precision_score(y_test, ypred, zero_division=0)
  recall = recall_score(y_test, ypred, zero_division=0)
  conf_matrix = confusion_matrix(y_test, ypred)
  print(accuracy)
  print(roc_auc)
  print(f1)
  print(precision)
  print(recall)
  print(conf_matrix)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
0
0.5379477739334106
1134
513
0.5379477838494232
0.5396024852783252
0.4239212717638153
0.5458089668615984
0.3465346534653465
[[606 233]
 [528 280]]
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 

In [None]:
#SGD with 0.001 initial learning rate
# For every expanding-window fold: train a fresh 5-layer LSTM classifier on the
# training windows and report classification metrics on the test windows.
seed = 42  # fixed so weight init, dropout masks and batch shuffling repeat across runs
for i, (train_index, test_index) in enumerate(tscv.split(X)):
  X_train = X[train_index, :]
  X_test = X[test_index, :]
  y_train = y[train_index]
  y_test = y[test_index]

  # Scale the features with statistics of the training fold only. X was
  # standardized upstream using the whole series; because standardization is
  # unaffected by an earlier per-feature affine rescaling, refitting on the
  # training windows gives the same values as scaling the raw features per
  # fold, so no test-period statistics leak into training.
  n_features = X_train.shape[-1]
  fold_scaler = StandardScaler().fit(X_train.reshape(-1, n_features))
  X_train = fold_scaler.transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)
  X_test = fold_scaler.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)

  # Seed Python, NumPy and TensorFlow so each fold's run is repeatable.
  tf.keras.utils.set_random_seed(seed)

  # Five stacked 50-unit LSTM layers, each followed by 50% dropout; only the
  # last LSTM collapses the sequence before the sigmoid output unit.
  model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=False),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
  ])

  # Compile the model
  model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

  # Train the model
  model.fit(X_train, y_train, epochs=50)

  # Printed per fold, in order: fold index, Keras accuracy, number of
  # predicted 0s, number of predicted 1s, then sklearn accuracy, ROC AUC, F1,
  # precision, recall and the confusion matrix.
  loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
  print(i)
  print(accuracy)
  # predict() returns one column per output unit; flatten it so the metrics
  # receive 1-D arrays matching y_test.
  yprob = model.predict(X_test).ravel()
  ypred = np.where(yprob > 0.5, 1, 0)
  print(np.count_nonzero(ypred < 0.5))
  print(np.count_nonzero(ypred > 0.5))
  accuracy = accuracy_score(y_test, ypred)
  roc_auc = roc_auc_score(y_test, yprob)
  # zero_division=0 keeps the reported 0.0 when a fold has no positive
  # predictions, without raising an undefined-metric warning.
  f1 = f1_score(y_test, ypred, zero_division=0)
  precision = precision_score(y_test, ypred, zero_division=0)
  recall = recall_score(y_test, ypred, zero_division=0)
  conf_matrix = confusion_matrix(y_test, ypred)
  print(accuracy)
  print(roc_auc)
  print(f1)
  print(precision)
  print(recall)
  print(conf_matrix)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
0
0.5094110369682312
1647
0
0.509411050394657
0.49152323605423714
0.0
0.0
0.0
[[839   0]
 [808   0]]
Epoch 1/50


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
1
0.5003035664558411
1647
0
0.5003035822707954
0.5464401786030271
0.0
0.0
0.0
[[824   0]
 [823   0]]
Epoch 1/50


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
2
0.4517304301261902
1647
0
0.4517304189435337
0.46222493123280817
0.0
0.0
0.0
[[744   0]
 [903   0]]
Epoch 1/50


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
3
0.5774134993553162
0
1647
0.5774134790528234
0.5406158973615189
0.7321016166281755
0.5774134790528234
1.0
[[  0 696]
 [  0 951]]
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/5

In [None]:
#SGD with 0.01 initial learning rate
# For every expanding-window fold: train a fresh 5-layer LSTM classifier on the
# training windows and report classification metrics on the test windows.
seed = 42  # fixed so weight init, dropout masks and batch shuffling repeat across runs
for i, (train_index, test_index) in enumerate(tscv.split(X)):
  X_train = X[train_index, :]
  X_test = X[test_index, :]
  y_train = y[train_index]
  y_test = y[test_index]

  # Scale the features with statistics of the training fold only. X was
  # standardized upstream using the whole series; because standardization is
  # unaffected by an earlier per-feature affine rescaling, refitting on the
  # training windows gives the same values as scaling the raw features per
  # fold, so no test-period statistics leak into training.
  n_features = X_train.shape[-1]
  fold_scaler = StandardScaler().fit(X_train.reshape(-1, n_features))
  X_train = fold_scaler.transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)
  X_test = fold_scaler.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)

  # Seed Python, NumPy and TensorFlow so each fold's run is repeatable.
  tf.keras.utils.set_random_seed(seed)

  # Five stacked 50-unit LSTM layers, each followed by 50% dropout; only the
  # last LSTM collapses the sequence before the sigmoid output unit.
  model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(50, return_sequences=False),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
  ])

  # Compile the model
  model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])

  # Train the model
  model.fit(X_train, y_train, epochs=50)

  # Printed per fold, in order: fold index, Keras accuracy, number of
  # predicted 0s, number of predicted 1s, then sklearn accuracy, ROC AUC, F1,
  # precision, recall and the confusion matrix.
  loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
  print(i)
  print(accuracy)
  # predict() returns one column per output unit; flatten it so the metrics
  # receive 1-D arrays matching y_test.
  yprob = model.predict(X_test).ravel()
  ypred = np.where(yprob > 0.5, 1, 0)
  print(np.count_nonzero(ypred < 0.5))
  print(np.count_nonzero(ypred > 0.5))
  accuracy = accuracy_score(y_test, ypred)
  roc_auc = roc_auc_score(y_test, yprob)
  # zero_division=0 keeps the reported 0.0 when a fold has no positive
  # predictions, without raising an undefined-metric warning.
  f1 = f1_score(y_test, ypred, zero_division=0)
  precision = precision_score(y_test, ypred, zero_division=0)
  recall = recall_score(y_test, ypred, zero_division=0)
  conf_matrix = confusion_matrix(y_test, ypred)
  print(accuracy)
  print(roc_auc)
  print(f1)
  print(precision)
  print(recall)
  print(conf_matrix)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
0
0.5094110369682312
1647
0
0.509411050394657
0.532109624848063
0.0
0.0
0.0
[[839   0]
 [808   0]]
Epoch 1/50


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
1
0.5003035664558411
1647
0
0.5003035822707954
0.517392855878918
0.0
0.0
0.0
[[824   0]
 [823   0]]
Epoch 1/50


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
2
0.4517304301261902
1647
0
0.4517304189435337
0.45668708843877637
0.0
0.0
0.0
[[744   0]
 [903   0]]
Epoch 1/50


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
3
0.5774134993553162
0
1647
0.5774134790528234
0.5425263183339981
0.7321016166281755
0.5774134790528234
1.0
[[  0 696]
 [  0 951]]
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/5