## Let's test models with other look-back windows for the LSTM

### 100 Days

In [None]:
# Build sliding-window samples: every input is the `window` consecutive values
# (column 0 of close_scaled) that immediately precede its target row.
window = 100  # look-back length, in rows of close_scaled

# The first 75% of the rows form the training portion, the rest the test portion
training_size = round(0.75 * len(close_scaled))
training_data, test_data = close_scaled[:training_size], close_scaled[training_size:]

# Training targets: rows window .. end of the training portion
X_train = [close_scaled[t - window:t, 0] for t in range(window, len(training_data))]
y_train = [close_scaled[t, 0] for t in range(window, len(training_data))]

# Test targets begin `window` rows after the split, so each test input window
# lies entirely inside the test portion
X_test = [
    close_scaled[t - window:t, 0]
    for t in range(len(training_data) + window, len(close_scaled))
]
y_test = [
    close_scaled[t, 0]
    for t in range(len(training_data) + window, len(close_scaled))
]


In [None]:
# Turn the Python lists of samples/targets into NumPy arrays
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

In [None]:
# Append a trailing axis of size 1 so the inputs are 3-D: (rows, columns, 1)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [None]:
# Keras is a NN library built on top of Tensorflow that has LSTM built-in
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Bidirectional

In [None]:
# Stacked bidirectional LSTM regressor. A Dropout(0.2) layer follows every
# recurrent layer to reduce overfitting.
model = Sequential()
# input_shape is given to the Bidirectional wrapper (the layer that is actually
# added to the Sequential model) rather than to the wrapped LSTM. Passed to the
# inner LSTM it is not seen by Sequential, the model stays unbuilt, and the
# model.summary() call below fails.
model.add(Bidirectional(LSTM(50, return_sequences=True), input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(Bidirectional(LSTM(50, return_sequences=True)))
model.add(Dropout(0.2))
# Final recurrent layer: return only the last output of the sequence
model.add(Bidirectional(LSTM(50, return_sequences=False)))
model.add(Dropout(0.2))
model.add(Dense(1))  # Dense layer holds the output prediction

model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

In [None]:
# Fit the network on the training windows (100 epochs, mini-batches of 64)
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=100,
    verbose=1,
)

In [None]:
import tensorflow as tf
import math
from sklearn.metrics import mean_squared_error

In [None]:
# Run the trained model over the test windows
predicted_price = model.predict(x=X_test)

In [None]:
# Root-mean-squared error between the test targets and the predictions.
# Both are still in the scaled space here (the inverse transform happens later).
math.sqrt(
    mean_squared_error(y_test, predicted_price)
)

In [None]:
# Transform predicted normalized values back to prices in dollars by undoing
# the scaling with `scaler`; the last line displays the resulting array
# NOTE(review): assumes `scaler` is the same object that produced close_scaled - confirm
predicted_price = scaler.inverse_transform(predicted_price)
predicted_price

In [None]:
# Pair every predicted close with the date of the row it was predicted for:
# the rows of `aapl` from position training_size + window onwards
prediction_df = pd.DataFrame(predicted_price, columns=['Close'])
prediction_df['Date'] = pd.to_datetime(
    aapl['Date'].iloc[training_size + window:].values
)
prediction_df.head()

In [None]:
# Create a new dataframe indexed by Date (so that dates appear on the x-axis of plots).
# assign() returns a new object, so `aapl` itself is left unmodified; the previous
# `aapl_dateind = aapl` only aliased the frame and overwrote aapl['Date'] in place.
aapl_dateind = aapl.assign(Date=pd.to_datetime(aapl['Date'])).set_index('Date')
aapl_dateind

In [None]:
# Re-index the predictions by Date so they share an x-axis with the full dataset
prediction = prediction_df.set_index(keys=['Date'])
prediction

In [None]:
# The plot offset equals the look-back window used to build the samples
look_back = window

plt.figure(figsize=(18, 9))
# Actual closes for the rows that have a prediction
plt.plot(
    aapl_dateind['Close'].iloc[training_size + look_back:len(close_scaled)],
    label='Validation',
)
# Model output for those same rows
plt.plot(prediction['Close'], label='Prediction')
# Everything before the first predicted row
plt.plot(
    aapl_dateind['Close'].iloc[:training_size + look_back],
    label='Historical',
)
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

In [None]:
# Persist the 100-day model so it can be reloaded for the comparison below
model.save(filepath='../models/LSTMmodel100')

### 30 Days

In [None]:
# Build sliding-window samples: every input is the `window` consecutive values
# (column 0 of close_scaled) that immediately precede its target row.
window = 30  # look-back length, in rows of close_scaled

# The first 75% of the rows form the training portion, the rest the test portion
training_size = round(0.75 * len(close_scaled))
training_data, test_data = close_scaled[:training_size], close_scaled[training_size:]

# Training targets: rows window .. end of the training portion
X_train = [close_scaled[t - window:t, 0] for t in range(window, len(training_data))]
y_train = [close_scaled[t, 0] for t in range(window, len(training_data))]

# Test targets begin `window` rows after the split, so each test input window
# lies entirely inside the test portion
X_test = [
    close_scaled[t - window:t, 0]
    for t in range(len(training_data) + window, len(close_scaled))
]
y_test = [
    close_scaled[t, 0]
    for t in range(len(training_data) + window, len(close_scaled))
]


In [None]:
# Lists -> NumPy arrays
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

# Append a trailing axis of size 1 so the inputs are 3-D: (rows, columns, 1)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [None]:
# Three stacked 50-unit LSTM layers, each followed by Dropout(0.2) to reduce
# overfitting, and a single-unit Dense layer that produces the prediction
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(50, return_sequences=True),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(1),
])

model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

In [None]:
# Fit the network on the training windows (100 epochs, mini-batches of 64)
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=100,
    verbose=1,
)

In [None]:
# Run the trained model over the test windows
predicted_price = model.predict(x=X_test)

In [None]:
# Root-mean-squared error between the test targets and the predictions.
# Both are still in the scaled space here (the inverse transform happens later).
math.sqrt(
    mean_squared_error(y_test, predicted_price)
)

In [None]:
# Transform predicted normalized values back to prices in dollars by undoing
# the scaling with `scaler`; the last line displays the resulting array
# NOTE(review): assumes `scaler` is the same object that produced close_scaled - confirm
predicted_price = scaler.inverse_transform(predicted_price)
predicted_price

In [None]:
# Pair every predicted close with the date of the row it was predicted for:
# the rows of `aapl` from position training_size + window onwards
prediction_df = pd.DataFrame(predicted_price, columns=['Close'])
prediction_df['Date'] = pd.to_datetime(
    aapl['Date'].iloc[training_size + window:].values
)
prediction_df.head()

In [None]:
# Create a new dataframe indexed by Date (so that dates appear on the x-axis of plots).
# assign() returns a new object, so `aapl` itself is left unmodified; the previous
# `aapl_dateind = aapl` only aliased the frame and overwrote aapl['Date'] in place.
aapl_dateind = aapl.assign(Date=pd.to_datetime(aapl['Date'])).set_index('Date')
aapl_dateind

In [None]:
# Re-index the predictions by Date so they share an x-axis with the full dataset
prediction = prediction_df.set_index(keys=['Date'])
prediction

In [None]:
# The plot offset equals the look-back window used to build the samples
look_back = window

plt.figure(figsize=(18, 9))
# Actual closes for the rows that have a prediction
plt.plot(
    aapl_dateind['Close'].iloc[training_size + look_back:len(close_scaled)],
    label='Validation',
)
# Model output for those same rows
plt.plot(prediction['Close'], label='Prediction')
# Everything before the first predicted row
plt.plot(
    aapl_dateind['Close'].iloc[:training_size + look_back],
    label='Historical',
)
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

In [None]:
# Persist the 30-day model so it can be reloaded for the comparison below
model.save(filepath='../models/LSTMmodel30')

## Now compare the three models based on RMSE

In [None]:
# Reload the three saved models from disk
# NOTE(review): the 100-day model above was built with Bidirectional layers and
# the 30-day model with plain LSTM layers, so the comparison varies architecture
# as well as window length.
model30, model60, model100 = (
    tf.keras.models.load_model(saved_path)
    for saved_path in (
        '../models/LSTMmodel30',
        '../models/LSTMmodel',
        '../models/LSTMmodel100',
    )
)

### 30 day model

In [None]:
# Rebuild the sliding-window samples for the 30-day model: every input is the
# `window` consecutive values (column 0 of close_scaled) preceding its target row.
window = 30  # look-back length, in rows of close_scaled

# The first 75% of the rows form the training portion, the rest the test portion
training_size = round(0.75 * len(close_scaled))
training_data, test_data = close_scaled[:training_size], close_scaled[training_size:]

# Training targets: rows window .. end of the training portion
X_train_30 = [close_scaled[t - window:t, 0] for t in range(window, len(training_data))]
y_train_30 = [close_scaled[t, 0] for t in range(window, len(training_data))]

# Test targets begin `window` rows after the split, so each test input window
# lies entirely inside the test portion
X_test_30 = [
    close_scaled[t - window:t, 0]
    for t in range(len(training_data) + window, len(close_scaled))
]
y_test_30 = [
    close_scaled[t, 0]
    for t in range(len(training_data) + window, len(close_scaled))
]


In [None]:
# Lists -> NumPy arrays
X_train_30 = np.array(X_train_30)
y_train_30 = np.array(y_train_30)
X_test_30 = np.array(X_test_30)
y_test_30 = np.array(y_test_30)

# Append a trailing axis of size 1 so the inputs are 3-D: (rows, columns, 1)
X_train_30 = X_train_30.reshape(X_train_30.shape[0], X_train_30.shape[1], 1)
X_test_30 = X_test_30.reshape(X_test_30.shape[0], X_test_30.shape[1], 1)

### 60 day model

In [None]:
# Rebuild the sliding-window samples for the 60-day model: every input is the
# `window` consecutive values (column 0 of close_scaled) preceding its target row.
window = 60  # look-back length, in rows of close_scaled

# The first 75% of the rows form the training portion, the rest the test portion
training_size = round(0.75 * len(close_scaled))
training_data, test_data = close_scaled[:training_size], close_scaled[training_size:]

# Training targets: rows window .. end of the training portion
X_train_60 = [close_scaled[t - window:t, 0] for t in range(window, len(training_data))]
y_train_60 = [close_scaled[t, 0] for t in range(window, len(training_data))]

# Test targets begin `window` rows after the split, so each test input window
# lies entirely inside the test portion
X_test_60 = [
    close_scaled[t - window:t, 0]
    for t in range(len(training_data) + window, len(close_scaled))
]
y_test_60 = [
    close_scaled[t, 0]
    for t in range(len(training_data) + window, len(close_scaled))
]


In [None]:
# Lists -> NumPy arrays
X_train_60 = np.array(X_train_60)
y_train_60 = np.array(y_train_60)
X_test_60 = np.array(X_test_60)
y_test_60 = np.array(y_test_60)

# Append a trailing axis of size 1 so the inputs are 3-D: (rows, columns, 1)
X_train_60 = X_train_60.reshape(X_train_60.shape[0], X_train_60.shape[1], 1)
X_test_60 = X_test_60.reshape(X_test_60.shape[0], X_test_60.shape[1], 1)

### 100 day model

In [None]:
# Rebuild the sliding-window samples for the 100-day model: every input is the
# `window` consecutive values (column 0 of close_scaled) preceding its target row.
window = 100  # look-back length, in rows of close_scaled

# The first 75% of the rows form the training portion, the rest the test portion
training_size = round(0.75 * len(close_scaled))
training_data, test_data = close_scaled[:training_size], close_scaled[training_size:]

# Training targets: rows window .. end of the training portion
X_train_100 = [close_scaled[t - window:t, 0] for t in range(window, len(training_data))]
y_train_100 = [close_scaled[t, 0] for t in range(window, len(training_data))]

# Test targets begin `window` rows after the split, so each test input window
# lies entirely inside the test portion
X_test_100 = [
    close_scaled[t - window:t, 0]
    for t in range(len(training_data) + window, len(close_scaled))
]
y_test_100 = [
    close_scaled[t, 0]
    for t in range(len(training_data) + window, len(close_scaled))
]


In [None]:
# Lists -> NumPy arrays
X_train_100 = np.array(X_train_100)
y_train_100 = np.array(y_train_100)
X_test_100 = np.array(X_test_100)
y_test_100 = np.array(y_test_100)

# Append a trailing axis of size 1 so the inputs are 3-D: (rows, columns, 1)
X_train_100 = X_train_100.reshape(X_train_100.shape[0], X_train_100.shape[1], 1)
X_test_100 = X_test_100.reshape(X_test_100.shape[0], X_test_100.shape[1], 1)

In [None]:
# Score each reloaded model on the test windows built with its own look-back
predicted_price_30 = model30.predict(x=X_test_30)
predicted_price_60 = model60.predict(x=X_test_60)
predicted_price_100 = model100.predict(x=X_test_100)

In [None]:
# RMSE of the 30-day model, computed on the scaled test targets
# NOTE(review): test targets start at training_size + window, so each window
# length is scored on a different number of rows - the three RMSE values are
# not computed over identical days.
math.sqrt(
    mean_squared_error(y_test_30, predicted_price_30)
)

In [None]:
# RMSE of the 60-day model, computed on the scaled test targets
math.sqrt(
    mean_squared_error(y_test_60, predicted_price_60)
)

In [None]:
# RMSE of the 100-day model, computed on the scaled test targets
math.sqrt(
    mean_squared_error(y_test_100, predicted_price_100)
)