In [None]:
'''
Deep Learning Stock Price Predictor 
Author: Gil Akos
Objective: Create Deep Learning Model with Keras LSTM
Reference: http://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/

SET UP A BASIC LSTM MODEL 
'''

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Import required libraries for Machine Learning
import math
import numpy as np
# Fix random seed for reproducibility
np.random.seed(7)
import pandas as pd

# Import Keras Libraries
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.metrics import mean_squared_error

# Import required libraries for Yahoo Finance API
import yahoo_finance as yf
from yahoo_finance import Share

# Import required libraries for visualization and printing
from pprint import pprint
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (18, 12)
import matplotlib.dates as mdates

# Import utility libraries
import utilities as util
import datetime
import time

In [None]:
# Date range for the historical data request.
# The end date matches the Machine Learning for Trading course benchmark
# (1/1/09-6/18/15); the start date used here reaches back further, to 1995.
date_start = '1995-01-01'
date_end = '2015-06-18'

# Tickers available for experiments
tick_0, tick_1, tick_2, tick_3 = 'SPY', 'GE', 'MSFT', 'AAPL'

# Ticker used for the baseline model
base_ticker = tick_0

In [None]:
# Request the historical quotes for the baseline ticker
stock_data = Share(base_ticker)
stock_historical = stock_data.get_historical(date_start, date_end)

# Fail early with a clear message instead of building an empty dataframe
# that only breaks several cells further down.
if not stock_historical:
    raise ValueError('No historical data returned for ' + base_ticker)

# Walk the response back-to-front so the stored series is in the reverse of
# the order returned (presumably the API answers newest-first, which makes
# the result oldest-first -- confirm against the yahoo_finance docs).
# reversed() replaces the Python-2-only xrange countdown and behaves the
# same on Python 2 and 3.
stock_records = list(reversed(stock_historical))

# Factors to keep from each historical record
stock_item = list(range(len(stock_records)))  # running counter 0..n-1
stock_date = [record['Date'] for record in stock_records]
stock_close = [record['Adj_Close'] for record in stock_records]
stock_volume = [record['Volume'] for record in stock_records]

# Build the dataframe in one step; price and volume are converted to numbers
stock_df = pd.DataFrame(
    {
        'Item': stock_item,
        'Date': stock_date,
        'Adj_Close': pd.to_numeric(stock_close),
        'Volume': pd.to_numeric(stock_volume),
    },
    columns=['Item', 'Date', 'Adj_Close', 'Volume'],
)

# Print the dataframe head and tail
print(stock_df.head())
print("---")
print(stock_df.tail())

In [None]:
# Plot the adjusted close against the trading-day counter
fig, ax = plt.subplots()
trading_days = stock_df['Item']
adjusted_close = stock_df['Adj_Close']
ax.plot(trading_days, adjusted_close, '#0A7388', label='Adjusted Close')

# Formatter for the y value shown in the plot's coordinate readout
def price(x):
    """Return x rendered as a dollar amount with two decimals, e.g. 12.5 -> '$12.50'."""
    amount = '%1.2f' % x
    return '$' + amount
#ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')
# Show y coordinates as dollar amounts
ax.format_ydata = price

# Title, axis labels and legend
ax.set_title(base_ticker + ' Trading')
ax.set_xlabel('Trading Days')
ax.set_ylabel('Price USD')
ax.legend(loc='upper left')

plt.show()

In [None]:
# Build the feature (day counter) and target (adjusted close) lists.
# Each entry is a one-element list, so np.array(...) on either list
# yields an (n, 1) column.
# The columns are converted in one vectorised step instead of looping over
# the dataframe row by row with iterrows().
X_day_all = stock_df[['Item']].values.tolist()
y_close_all = stock_df[['Adj_Close']].values.tolist()

# Show only a sample; dumping every row floods the notebook output
pprint(y_close_all[:5])
print('%d rows' % len(y_close_all))

In [None]:
# Scale the feature and target arrays, keeping the min/max of the raw data
# so predictions can be mapped back to the original range later.
# min()/max() on the nested lists return one-element rows; [0] unwraps them.
X_day_bounds = [min(X_day_all)[0], max(X_day_all)[0]]
y_close_bounds = [min(y_close_all)[0], max(y_close_all)[0]]

X_day_array = np.array(X_day_all)
y_close_array = np.array(y_close_all)
X_day_scaled, X_day_range = util.scale_range(X_day_array, input_range=X_day_bounds)
y_close_scaled, y_close_range = util.scale_range(y_close_array, input_range=y_close_bounds)

pprint(y_close_scaled)
print(y_close_range)

In [None]:
# Create datasets
tt_split = .315  # fraction of rows held out for testing
n_rows = len(stock_df['Item'])
tti_split = int(math.floor(n_rows * tt_split))

# Index of the first test row. Slicing on an explicit split point (rather
# than the negative offset -tti_split) stays correct when tti_split is 0:
# [:-0] would give an empty training set and [-0:] a test set of everything.
# Assumes the scaled arrays have one row per dataframe row -- they are built
# from stock_df, but confirm util.scale_range preserves the shape.
train_end = n_rows - tti_split

# Set up training and test sets (the test set is the most recent tail)
X_day_train = X_day_scaled[:train_end]
X_day_test = X_day_scaled[train_end:]
print(len(X_day_train))
print(len(X_day_test))

y_close_train = y_close_scaled[:train_end]
y_close_test = y_close_scaled[train_end:]

# Create dataframe for all training, testing, and predicted sets
results_df = pd.DataFrame(columns=['Item', 'Close', 'Close_Scaled', 'Close_Train', 'Close_Test', 'Close_Test_Scaled', 'Close_Predicted', 'Close_Predicted_Scaled'])
results_df['Item'] = stock_df['Item']
results_df['Close'] = stock_df['Adj_Close']
results_df['Close_Scaled'] = y_close_scaled.flatten()

# Write the partial columns through .loc. The chained form
# results_df[col][rows] = ... assigns into an intermediate object, triggers
# SettingWithCopyWarning and is a silent no-op under pandas copy-on-write.
train_rows = results_df.index[:train_end]
test_rows = results_df.index[train_end:]
results_df.loc[train_rows, 'Close_Train'] = y_close_train.flatten()
results_df.loc[test_rows, 'Close_Test'] = stock_df['Adj_Close'].values[train_end:]
results_df.loc[test_rows, 'Close_Test_Scaled'] = y_close_test.flatten()

pprint(results_df)


In [None]:
# Convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    """Slice column 0 of a 2-D array into sliding windows and next-step targets.

    Args:
        dataset: 2-D numpy array; only column 0 is read.
        look_back: number of consecutive values in each input window.

    Returns:
        A tuple (windows, targets) of numpy arrays. Row i of `windows` holds
        dataset[i:i + look_back, 0] and targets[i] is dataset[i + look_back, 0].
        There are len(dataset) - look_back - 1 samples, so the final row of
        `dataset` is never used as a target. Both arrays are empty when that
        count is zero or negative.
    """
    sample_count = len(dataset) - look_back - 1
    starts = range(sample_count)
    windows = [dataset[start:start + look_back, 0] for start in starts]
    targets = [dataset[start + look_back, 0] for start in starts]
    return np.array(windows), np.array(targets)

In [None]:
# Reshape into t and t+1
look_back = 3

# Build the training windows from the scaled close prices, the same series
# the test windows come from. They were previously built from X_day_train
# (the scaled day counter), so the network was fitted on a linear ramp and
# then scored on prices. The X_day_train_* names are kept because the
# following cells read them.
X_day_train_t, X_day_train_t1 = create_dataset(y_close_train, look_back)
y_close_test_t, y_close_test_t1 = create_dataset(y_close_test, look_back)

pprint(X_day_train_t)
print('--')

# Reshape input to be [samples, time steps, features]
X_day_train_t = np.reshape(X_day_train_t, (X_day_train_t.shape[0], X_day_train_t.shape[1], 1))
y_close_test_t = np.reshape(y_close_test_t, (y_close_test_t.shape[0], y_close_test_t.shape[1], 1))

pprint(X_day_train_t)
print('--')

In [None]:
# Create Keras Model
# http://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/

# Training settings
batch_size = 1
epochs = 20 # 200

# LSTM layer with 8 units feeding a single-output dense layer; each input
# sample is a window of `look_back` time steps with one feature.
model = Sequential([
    LSTM(8, batch_input_shape=(batch_size, look_back, 1)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# shuffle=False keeps the windows in their original order during fitting.
# NOTE(review): `nb_epoch` is the older Keras spelling; newer releases name
# this argument `epochs` -- confirm against the installed Keras version.
model.fit(
    X_day_train_t,
    X_day_train_t1,
    nb_epoch=epochs,
    batch_size=batch_size,
    verbose=2,
    shuffle=False,
)


In [None]:
# Run the fitted model over the training and test windows
trainPredict = model.predict(X_day_train_t, batch_size=batch_size)
testPredict = model.predict(y_close_test_t, batch_size=batch_size)

# Map the predictions from the 0..1 range onto the close-price bounds
trainPredict_rescaled, r1 = util.scale_range(
    np.array(trainPredict), input_range=[0.0, 1.0], target_range=y_close_bounds)
testPredict_rescaled, r2 = util.scale_range(
    np.array(testPredict), input_range=[0.0, 1.0], target_range=y_close_bounds)

n_train_pred = len(trainPredict_rescaled)
n_test_pred = len(testPredict_rescaled)

# Training predictions on the full-length axis: prediction i forecasts the
# value at position i + look_back, so the series is shifted by look_back.
trainPredictPlot = np.empty_like(X_day_scaled)
trainPredictPlot.fill(np.nan)
trainPredictPlot[look_back:n_train_pred + look_back, :] = trainPredict_rescaled

# Test predictions on the full-length axis. The start offset skips the
# training predictions, both look_back margins and one extra row; it relies
# on create_dataset returning len - look_back - 1 samples per split.
test_start = n_train_pred + (look_back * 2) + 1
test_stop = len(X_day_scaled) - 1
testPredictPlot = np.empty_like(X_day_scaled)
testPredictPlot.fill(np.nan)
testPredictPlot[test_start:test_stop, :] = testPredict_rescaled

# Actual close values for the last n_test_pred rows, NaN elsewhere
yPlot = np.empty_like(y_close_scaled)
yPlot.fill(np.nan)
yPlot[-n_test_pred:, :] = y_close_all[-n_test_pred:]

# Set up plot
fig = plt.figure()
ax = fig.add_subplot(111)

# Plot actual and predicted close values
ax.plot(yPlot, '#5F5F5F', label='Adjusted Close')
ax.plot(testPredictPlot, '#0C91AB', label='Predicted Close')

# Labels, title and legend
ax.set_ylabel('Price USD')
ax.set_xlabel('Trading Days')
ax.set_title(base_ticker + ' Trading vs Prediction')
ax.legend(loc='upper left')

plt.show()

In [None]:
# Mean squared error on the test windows, reported together with its
# square root (RMSE). The model is compiled with an MSE loss, so evaluate()
# returns that loss value.
testScore = model.evaluate(y_close_test_t, y_close_test_t1, batch_size=batch_size, verbose=0)
test_rmse = math.sqrt(testScore)
print('Test Score: %.4f MSE (%.4f RMSE)' % (testScore, test_rmse))

# Log of Results:
Lookback: 3 (scores below are measured on the scaled close prices, not in USD)

### SPY
Test Score: 0.0002 MSE (0.0149 RMSE)


### GE
Test Score: 0.0003 MSE (0.0165 RMSE)


### MSFT
Test Score: 0.0004 MSE (0.0206 RMSE)


### AAPL
Test Score: 0.0003 MSE (0.0186 RMSE)
