In [None]:
# This program uses a Long Short-Term Memory (LSTM) recurrent neural network trained on
# the daily High, Low, and Open stock prices. After being trained on the earlier actual price
# movements, the network predicts the next day's price over the testing portion of the data.

# cd Desktop

# load libraries
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import os
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
import lstm, time # lstm, our 'library' of functions, of which we only utilize load_data_mult()

In [None]:
# Download the stock data
style.use('ggplot')

# Date range of the price history to request.
start = dt.datetime(2000, 1, 1)
end = dt.datetime(2016, 12, 31)

# Acquire MSFT price info from the "yahoo" source, from start date to end date.
df = web.DataReader('MSFT', "yahoo", start, end)
# Drop the columns at positions 3, 4 and 5 so that only open, high, and low are used.
# NOTE(review): the drop is positional — it assumes the reader returns the three
# price columns first; confirm against the installed pandas_datareader version.
df = df.drop(df.columns[[3, 4, 5]], axis=1)
df.head()

# print(df.to_csv('MSFT_data.csv', index=False))

In [None]:
# "pre-normalization"
# Scale each price column down by a constant factor of 1000.
# NOTE: this overwrites the columns of `df`, so the cell is not idempotent —
# running it a second time without re-fetching the data divides by 1000 again.
for price_column in ('Low', 'Open', 'High'):
    df[price_column] = df[price_column] / 1000
df.head(5)

In [None]:
# Turn our dataframe into train/test arrays using the helper in our lstm module.
# Original note: the values are normalized with (price-initial_price)/1.
# NOTE(review): load_data_mult is defined in lstm.py, not here — the meaning of the
# second argument (5) and the exact normalization should be confirmed there.

# Disabled alternative that reads from a CSV file instead of the dataframe:
# X_train, y_train, X_test, y_test = lstm.load_data_mult('MSFT_data.csv', 50, True) 
X_train, y_train, X_test, y_test = lstm.load_data_mult(df[::-1], 5)  # df passed in reversed row order

# Report the shape of every split.
for split_name, split_array in (("X_train", X_train), ("y_train", y_train),
                                ("X_test", X_test), ("y_test", y_test)):
    print(split_name, split_array.shape)



In [None]:
# Build the recurrent neural network: Long Short-Term Memory (LSTM)

# In a recurrent NN, a hidden layer is not based solely on the input layer, but also
# on its previous hidden state, thus allowing it to use data from the present and the
# recent past to predict outcomes.

# The LSTM framework allows us to retain information from far into the past, whereas
# with a regular recurrent network, information decays as time moves on.
# It does so by using 'memory cells' rather than regular activation-function neurons.
# Within each cell, the input and output gates use the activation functions while an
# internal state feeds information into itself across timesteps with a constant weight of 1.
# A constant weight of 1 means that when gradients move through the cell, the error is
# multiplied by 1, and thus keeps the same value with no decay.
# The value is preserved in the memory cell.


model = Sequential()  # linear stack of layers

# First layer: 50 LSTM units reading 3 features per timestep.
model.add(LSTM(
    input_dim=3,  # one feature per price column kept in df (Open, Low, High)
    output_dim=50,
    return_sequences=True))  # emit the whole sequence so the next LSTM layer can consume it
model.add(Dropout(0.2))  # 20% dropout

# Second layer: 100 LSTM units, returns a single vector per input sequence.
model.add(LSTM(
    100,
    return_sequences=False))  # only the output at the end of the sequence is passed on
model.add(Dropout(0.2))

# Use a linear Dense layer to reduce that vector to a single predicted value.
model.add(Dense(
    output_dim=1))
model.add(Activation('linear'))

# Compile with the mean squared error loss and the rmsprop optimizer
# (an adaptive-learning-rate variant of gradient descent).
# The timer gets its own name so it does not overwrite the `start` date that the
# data-download cell defines. Mean absolute error is tracked as the metric because
# 'accuracy' is a classification metric and carries no meaning for this regression.
compile_start = time.time()
model.compile(loss='mse', optimizer='rmsprop', metrics=['mae'])
print('compilation time : ', time.time() - compile_start)



In [None]:
# Fit the network on the training split.
model.fit(X_train, y_train,
          batch_size=500,        # samples per gradient update
          nb_epoch=10,           # number of passes over the training data
          validation_split=0.1)  # hold out 10% of the training data for validation

In [None]:
# Predict on the test inputs, then compare each target with its prediction.
p = model.predict(X_test)

# Relative error of each prediction: actual / predicted - 1.
ratio = [(actual / p[idx][0]) - 1 for idx, actual in enumerate(y_test)]
# Absolute error of each prediction: |actual - predicted|.
diff = [abs(actual - p[idx][0]) for idx, actual in enumerate(y_test)]

In [None]:
# View results. This graph compares the model's predictions (`p`) with the actual
# targets (`y_test`) over the held-out test portion of the open, low, and high daily
# price inputs.
# Uses the `plt` alias imported in the first cell rather than re-importing pyplot
# under a second name in the middle of the notebook.

fig, ax = plt.subplots()
ax.plot(p, color='green', label='prediction')
ax.plot(y_test, color='blue', label='actual')
# Title and axis labels let the figure stand alone when the notebook is skimmed.
ax.set(
    title='MSFT: LSTM prediction vs. actual (test set)',
    xlabel='Test sample index',
    ylabel='Target value (model scale)')
ax.legend(loc='upper left')
plt.show()