In [23]:
import numpy as np
import keras 
import time
import math

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error



from keras.layers import Dropout
from keras.layers import Flatten
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.optimizers import Nadam

from keras.layers.normalization import BatchNormalization
from keras.callbacks import ModelCheckpoint
from keras.layers import Activation
from keras.utils import np_utils
import matplotlib.pyplot as plt


from keras.models import load_model

In [2]:
# read the data set 
f = open('StockExchangeDataset.txt', 'r')
dataset = f.readlines()
f.close()

In [3]:
print (len(dataset))
print (dataset[0])
print (dataset[len(dataset) -1])

2522
1385.59

2663.42


In [4]:
dataset = np.array(dataset).reshape(-1, 1)
scaler = MinMaxScaler()
dataset = scaler.fit_transform(dataset)



In [5]:
# divide the data into testing and training. Here it takes 67% for training and the remaining 33% for testing 

train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
print(len(train), len(test))


2017 505


In [6]:
#train = dataset[:2270]
#test = dataset[2270:]   # take 10% for testing 

In [7]:
def create_dataset(dataset, look_back = 5):   # the look_back is the time step or in other words is the # of previous time steps to use as input variables to predict the output correspondingly
    X, Y = [], []
    for i in range(len(dataset) - look_back-1):
        a = dataset[i:(i+look_back), 0]
        X.append(a)
        Y.append(dataset[i+look_back , 0])
    return np.array(X), np.array(Y)
    

In [8]:

np.random.seed(7)

In [9]:
look_back = 5
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

print(trainX)

[[0.32283708 0.33365053 0.33572671 0.33280822 0.33771183]
 [0.33365053 0.33572671 0.33280822 0.33771183 0.3260151 ]
 [0.33572671 0.33280822 0.33771183 0.3260151  0.3283417 ]
 ...
 [0.64423086 0.64427639 0.64255079 0.64433102 0.64590182]
 [0.64427639 0.64255079 0.64433102 0.64590182 0.63709626]
 [0.64255079 0.64433102 0.64590182 0.63709626 0.63231103]]


In [10]:
# LSTM requires the train and test to be in a specific format which is [samples, time steps, features]
# Therefore we will reshape 
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))


In [11]:
model = Sequential()
model.add(LSTM(32, input_shape=(1, look_back)))
model.add(LSTM(16, input_shape=(1, look_back)))

model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')


In [12]:
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)

Epoch 1/100
 - 49s - loss: 0.0050
Epoch 2/100
 - 21s - loss: 1.6430e-04
Epoch 3/100
 - 17s - loss: 1.5311e-04
Epoch 4/100
 - 13s - loss: 1.4517e-04
Epoch 5/100
 - 13s - loss: 1.3135e-04
Epoch 6/100
 - 14s - loss: 1.2241e-04
Epoch 7/100
 - 17s - loss: 1.1883e-04
Epoch 8/100
 - 23s - loss: 1.1144e-04
Epoch 9/100
 - 18s - loss: 1.0391e-04
Epoch 10/100
 - 18s - loss: 1.0472e-04
Epoch 11/100
 - 18s - loss: 1.0250e-04
Epoch 12/100
 - 18s - loss: 1.0165e-04
Epoch 13/100
 - 18s - loss: 9.9872e-05
Epoch 14/100
 - 18s - loss: 9.6322e-05
Epoch 15/100
 - 14s - loss: 9.8375e-05
Epoch 16/100
 - 12s - loss: 9.8147e-05
Epoch 17/100
 - 12s - loss: 9.4462e-05
Epoch 18/100
 - 12s - loss: 8.7300e-05
Epoch 19/100
 - 22s - loss: 9.4025e-05
Epoch 20/100
 - 16s - loss: 9.1544e-05
Epoch 21/100
 - 12s - loss: 8.8948e-05
Epoch 22/100
 - 12s - loss: 8.8002e-05
Epoch 23/100
 - 12s - loss: 8.5275e-05
Epoch 24/100
 - 12s - loss: 8.8937e-05
Epoch 25/100
 - 12s - loss: 8.5428e-05
Epoch 26/100
 - 14s - loss: 8.4792e-05

<keras.callbacks.History at 0x27c2cfd14a8>

In [13]:
model.save('keras_70.6%_100epochs_16neurons_1LSTMlayer_5lookback.h5')

In [26]:
keras.models.load_model('keras_70.6%_100epochs_16neurons_1LSTMlayer_5lookback.h5')

<keras.models.Sequential at 0x27c3b6dedd8>

In [14]:
print (trainY.shape)

(2011,)


In [28]:
# ezay a7adedlo eno y predict 5 days ????

trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])

trainError = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
testError = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))

print('Train Error: %.2f RMSE' % (trainError))
print('Test Error: %.2f RMSE' % (testError))
print("----------------------------------------")
print('Train Accuracy: %.2f RMSE' % (100-trainError))
print('Test Accuracy: %.2f RMSE' % (100-testError))


print(testPredict[-1::])   # figure out the syntax for predicting 5 days ?????

'''
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

print(trainPredict)
print (testPredict)

# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
print(trainY.shape)
trainY = scaler.inverse_transform(trainY)
print(trainY.shape)
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform(testY)

# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

'''




'''
# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

# shift test predictions for plotting
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict

# plot baseline and predictions
plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()

'''

ValueError: Found array with dim 3. Estimator expected <= 2.

In [None]:
'''
model = Sequential()

model.add(LSTM(
    input_dim=1,
    output_dim=50,
    return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(
    100,
    return_sequences=False))
model.add(Dropout(0.2))

model.add(Dense(
    output_dim=1))
model.add(Activation('linear'))

start = time.time()
model.compile(loss='mse', optimizer='rmsprop')
print ('compilation time : ', time.time() - start)
'''

In [None]:
'''
trainPredict = models.predict(X_train)
testPredict = models.predict(X_test)

trainPredict = sc.inverse_transform(trainPredict)
trainY = sc.inverse_transform([y_train])
testPredict = sc.inverse_transform(testPredict)
testY = sc.inverse_transform([y_test])

trainError = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
testError = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))

print('Train Error: %.2f RMSE' % (trainError))
print('Test Error: %.2f RMSE' % (testError))
print("----------------------------------------")
print('Train Accuracy: %.2f RMSE' % (100-trainError))
print('Test Accuracy: %.2f RMSE' % (100-testError))

'''



