In [32]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.layers import LSTM
from sklearn import linear_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier

In [33]:
from sklearn import metrics

In [34]:
import keras
import keras.utils
from keras import utils as np_utils

## DATA

In [35]:
df = pd.read_csv("dummy_data.csv")
df.drop(['Open', 'High', 'Low', 'Adj Close', 'Volume', 'Date'], axis=1, inplace=True)

# plt.figure(figsize = (12,7))
# plt.plot(df['Date'], df['Close'])
# plt.grid(ls = 'dotted')

In [36]:
df

Unnamed: 0,Close
0,29.334999
1,29.070000
2,28.799999
3,29.757500
4,29.570000
...,...
1758,151.070007
1759,148.110001
1760,144.220001
1761,141.169998


In [37]:
tf.random.set_seed(7)
dataset = df.values
dataset = dataset.astype('float32')

## Normalization

In [38]:
# normalizing the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
dataset

array([[0.04233965],
       [0.04067743],
       [0.03898384],
       ...,
       [0.7629607 ],
       [0.74382937],
       [0.78685904]], dtype=float32)

## Train/Test Sets

In [39]:
train_size = int(len(dataset) * 0.7)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
print("Train Size:", len(train), "\nTest Size:", len(test))

Train Size: 1234 
Test Size: 529


In [40]:
#building the dataset based on the number of previous days we are considering
def create_dataset(dataset, look_back=1):
	dataX, dataY = [], []
	for i in range(len(dataset)-look_back-1):
		a = dataset[i:(i+look_back), 0]
		dataX.append(a)
		dataY.append(dataset[i + look_back, 0])
	return np.array(dataX), np.array(dataY)

In [41]:
look_back = 4    #number of days from today we are looking back into past

In [42]:
# reshape into X=t and Y=t+1
train_X, train_Y = create_dataset(train, look_back)
test_X, test_Y = create_dataset(test, look_back)

In [43]:
# reshape input to be [samples, time steps, features]
train_X = np.reshape(train_X, (train_X.shape[0], 1, train_X.shape[1]))
test_X =  np.reshape(test_X, (test_X.shape[0], 1, test_X.shape[1]))

In [44]:
train_X

array([[[0.04233965, 0.04067743, 0.03898384, 0.04498981]],

       [[0.04067743, 0.03898384, 0.04498981, 0.04381371]],

       [[0.03898384, 0.04498981, 0.04381371, 0.04373531]],

       ...,

       [[0.61850405, 0.6154932 , 0.6048926 , 0.58582413]],

       [[0.6154932 , 0.6048926 , 0.58582413, 0.5954211 ]],

       [[0.6048926 , 0.58582413, 0.5954211 , 0.5914067 ]]], dtype=float32)

## LSTM Model

In [45]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

## Training the Model

In [15]:
model.fit(train_X, train_Y, epochs=10, batch_size=1, verbose=2) #epchs=10 just to see if it works or not

Epoch 1/10
1229/1229 - 4s - loss: 0.0041 - 4s/epoch - 3ms/step
Epoch 2/10
1229/1229 - 3s - loss: 1.0151e-04 - 3s/epoch - 2ms/step
Epoch 3/10
1229/1229 - 3s - loss: 9.3950e-05 - 3s/epoch - 2ms/step
Epoch 4/10
1229/1229 - 3s - loss: 9.8440e-05 - 3s/epoch - 2ms/step
Epoch 5/10
1229/1229 - 3s - loss: 9.5926e-05 - 3s/epoch - 2ms/step
Epoch 6/10
1229/1229 - 3s - loss: 9.8863e-05 - 3s/epoch - 2ms/step
Epoch 7/10
1229/1229 - 3s - loss: 9.2906e-05 - 3s/epoch - 2ms/step
Epoch 8/10
1229/1229 - 3s - loss: 9.5691e-05 - 3s/epoch - 2ms/step
Epoch 9/10
1229/1229 - 3s - loss: 9.2624e-05 - 3s/epoch - 2ms/step
Epoch 10/10
1229/1229 - 3s - loss: 9.3982e-05 - 3s/epoch - 2ms/step


<keras.callbacks.History at 0x7f12243a8430>

## Prediction on Test Set

In [15]:
# make predictions
train_Predict = model.predict(train_X)
test_Predict = model.predict(test_X)

# invert predictions
train_Predict = scaler.inverse_transform(train_Predict)
train_Y = scaler.inverse_transform([train_Y])
test_Predict = scaler.inverse_transform(test_Predict)
test_Y = scaler.inverse_transform([test_Y])

# calculate root mean squared error
train_Score = np.sqrt(mean_squared_error(train_Y[0], train_Predict[:,0]))
print('Train Score: %.2f RMSE' % (train_Score))
test_Score = np.sqrt(mean_squared_error(test_Y[0], test_Predict[:,0]))
print('Test Score: %.2f RMSE' % (test_Score))

Train Score: 1.62 RMSE
Test Score: 9.73 RMSE


## Tuning the Hyper Paramters of the Model

## RandomizedSearch

In [75]:
epochs=10 # number of epochs
LSTM_units=4 # number of lstm units
# num_samples=1 # number of samples 
look_back=4 # time_steps
# num_features_2_lstm= train_X.shape[1] # numer of features
dropout_rate=0 #0.2 is a good value # Regularization
recurrent_dropout=0
verbose=2
tscv = 4   #k-fold corss validation paramter
# scoring_lstm = 'mean_squared_error'  

# hyperparameters

param_random_search = {'batch_size':[10,100], 'epochs':[1, 10]}

# param_random_search ={'look_back':[1, 5 , 10],
#                       'LSTM_units':[1, 5, 10],
#                       'dropout_rate':[0, 0.2, 0.4]}

In [76]:
# def create_LSTM(epochs=1,LSTM_units=1,num_samples=1,look_back=4,num_features=None,dropout_rate=0,recurrent_dropout=0,verbose=0):
#     model=Sequential()
#     model.add(LSTM(units=LSTM_units,
#                    input_shape=(1, look_back), 
#                 #    batch_input_shape=(num_samples, look_back, num_features), 
#                 #    stateful=True, 
#                    recurrent_dropout=recurrent_dropout)) 
#     model.add(Dropout(dropout_rate))
#     model.add(Dense(1, activation='sigmoid', kernel_initializer=keras.initializers.he_normal(seed=1)))
#     model.compile(loss='binary_crossentropy', optimizer="adam", metrics=['accuracy'])
#     return model



def create_LSTM(epochs=1,LSTM_units=1,num_samples=1,look_back=4,num_features=None,dropout_rate=0,recurrent_dropout=0,verbose=0):
# create and fit the LSTM network
    model = Sequential()
    model.add(LSTM(4, input_shape=(1, look_back)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
    return(model)



# Wrapping the Classifier
wrapped_lstm =KerasClassifier(build_fn=create_LSTM, 
                           epochs=epochs,
                           LSTM_units=LSTM_units, 
                        #    num_samples=num_samples, 
                           look_back=look_back, 
                        #    num_features=num_features_2_lstm, 
                           dropout_rate=dropout_rate,
                           recurrent_dropout=recurrent_dropout,
                           verbose=verbose)

# Create randomized search 
random_search =RandomizedSearchCV(wrapped_lstm, 
                                 param_random_search, 
                                 n_iter=10, 
                                 random_state=1, 
                                 cv=tscv, 
                                 verbose=1, 
                                 n_jobs=-1, 
                                #  scoring=scoring_lstm, 
                                #  refit=scoring_lstm, 
                                 return_train_score=True,
                                 error_score='raise')


# Fit randomized search

random_search_fit = random_search.fit(train_X, train_Y)

#saving the randomizedsearch result
pd.DataFrame(random_search_fit.cv_results_).to_csv("random_search_res.csv")

  wrapped_lstm =KerasClassifier(build_fn=create_LSTM,
Fitting 4 folds for each of 4 candidates, totalling 16 fits
93/93 - 4s - loss: 259934.3906 - accuracy: 0.0011 - 4s/epoch - 46ms/step
31/31 - 1s - loss: 0.0314 - accuracy: 0.9968 - 1s/epoch - 35ms/step
93/93 - 0s - loss: 259804.6094 - accuracy: 0.0011 - 388ms/epoch - 4ms/step
93/93 - 4s - loss: 262332.8438 - accuracy: 0.0011 - 4s/epoch - 47ms/step
31/31 - 1s - loss: 102252.6797 - accuracy: 0.0000e+00 - 1s/epoch - 42ms/step
93/93 - 0s - loss: 262221.3438 - accuracy: 0.0011 - 314ms/epoch - 3ms/step
93/93 - 4s - loss: 255675.9219 - accuracy: 0.0011 - 4s/epoch - 48ms/step
31/31 - 4s - loss: 317483.0938 - accuracy: 0.0000e+00 - 4s/epoch - 127ms/step
93/93 - 0s - loss: 255544.9844 - accuracy: 0.0011 - 404ms/epoch - 4ms/step
93/93 - 5s - loss: 252441.0000 - accuracy: 0.0011 - 5s/epoch - 53ms/step
31/31 - 1s - loss: 754271.5625 - accuracy: 0.0000e+00 - 1s/epoch - 41ms/step
93/93 - 0s - loss: 252366.2188 - accuracy: 0.0011 - 394ms/epoch - 4ms

In [77]:
print("best parameters are:", random_search_fit.best_params_)

best parameters are: {'epochs': 1, 'batch_size': 10}


## GridSeachCV

In [80]:
epochs=10 #number of epochs
LSTM_units=4 # number of lstm units
num_samples=1 # number of samples
look_back=1
num_features_2_lstm=train_X.shape[1] # numer of features
dropout_rate=0 # Regularization
recurrent_dropout=0
verbose=0
tscv = 4 #k-fold corss validation paramter
scoring_lstm= 'accuracy' #scoring method

# hyperparameter
param_grid_search={'batch_size':[1,10,100]}

In [81]:
# Gridsearch
grid_search =GridSearchCV(estimator=wrapped_lstm, 
                           param_grid=param_grid_search,  
                           n_jobs=-1,  
                           cv=tscv, 
                           scoring=scoring_lstm, # accuracy
                        #    refit=True, 
                           return_train_score=False)

# # Fit model
grid_search_fit =grid_search.fit(train_X, train_Y, shuffle=False)

#saving the gridsearch result
pd.DataFrame(grid_search_fit.cv_results_).to_csv("grid_search_res.csv")

Epoch 1/10
921/921 - 8s - loss: 255697.1406 - accuracy: 0.0011 - 8s/epoch - 8ms/step
Epoch 2/10
921/921 - 4s - loss: 251638.0781 - accuracy: 0.0011 - 4s/epoch - 4ms/step
Epoch 3/10
921/921 - 4s - loss: 248486.1250 - accuracy: 0.0011 - 4s/epoch - 4ms/step
Epoch 4/10
921/921 - 4s - loss: 245581.3438 - accuracy: 0.0011 - 4s/epoch - 4ms/step
Epoch 5/10
921/921 - 4s - loss: 242794.5156 - accuracy: 0.0011 - 4s/epoch - 4ms/step
Epoch 6/10
921/921 - 4s - loss: 240077.9688 - accuracy: 0.0011 - 4s/epoch - 4ms/step
Epoch 7/10
921/921 - 4s - loss: 237409.7188 - accuracy: 0.0011 - 4s/epoch - 4ms/step
Epoch 8/10
921/921 - 4s - loss: 234779.6250 - accuracy: 0.0011 - 4s/epoch - 4ms/step
Epoch 9/10
921/921 - 4s - loss: 232182.5000 - accuracy: 0.0011 - 4s/epoch - 4ms/step
Epoch 10/10
921/921 - 4s - loss: 229614.8750 - accuracy: 0.0011 - 4s/epoch - 4ms/step
Traceback (most recent call last):
  File "/shared-libs/python3.9/py/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 767, i

In [82]:
print("best parameters are:", grid_search_fit.best_params_)

best parameters are: {'batch_size': 1}


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=fd4d66f4-9d7c-4cde-9a98-160e4af16392' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>