# DEEP LEARNING TIME: simple LSTM 

In [4]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

In [5]:
df = pd.read_csv("databases/lytton.csv", index_col='date', parse_dates=['date'])

In [6]:
df

Unnamed: 0_level_0,tempmax.1,tempmin.1,temp.1,humidity.1,precip.1,precipcover.1,snowdepth.1,windgust.1,windspeed.1,sealevelpressure.1,avg_temp_global
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1971-01-01,18.0,3.7,9.8,76.8,0.00,0.00,0.0,18.4,13.0,1020.1,-0.186
1971-01-02,18.0,3.7,9.8,76.8,0.00,0.00,0.0,18.4,13.0,1020.1,-0.186
1971-01-03,18.0,3.7,9.8,76.8,0.00,0.00,0.0,18.4,13.0,1020.1,-0.186
1971-01-04,18.0,3.7,9.8,76.8,0.00,0.00,0.0,18.4,13.0,1020.1,-0.186
1971-01-05,18.0,3.7,9.8,76.8,0.00,0.00,0.0,18.4,13.0,1020.1,-0.186
...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,-20.0,-25.0,-22.1,62.3,0.00,0.00,27.0,33.5,19.5,1021.5,0.850
2021-12-28,-16.0,-22.0,-18.5,69.8,0.01,4.17,27.0,35.3,18.0,1019.3,0.850
2021-12-29,-15.0,-19.0,-17.2,67.9,0.00,0.00,27.0,35.3,22.0,1020.0,0.850
2021-12-30,-10.0,-16.0,-13.3,70.1,0.01,4.17,27.0,37.1,18.0,1012.7,0.850


In [5]:
test_split=round(len(df)*0.20)
df_for_training=df[:16976]
df_for_testing=df[16976:]
print(df_for_training.shape)
print(df_for_testing.shape)


(16976, 11)
(1652, 11)


#### to avoid prediction errors, let’s scale the data with minmaxscaler

In [6]:
scaler = MinMaxScaler(feature_range=(0,1))
df_for_training_scaled = scaler.fit_transform(df_for_training)
df_for_testing_scaled=scaler.transform(df_for_testing)
df_for_training_scaled

array([[0.60180995, 0.58627087, 0.60278746, ..., 0.13727561, 0.58108108,
        0.05298651],
       [0.60180995, 0.58627087, 0.60278746, ..., 0.13727561, 0.58108108,
        0.05298651],
       [0.60180995, 0.58627087, 0.60278746, ..., 0.13727561, 0.58108108,
        0.05298651],
       ...,
       [0.66214178, 0.79591837, 0.75783972, ..., 0.40760296, 0.56891892,
        0.88439306],
       [0.75263952, 0.70315399, 0.76655052, ..., 0.25765576, 0.63243243,
        0.88439306],
       [0.81447964, 0.70315399, 0.80139373, ..., 0.17317846, 0.61216216,
        0.88439306]])

#### split the data into train and test, reshape for NN

In [16]:
def createXY(dataset,n_past):
    dataX = []
    dataY = []
    for i in range(n_past, len(dataset)):
            dataX.append(dataset[i - n_past:i, 0:dataset.shape[1]])
            dataY.append(dataset[i,0])
    return np.array(dataX),np.array(dataY)

trainX,trainY=createXY(df_for_training_scaled,1000)
testX,testY=createXY(df_for_testing_scaled,1000)

In [17]:
print("trainX Shape-- ",trainX.shape)
print("trainY Shape-- ",trainY.shape)

trainX Shape--  (15976, 1000, 11)
trainY Shape--  (15976,)


In [18]:
print("testX Shape-- ",testX.shape)
print("testY Shape-- ",testY.shape)


testX Shape--  (652, 1000, 11)
testY Shape--  (652,)


In [19]:
print("trainX[0]-- \n",trainX[0])
print("trainY[0]-- ",trainY[0])

trainX[0]-- 
 [[0.60180995 0.58627087 0.60278746 ... 0.13727561 0.58108108 0.05298651]
 [0.60180995 0.58627087 0.60278746 ... 0.13727561 0.58108108 0.05298651]
 [0.60180995 0.58627087 0.60278746 ... 0.13727561 0.58108108 0.05298651]
 ...
 [0.62292609 0.6419295  0.65853659 ... 0.25131996 0.38648649 0.29190751]
 [0.58974359 0.68274583 0.65853659 ... 0.36853221 0.55675676 0.29190751]
 [0.59728507 0.76623377 0.69686411 ... 0.34952482 0.61351351 0.29190751]]
trainY[0]--  0.7254901960784313


### create LSTM, then train the model
#### - use gridsearchcv to make hyperparameter adjustments to find the basic model

In [28]:
def build_model(optimizer):
    grid_model = Sequential()
    grid_model.add(LSTM(50,return_sequences=True,input_shape=(1000,11)))
    grid_model.add(LSTM(50))
    grid_model.add(Dropout(0.2))
    grid_model.add(Dense(1))

    grid_model.compile(loss = 'mse',optimizer = optimizer)
    return grid_model

grid_model = KerasRegressor(build_fn=build_model,verbose=1,validation_data=(testX,testY))
    

parameters = {'batch_size' : [16,20],
              'epochs' : [8],
              'optimizer' : ['adam','Adadelta'] }

grid_search  = GridSearchCV(estimator = grid_model,
                            param_grid = parameters,
                            cv = 2)

  grid_model = KerasRegressor(build_fn=build_model,verbose=1,validation_data=(testX,testY))


In [29]:
grid_search = grid_search.fit(trainX,trainY)


Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


### best parameters found:

In [30]:
grid_search.best_params_

{'batch_size': 16, 'epochs': 8, 'optimizer': 'adam'}

### save as model

In [31]:
my_model=grid_search.best_estimator_.model

In [32]:
prediction=my_model.predict(testX)
print("prediction\n", prediction)
print("\nPrediction Shape-",prediction.shape)

prediction
 [[0.53306687]
 [0.56026983]
 [0.5725881 ]
 [0.5073286 ]
 [0.47388136]
 [0.4752742 ]
 [0.48759896]
 [0.5035323 ]
 [0.51312345]
 [0.50672495]
 [0.49422827]
 [0.45985073]
 [0.45178947]
 [0.44010532]
 [0.45885506]
 [0.44062746]
 [0.42546743]
 [0.48388258]
 [0.51191735]
 [0.54677767]
 [0.60336447]
 [0.6419878 ]
 [0.6173706 ]
 [0.5225154 ]
 [0.5634895 ]
 [0.60098374]
 [0.61451507]
 [0.6250115 ]
 [0.60444766]
 [0.5697895 ]
 [0.6504686 ]
 [0.5968895 ]
 [0.6083263 ]
 [0.60688853]
 [0.5759181 ]
 [0.6077447 ]
 [0.60924864]
 [0.5451815 ]
 [0.5584999 ]
 [0.5649477 ]
 [0.6032754 ]
 [0.5570402 ]
 [0.6126048 ]
 [0.6214475 ]
 [0.6218126 ]
 [0.58364236]
 [0.6413205 ]
 [0.6939119 ]
 [0.589564  ]
 [0.7188803 ]
 [0.7436496 ]
 [0.76147753]
 [0.73226   ]
 [0.71603936]
 [0.6134053 ]
 [0.649323  ]
 [0.70165896]
 [0.6752071 ]
 [0.63699293]
 [0.64659065]
 [0.68536067]
 [0.70349455]
 [0.6614127 ]
 [0.5879668 ]
 [0.6032042 ]
 [0.6467566 ]
 [0.6799094 ]
 [0.63823783]
 [0.6841009 ]
 [0.7385176 ]
 [0.7687

In [33]:
scaler.inverse_transform(prediction)

ValueError: non-broadcastable output operand with shape (652,1) doesn't match the broadcast shape (652,11)

In [35]:
prediction_copies_array = np.repeat(prediction,11, axis=-1)

In [36]:
prediction_copies_array.shape


(652, 11)

In [37]:
pred=scaler.inverse_transform(np.reshape(prediction_copies_array,(len(prediction),11)))[:,0]

In [38]:
original_copies_array = np.repeat(testY,11, axis=-1)
original=scaler.inverse_transform(np.reshape(original_copies_array,(len(testY),11)))[:,0]

In [39]:
print("Pred Values-- " ,pred)
print("\nOriginal Values-- " ,original)

Pred Values--  [ 13.442334    15.245891    16.06259     11.7358885    9.518334
   9.610681    10.427811    11.484191    12.120085    11.695865
  10.867335     8.588103     8.053641     7.2789826    8.522091
   7.3136       6.3084908   10.181416    12.040121    14.351359
  18.103064    20.66379     19.03167     12.742772    15.459354
  17.94522     18.842348    19.538261    18.17488     15.877046
  21.226067    17.673773    18.432034    18.33671     16.28337
  18.393473    18.493183    14.245535    15.128542    15.556034
  18.09716     15.031767    18.715698    19.30197     19.326174
  16.795488    20.61955     24.10636     17.188095    25.761763
  27.403969    28.58596     26.648836    25.573408    18.76877
  21.150114    24.61999     22.866228    20.33263     20.96896
  23.539412    24.741688    21.951662    17.0822      18.092438
  20.979961    23.177994    20.415167    23.45589     27.063715
  29.06794     29.07288     18.058914    19.801569    22.039661
  19.90321     21.675093    

In [40]:
df_1000_days_past=df.iloc[-1000:,:]
df_1000_days_past.tail()

Unnamed: 0_level_0,tempmax.1,tempmin.1,temp.1,humidity.1,precip.1,precipcover.1,snowdepth.1,windgust.1,windspeed.1,sealevelpressure.1,avg_temp_global
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-12-27,-20.0,-25.0,-22.1,62.3,0.0,0.0,27.0,33.5,19.5,1021.5,0.85
2021-12-28,-16.0,-22.0,-18.5,69.8,0.01,4.17,27.0,35.3,18.0,1019.3,0.85
2021-12-29,-15.0,-19.0,-17.2,67.9,0.0,0.0,27.0,35.3,22.0,1020.0,0.85
2021-12-30,-10.0,-16.0,-13.3,70.1,0.01,4.17,27.0,37.1,18.0,1012.7,0.85
2022-01-01,-2.0,-19.0,-11.9,69.1,0.05,8.33,26.0,82.0,62.0,1020.8,


### dataframe with actual and predicted temperatures

In [47]:
comparison = pd.DataFrame({'original': original, 'pred': pred}, columns=['original', 'pred'])

In [61]:
original_df = pd.DataFrame(original)

In [72]:
comparison.iloc[450:500,:]

Unnamed: 0,original,pred
450,23.1,25.554848
451,25.1,23.69171
452,23.1,26.792322
453,25.0,24.569805
454,28.1,28.435606
455,29.1,28.308559
456,24.1,29.12656
457,32.9,24.69746
458,35.2,34.224075
459,34.1,31.355057
