In [53]:
import math
import matplotlib.pyplot as plt
import keras
import pandas as pd
import numpy as np
from keras.applications.densenet import layers
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
import sns
%matplotlib inline

In [54]:

df = pd.read_csv('../datasets/TSLA.csv')
print(df.head()) #7 columns, including the Date.

         Date       Open       High        Low      Close  Adj Close  \
0  2018-01-16  22.502666  23.000000  22.320000  22.670668  22.670668   
1  2018-01-17  22.698000  23.266666  22.650000  23.143999  23.143999   
2  2018-01-18  23.044666  23.486668  22.916000  22.971333  22.971333   
3  2018-01-19  23.000000  23.372667  22.840000  23.334667  23.334667   
4  2018-01-22  23.293333  23.855333  23.280001  23.437332  23.437332   

      Volume  
0   97114500  
1  106552500  
2   85287000  
3   73324500  
4   93156000  


In [55]:

#Separate dates for future plotting
train_dates = pd.to_datetime(df['Date'])
print(train_dates.tail(15)) #Check last few dates.

#Variables for training
cols  = list(df)[1:6]
#Date and volume columns are not used in training.
print(cols) #['Open', 'High', 'Low', 'Close', 'Adj Close']

#New dataframe with only training data - 5 columns
df_for_training = df[cols].astype(float)

1244   2022-12-22
1245   2022-12-23
1246   2022-12-27
1247   2022-12-28
1248   2022-12-29
1249   2022-12-30
1250   2023-01-03
1251   2023-01-04
1252   2023-01-05
1253   2023-01-06
1254   2023-01-09
1255   2023-01-10
1256   2023-01-11
1257   2023-01-12
1258   2023-01-13
Name: Date, dtype: datetime64[ns]
['Open', 'High', 'Low', 'Close', 'Adj Close']


In [56]:

# df_for_plot=df_for_training.tail(5000)
# df_for_plot.plot.line()

#LSTM uses sigmoid and tanh that are sensitive to magnitude so values need to be normalized
# normalize the dataset
scaler = StandardScaler()
scaler = scaler.fit(df_for_training)
df_for_training_scaled = scaler.transform(df_for_training)


In [57]:

#As required for LSTM networks, we require to reshape an input data into n_samples x timesteps x n_features.
#In this example, the n_features is 5. We will make timesteps = 14 (past days data used for training).

#Empty lists to be populated using formatted training data
trainX = []
trainY = []

n_future = 1   # Number of days we want to look into the future based on the past days.
n_past = 14  # Number of past days we want to use to predict the future.

#Reformat input data into a shape: (n_samples x timesteps x n_features)
#In my example, my df_for_training_scaled has a shape (12823, 5)
#12823 refers to the number of data points and 5 refers to the columns (multi-variables).
for i in range(n_past, len(df_for_training_scaled) - n_future +1):
    trainX.append(df_for_training_scaled[i - n_past:i, 0:df_for_training.shape[1]])
    trainY.append(df_for_training_scaled[i + n_future - 1:i + n_future, 0])

trainX, trainY = np.array(trainX), np.array(trainY)

print('trainX shape == {}.'.format(trainX.shape))
print('trainY shape == {}.'.format(trainY.shape))

trainX shape == (1245, 14, 5).
trainY shape == (1245, 1).


In [58]:
testX = trainX[1000:]
testY = trainY[1000:]
trainX = trainX[:1000]
trainY = trainY[:1000]
testX.shape

(245, 14, 5)

In [59]:
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
model.add(LSTM(32, activation='relu', return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(trainY.shape[1]))

model.compile(optimizer='adam', loss='mse')
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_12 (LSTM)              (None, 14, 64)            17920     
                                                                 
 lstm_13 (LSTM)              (None, 32)                12416     
                                                                 
 dropout_6 (Dropout)         (None, 32)                0         
                                                                 
 dense_6 (Dense)             (None, 1)                 33        
                                                                 
Total params: 30,369
Trainable params: 30,369
Non-trainable params: 0
_________________________________________________________________


In [60]:


# fit the model
history = model.fit(trainX, trainY, epochs=5, batch_size=16, validation_split=0.1, verbose=1)




Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [61]:
predictions = model.predict(testX)



In [62]:
predictions = scaler.inverse_transform(predictions)
predictions

ValueError: non-broadcastable output operand with shape (245,1) doesn't match the broadcast shape (245,5)