In [37]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [38]:
# fix random seed for reproducibility
tf.random.set_seed(7)

In [54]:
# load the dataset
dataframe = pd.read_csv('dataset_custom.csv', usecols=[2,3,4,5], engine='python')
dataset = dataframe.values
dataset = dataset.astype('float32')
dataset

array([[1.2000e+01, 4.0000e+00, 1.0000e+00, 1.3478e+04],
       [1.2000e+01, 4.0000e+00, 2.0000e+00, 1.2865e+04],
       [1.2000e+01, 4.0000e+00, 3.0000e+00, 1.2577e+04],
       ...,
       [1.0000e+00, 0.0000e+00, 2.2000e+01, 2.0820e+04],
       [1.0000e+00, 0.0000e+00, 2.3000e+01, 2.0415e+04],
       [1.0000e+00, 1.0000e+00, 0.0000e+00, 1.9993e+04]], dtype=float32)

In [55]:
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
dataset

array([[1.        , 0.6666667 , 0.04347826, 0.30037004],
       [1.        , 0.6666667 , 0.08695652, 0.25312167],
       [1.        , 0.6666667 , 0.13043478, 0.23092341],
       ...,
       [0.        , 0.        , 0.95652175, 0.8662711 ],
       [0.        , 0.        , 1.        , 0.8350547 ],
       [0.        , 0.16666667, 0.        , 0.8025282 ]], dtype=float32)

In [56]:
# split into train and test sets
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
print(len(train), len(test))

81252 40021


In [57]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
	dataX, dataY = [], []
	for i in range(len(dataset)-look_back-1):
		a = dataset[i:(i+look_back), 0]
		dataX.append(a)
		dataY.append(dataset[i + look_back, 0])
	return np.array(dataX), np.array(dataY)

In [75]:
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = train[:, :3], train[:, 3:4]
testX, testY = test[:, :3], test[:, 3:4]

In [61]:
# reshape input to be [samples, time steps, features]
num_samples = 1000
time_steps = 24
features = 3

X = np.zeros((num_samples, time_steps, features))

for i in range(num_samples):
    X[i] = trainX[i*time_steps:(i+1)*time_steps, :]
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

array([[[1.]],

       [[1.]],

       [[1.]],

       ...,

       [[0.]],

       [[0.]],

       [[0.]]], dtype=float32)

In [31]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)

Epoch 1/100


  super().__init__(**kwargs)


81250/81250 - 59s - 731us/step - loss: 0.0033
Epoch 2/100
81250/81250 - 51s - 625us/step - loss: 0.0025
Epoch 3/100
81250/81250 - 50s - 610us/step - loss: 0.0025
Epoch 4/100
81250/81250 - 49s - 605us/step - loss: 0.0025
Epoch 5/100
81250/81250 - 50s - 611us/step - loss: 0.0025
Epoch 6/100
81250/81250 - 50s - 621us/step - loss: 0.0025
Epoch 7/100
81250/81250 - 49s - 599us/step - loss: 0.0025
Epoch 8/100
81250/81250 - 50s - 612us/step - loss: 0.0025
Epoch 9/100
81250/81250 - 51s - 624us/step - loss: 0.0025
Epoch 10/100
81250/81250 - 50s - 616us/step - loss: 0.0025
Epoch 11/100
81250/81250 - 51s - 626us/step - loss: 0.0025
Epoch 12/100
81250/81250 - 50s - 614us/step - loss: 0.0025
Epoch 13/100
81250/81250 - 57s - 696us/step - loss: 0.0025
Epoch 14/100
81250/81250 - 44s - 538us/step - loss: 0.0025
Epoch 15/100
81250/81250 - 44s - 543us/step - loss: 0.0025
Epoch 16/100
81250/81250 - 44s - 544us/step - loss: 0.0025
Epoch 17/100
81250/81250 - 44s - 537us/step - loss: 0.0025
Epoch 18/100
81250

<keras.src.callbacks.history.History at 0x147652e10>

In [47]:
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
# invert predictions
print(trainPredict)
trainPredict = scaler.inverse_transform(trainPredict)
print(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# calculate root mean squared error
trainScore = np.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = np.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

[1m2540/2540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 561us/step
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 494us/step
[[0.88958156]
 [0.74992406]
 [0.6845586 ]
 ...
 [0.40091074]
 [0.482745  ]
 [0.62294036]]


ValueError: non-broadcastable output operand with shape (81250,1) doesn't match the broadcast shape (81250,4)

In [34]:
model.save('my_model.keras')  # creates a HDF5 file 'my_model.h5'