# Baseline: LSTM Neural Network on real-world data

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import tensorflow.keras as keras
from sklearn.preprocessing import MinMaxScaler

# Seed every RNG the notebook draws from. Seeding NumPy alone does not cover
# Keras weight initialisation, which draws from TensorFlow's own generator.
SEED = 1
np.random.seed(SEED)
tf.random.set_seed(SEED)

sns.set_style("darkgrid")

2021-12-22 18:19:47.135939: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-22 18:19:47.135993: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [4]:
# Load the prepared dataset (path is relative to the working directory).
# NOTE(review): the df.head() output recorded below shows data-like values
# ("0.0", "10055.77", ...) as column names, which suggests the CSV has no header
# row and its first record is being consumed as the header — confirm, and if so
# pass header=None with explicit column names.
df = pd.read_csv('prepared_dataset.csv')

## Exploring the dataset

In [5]:
# (rows, columns) of the frame as loaded.
df.shape

(44031, 9)

In [6]:
# Preview the first rows.
df.head()

Unnamed: 0,0.0,0,0.1,10055.77,16858.29,166.07,5.1,0.0.1,288088612#0_0
0,1.0,1,0,10056.35,16855.95,166.07,7.51,2.41,288088612#0_0
1,1.0,2,1,7268.33,13378.84,232.86,5.1,0.0,-347355524#0_0
2,2.0,3,0,10057.28,16852.19,166.07,11.39,3.87,288088612#0_0
3,2.0,4,1,7266.77,13377.66,232.86,7.06,1.96,-347355524#0_0
4,2.0,5,2,465.57,9993.49,53.04,5.1,0.0,347355524#0_0


In [None]:
# Summary statistics, using pandas' describe() defaults.
df.describe()

In [None]:
# Non-null count per column.
df.count()

In [None]:
# Remove the unnamed leading column (apparently a saved row index).
# Rebinding instead of mutating in place, and tolerating an already-missing
# column, keeps this cell safe to re-run without a KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Preview the frame after the column drop.
df.head()

In [None]:
# Distinct values in the 'Car1' column.
# NOTE(review): the df.head() output recorded earlier in this notebook shows no
# column named 'Car1' — confirm the column exists, otherwise this raises KeyError.
df['Car1'].unique()

## Visualizing the dataset

In [None]:
# Column labels of the frame (a pandas Index at this point).
cols = df.columns

In [None]:
# Convert to a plain list; the plotting cells call cols.index(col).
cols = list(cols)

In [None]:
# Show the column names.
cols

In [None]:
values = df.to_numpy()

# One stacked panel per column, each labelled with its column name so the
# series can be told apart. enumerate() supplies the column position directly
# instead of re-searching `cols` on every iteration.
fig, axes = plt.subplots(len(cols), 1, figsize=(25, 35), dpi=88, squeeze=False)
for col_idx, (ax, col) in enumerate(zip(axes[:, 0], cols)):
  ax.plot(values[:, col_idx])
  ax.set_ylabel(col)

plt.show()

In [None]:
# Display the raw array (NumPy abbreviates large arrays in the repr).
values

In [None]:
# Preview only the first few rows; a full-slice `values[:]` just repeats the
# output of displaying `values` itself.
values[:5]

In [None]:
# (rows, columns) of the array form.
values.shape

## Preparing data for LSTM


### Train Test Split

In [None]:
# Row count here determines where the train/test cut falls.
df.shape

In [None]:
# Row index of a 90/10 cut, derived from the actual frame length rather than a
# literal row count (the frame loaded above reports 44031 rows, not 5012).
int(0.9 * len(df))

In [None]:
# Ordered (unshuffled) 90/10 split: the leading rows train the model, the
# remainder is held out for testing. The cut is computed from the frame length;
# the previous literal 4510 was only correct for a 5012-row frame.
TRAIN_FRACTION = 0.9
split_index = int(len(df) * TRAIN_FRACTION)

train_dataset = df.iloc[:split_index]
test_dataset = df.iloc[split_index:]

In [None]:
# Size of the training split.
train_dataset.shape

In [None]:
# Size of the held-out test split.
test_dataset.shape

### Normalize the newly generated datasets

In [None]:
from sklearn.preprocessing import RobustScaler

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits.
# NOTE(review): assumes every column is numeric — confirm against the loaded frame.
f_transformer = RobustScaler().fit(train_dataset.to_numpy())

normalized_train_dataset, normalized_test_dataset = (
    f_transformer.transform(split.to_numpy())
    for split in (train_dataset, test_dataset)
)

In [None]:
# Shape of the scaled training array.
normalized_train_dataset.shape

In [None]:
# Shape of the scaled test array.
normalized_test_dataset.shape

In [None]:
def create_dataset(data, steps):
  """Slice a sequence into sliding windows and their next-step targets.

  For every start position ``i`` the feature window is ``data[i : i + steps]``
  and its label is the row immediately after it, ``data[i + steps]``.

  Args:
    data: array-like whose first axis is the sequence axis
      (e.g. shape ``(n_rows, n_features)``).
    steps: number of consecutive rows in each feature window.

  Returns:
    A ``(features, labels)`` tuple of arrays with shapes
    ``(n_rows - steps, steps, ...)`` and ``(n_rows - steps, ...)``.
    When ``data`` has ``steps`` rows or fewer, both arrays are empty but keep
    their trailing dimensions, so downstream ``.shape[1]`` / ``.shape[2]``
    lookups still work.

  Raises:
    ValueError: if ``steps`` is negative.
  """
  data = np.asarray(data)
  if steps < 0:
    raise ValueError("steps must be non-negative, got %r" % (steps,))

  row_shape = data.shape[1:]
  n_samples = len(data) - steps
  if n_samples <= 0:
    # Not enough rows for a single window: return correctly shaped empties
    # instead of shape-(0,) arrays.
    return (
        np.empty((0, steps) + row_shape, dtype=data.dtype),
        np.empty((0,) + row_shape, dtype=data.dtype),
    )

  features = np.stack([data[start:start + steps] for start in range(n_samples)])
  # Window i is followed by row i + steps, so the labels are simply the tail of
  # the data; copy so the result never aliases the caller's array.
  labels = data[steps:].copy()
  return features, labels

In [None]:
# Window length: each sample holds this many consecutive rows.
TIME_STEPS = 15

# Build (features, labels) pairs for both splits with the same window length.
(train_features, train_labels), (test_features, test_labels) = (
    create_dataset(split, TIME_STEPS)
    for split in (normalized_train_dataset, normalized_test_dataset)
)

In [None]:
# Feature/label shapes for each split; features should be 3-D
# (samples, timesteps, features) for the LSTM input.
print(train_features.shape, train_labels.shape)
print(test_features.shape, test_labels.shape)

## Data should be in the shape of [samples, timesteps, n_features]

## LSTM Model

In [None]:
# Single-layer LSTM regressor: reads a (timesteps, features) window and predicts
# the row that follows it. Input and output widths are taken from the prepared
# arrays so the network stays in step with the data instead of relying on a
# hard-coded output size of 9.
n_timesteps, n_features = train_features.shape[1:]
n_outputs = train_labels.shape[1]

model = keras.models.Sequential()
model.add(keras.layers.LSTM(50, input_shape=(n_timesteps, n_features)))
model.add(keras.layers.Dense(n_outputs))

model.compile(optimizer='adam', loss='mse')

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [None]:
# validation_split holds out the final 10% of the training windows for validation.
# Shuffling is disabled — presumably to keep the windows in their original
# order; confirm that is the intent.
history = model.fit(
    train_features,
    train_labels,
    epochs=50,
    batch_size=64,
    validation_split=0.1,
    shuffle=False,
)

In [None]:
# Training vs. validation loss per epoch, drawn on an explicit Axes.
fig, ax = plt.subplots()
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
  ax.plot(history.history[history_key], label=curve_label)
ax.set(title='Training Loss', ylabel='Loss', xlabel='Epochs')
ax.legend()
plt.show()

In [None]:
# Predict the next row for every test window (outputs are in scaled units,
# since the model was trained on the scaled arrays).
predictions = model.predict(test_features)

In [None]:
# Should match test_labels.shape for the comparison plot.
predictions.shape

In [None]:
# Shape of the ground-truth targets for the test windows.
test_labels.shape

In [None]:
# Overlay predicted and actual series, one panel per column (both in scaled
# units). Every panel is created before it is labelled; the earlier version
# called plt.title() ahead of plt.subplot(), so each title landed on the
# previous axes and the last panel never received one.
fig, axes = plt.subplots(len(cols), 1, figsize=(25, 35), dpi=88, squeeze=False)
fig.suptitle('Comparison between Actual and Predicted values')

for col_idx, (ax, col) in enumerate(zip(axes[:, 0], cols)):
  ax.plot(test_labels[:, col_idx], color='blue', label='Actual')
  ax.plot(predictions[:, col_idx], color='red', label='Predicted')
  ax.set_ylabel(col)
  ax.legend()

# Only the bottom panel carries the x-axis label.
axes[-1, 0].set_xlabel('Time')
plt.show()