In [None]:
import pandas as pd
import numpy as np
import os
import typing
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

# Load the semicolon-delimited CSV and convert it to a plain NumPy array.
df = pd.read_csv('data/K120.csv', delimiter=';').to_numpy()
# Drop the first column (timestamps) and cast the remaining columns to float.
data = np.array(df[:, 1:], dtype=float)
# Replace NaNs with 0 (band-aid solution).
# The fill value must be passed by keyword: the second positional parameter of
# np.nan_to_num is `copy`, so `np.nan_to_num(data, 0)` actually requested an
# in-place update and only produced zeros because 0.0 is the default fill.
# Note that +/-inf values are still mapped to very large finite numbers.
data = np.nan_to_num(data, nan=0.0)

In [None]:
# Chronological split: the first 50% of the time steps are used for training,
# the next 20% for validation and whatever remains for testing.
train_size, val_size = 0.5, 0.2

num_of_steps = data.shape[0]
num_train = int(num_of_steps * train_size)
num_val = int(num_of_steps * val_size)
val_end = num_train + num_val

# The raw (un-normalised) slices are what the model receives.
train_set = data[:num_train]
val_set = data[num_train:val_end]
test_set = data[val_end:]

# Per-column statistics of the training slice only, so that no information
# from the validation/test period leaks into a later normalisation.
mean = np.nanmean(train_set, axis=0)
std = np.nanstd(train_set, axis=0)

# To feed normalised data instead, transform the three sets like this:
#   train_set = (train_set - mean) / std
#   val_set = (val_set - mean) / std
#   test_set = (test_set - mean) / std

for subset in (train_set, val_set, test_set):
    print(subset.shape)

In [None]:
from keras.utils import timeseries_dataset_from_array

# Sliding-window configuration for the tf.data pipeline.
# The helper defined below wraps timeseries_dataset_from_array, which turns
# the data into sliding windows and batches them; the resulting
# tf.data.Dataset objects yield (input, label) tuples.
# Adapted from https://keras.io/examples/timeseries/timeseries_traffic_forecasting/

# Number of consecutive time steps in every input window.
input_sequence_length = 8
# How many steps after the end of the input window the label lies.
forecast_horizon = 1
# False: the label is the single step at the horizon; True: all steps up to it.
multi_horizon = False
# Number of windows per batch.
batch_size = 32

def create_tf_dataset(
    data_array: np.ndarray,
    input_sequence_length: int,
    forecast_horizon: int,
    batch_size: int = 128,
    shuffle: bool = True,
    multi_horizon: bool = False,
):
    """Build a batched sliding-window ``tf.data.Dataset`` of (inputs, targets).

    Inputs are windows of ``input_sequence_length`` consecutive rows of
    ``data_array``. The matching target is either the single row
    ``forecast_horizon`` steps after the end of the window
    (``multi_horizon=False``) or all ``forecast_horizon`` rows following the
    window (``multi_horizon=True``).

    Args:
        data_array: Time-major array; the first axis is the time step.
        input_sequence_length: Number of time steps per input window.
        forecast_horizon: Number of steps to forecast ahead; must be >= 1.
        batch_size: Number of windows per batch.
        shuffle: Whether to shuffle the *batches* (buffer of 100 batches).
            The order of the windows inside a batch is never changed.
        multi_horizon: See above.

    Returns:
        A cached and prefetched ``tf.data.Dataset`` yielding
        ``(inputs, targets)`` batch tuples.

    Raises:
        ValueError: If ``forecast_horizon`` is smaller than 1.
    """
    if forecast_horizon < 1:
        # data_array[:-0] would be an empty slice and silently yield no inputs.
        raise ValueError(
            f"forecast_horizon must be >= 1, got {forecast_horizon}"
        )

    # The last `forecast_horizon` rows can only ever be targets, never part of
    # an input window, so they are cut off here.
    inputs = timeseries_dataset_from_array(
        data_array[:-forecast_horizon],
        None,
        sequence_length=input_sequence_length,
        shuffle=False,
        batch_size=batch_size,
    )

    # Index of the first target row belonging to the first input window.
    target_offset = (
        input_sequence_length
        if multi_horizon
        else input_sequence_length + forecast_horizon - 1
    )
    target_seq_length = forecast_horizon if multi_horizon else 1
    targets = timeseries_dataset_from_array(
        data_array[target_offset:],
        None,
        sequence_length=target_seq_length,
        shuffle=False,
        batch_size=batch_size,
    )

    # Cache the deterministic windowing first. Shuffling *after* the cache
    # lets every epoch draw a fresh order; shuffling before it (as previously
    # done) freezes the order of the first epoch inside the cache.
    dataset = tf.data.Dataset.zip((inputs, targets)).cache()
    if shuffle:
        dataset = dataset.shuffle(100)

    # Prefetch is the last stage so it overlaps with the consumer.
    return dataset.prefetch(16)

# Training and validation datasets: shuffled (the helper's default) and
# batched with the global batch size. `multi_horizon` is forwarded explicitly
# so that all three datasets always share the same target layout; previously
# only the test dataset honoured the flag.
train_dataset, val_dataset = (
    create_tf_dataset(
        data_array,
        input_sequence_length,
        forecast_horizon,
        batch_size,
        multi_horizon=multi_horizon,
    )
    for data_array in [train_set, val_set]
)

# Test dataset: unshuffled, with a batch size equal to the number of test
# rows so that all windows end up in a single batch.
test_dataset = create_tf_dataset(
    test_set,
    input_sequence_length,
    forecast_horizon,
    batch_size=test_set.shape[0],
    shuffle=False,
    multi_horizon=multi_horizon,
)


In [None]:
# Sanity check: materialise the training dataset and look at one time step.
# Indexing is first batch -> inputs -> window 1 -> time step 4.
train_batches = list(train_dataset.as_numpy_iterator())
first_inputs, first_targets = train_batches[0]
first_inputs[1][4]

In [None]:
def splitSequence(seq, n_steps):
    """Cut ``seq`` into sliding windows and their next-step targets.

    Window ``i`` is ``seq[i:i + n_steps]`` and its target is the element
    directly after it, ``seq[i + n_steps]``. Windows are produced for as long
    as the target index still lies inside ``seq``.

    Args:
        seq: Indexable sequence (list or array); the first axis is time.
        n_steps: Number of time steps per window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the windows and the targets.
        Both are empty arrays of shape ``(0,)`` when ``seq`` is too short to
        yield a single window.
    """
    total = len(seq)
    # A window starting at i is valid while i + n_steps <= total - 1, and the
    # start index itself never exceeds the sequence length.
    n_windows = min(total, max(total - n_steps, 0))
    starts = range(n_windows)

    X = np.array([seq[start:start + n_steps] for start in starts])
    y = np.array([seq[start + n_steps] for start in starts])
    return X, y

# Build (windows, next-step targets) arrays for every split, all using the
# same window length.
(x_train, y_train), (x_val, y_val), (x_test, y_test) = (
    splitSequence(subset, input_sequence_length)
    for subset in (train_set, val_set, test_set)
)

In [None]:
from keras.losses import *
from keras.layers import *
from keras.optimizers import Adam

# The model reads windows of all columns and predicts the next value of
# every column, so input and output width are both the column count.
input_dim = data.shape[1]
units = 64
output_size = data.shape[1]

model = keras.models.Sequential()
model.add(InputLayer((input_sequence_length, input_dim)))
# return_sequences=True is required so the second LSTM receives the whole
# sequence of hidden states instead of only the last one.
model.add(LSTM(units, return_sequences=True))
model.add(LSTM(units))
model.add(Dense(output_size))
model.summary()

model.compile(
    loss=MeanSquaredError(),
    optimizer=Adam(learning_rate=0.01),
    metrics=[keras.metrics.RootMeanSquaredError()],
)

model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=500,
    batch_size=32,
    # Stop once the validation loss has not improved for 10 epochs in a row,
    # to prevent overfitting. restore_best_weights=True rolls the model back
    # to the best epoch; without it the model keeps the weights of the last
    # epoch, i.e. 10 epochs past the best validation loss.
    callbacks=[
        keras.callbacks.EarlyStopping(
            monitor="val_loss",
            patience=10,
            restore_best_weights=True,
        )
    ],
)

In [None]:
# Predict on the test dataset and print the smallest predicted value.
results = model.predict(test_dataset)
print(results.min())

# Collect the labels from the test dataset, batch by batch.
y = np.concatenate([labels for _inputs, labels in test_dataset], axis=0)

# Each prediction row holds one value per sensor; per the original note the
# valid indices are 0 through 9. Change this to inspect another sensor.
sensor = 7

plt.plot(results[:, sensor], label='predicted')
# The labels carry an extra middle axis (see y.shape), hence the 0 index.
plt.plot(y[:, 0, sensor], label='should')
plt.legend()
plt.show()

In [None]:
# Report the compiled loss (MSE) and metric (RMSE) on the test windows.
model.evaluate(x=x_test, y=y_test)