In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.losses import MeanSquaredError
from keras.optimizers import Adam

In [None]:
DATA_PATH = 'data_cars/'
# os.listdir() gives no ordering guarantee; sorting keeps the column (sensor)
# indices of `data` identical across machines and runs. Hidden entries and
# sub-directories (e.g. .DS_Store, .ipynb_checkpoints) are skipped.
all_files = sorted(
    entry for entry in os.listdir(DATA_PATH)
    if not entry.startswith('.') and os.path.isfile(os.path.join(DATA_PATH, entry))
)

all_dataframes = []
for file in all_files:
    print(f"Reading file: {file}")
    file_name = file.split('.')[0]
    df = pd.read_csv(os.path.join(DATA_PATH, file), sep=';')

    # Each file is expected to hold its timestamps in a column named like the
    # file itself (without extension); that column becomes the datetime index.
    df['date'] = pd.to_datetime(df[file_name], format='%Y-%m-%d %H:%M')
    df = df.drop(columns=[file_name])

    df = df.set_index('date')
    # Prefix the remaining columns with the file name so columns coming from
    # different files can be told apart after the concat below.
    df.columns = [f"{file_name}_{col}" for col in df.columns]
    all_dataframes.append(df)
    print(f"Finished reading file: {file}, shape = {df.shape}")

# Align all files side by side on the datetime index.
combined_df = pd.concat(all_dataframes, axis=1)
# Forward-fill missing values; DataFrame.ffill() replaces the deprecated
# fillna(method='ffill', inplace=True).
combined_df = combined_df.ffill()
combined_df['hour'] = combined_df.index.hour
combined_df['day_of_week'] = combined_df.index.dayofweek

# The two calendar columns appended above are sliced off again, so `data`
# contains only the columns read from the files.
data = np.array(combined_df, dtype=float)[:, :-2]

In [None]:
from keras import layers

def getMostCorrelated(sensor, no_extraSensors, data):
    """Return the columns of ``data`` that correlate most strongly with one column.

    Args:
        sensor: Column index of the reference sensor in ``data``.
        no_extraSensors: Number of columns to return. The reference column
            usually counts towards this number, because its correlation with
            itself is the largest value in its row.
        data: 2-D array with one column per sensor (it is transposed so that
            every column is treated as one variable).

    Returns:
        ``data`` restricted to the selected columns, ordered by ascending
        correlation with ``sensor``. An array with zero columns is returned
        when ``no_extraSensors`` is not positive.
    """
    if no_extraSensors <= 0:
        # `[-0:]` would select *every* column, so handle this case explicitly.
        return data[:, :0]

    # Constant columns have zero variance and produce NaN correlations (plus a
    # RuntimeWarning); silence the warning and handle the NaNs below.
    with np.errstate(divide='ignore', invalid='ignore'):
        corr = np.corrcoef(data.T)

    # np.argsort places NaN last, which would rank undefined correlations as
    # the *best* ones. Map them to -inf so they are picked last instead.
    scores = np.where(np.isnan(corr[sensor]), -np.inf, corr[sensor])
    best = np.argsort(scores)[-no_extraSensors:]
    return data[:, best]

def getScores(model, x, y, scaler):
    """Score a model's predictions in the original (unscaled) target units.

    Args:
        model: Object exposing ``predict(x)``.
        x: Model inputs.
        y: Scaled ground-truth targets matching the model output.
        scaler: Fitted scaler whose ``inverse_transform`` undoes the target scaling.

    Returns:
        Tuple ``(rmse, mae)`` computed on the inverse-transformed values.
    """
    y_hat = scaler.inverse_transform(model.predict(x))
    y_true = scaler.inverse_transform(y)

    rmse = np.sqrt(mean_squared_error(y_true, y_hat))
    mae = mean_absolute_error(y_true, y_hat)
    return rmse, mae

def getScoresConv(model, x, y, scaler):
    """Score sequence predictions on their last time step, in original units.

    The prediction is reshaped to its first two dimensions before the scaling
    is undone, then only the last column of predictions and targets is compared.

    Args:
        model: Object exposing ``predict(x)``.
        x: Model inputs.
        y: Scaled 2-D targets (one row per sample).
        scaler: Fitted scaler whose ``inverse_transform`` undoes the target scaling.

    Returns:
        Tuple ``(rmse, mae)`` for the last column.
    """
    raw = model.predict(x)
    n_samples, n_steps = raw.shape[0], raw.shape[1]

    pred = scaler.inverse_transform(raw.reshape(n_samples, n_steps))
    truth = scaler.inverse_transform(y)

    last_true, last_pred = truth[:, -1], pred[:, -1]
    rmse = np.sqrt(mean_squared_error(last_true, last_pred))
    mae = mean_absolute_error(last_true, last_pred)
    return rmse, mae


def splitSequence(seq, n_steps):
    """Cut a sequence into sliding windows with the next element as target.

    Args:
        seq: Sequence or array indexed along its first axis.
        n_steps: Window length.

    Returns:
        Tuple ``(X, y)`` of numpy arrays: ``X[k]`` is ``seq[k:k + n_steps]`` and
        ``y[k]`` is ``seq[k + n_steps]``. Both are empty when the sequence is
        not longer than ``n_steps``.
    """
    # A window starting at `start` needs seq[start + n_steps] to exist, and the
    # start itself never runs past the end of the sequence.
    n_windows = min(len(seq), len(seq) - n_steps)
    starts = range(n_windows)

    X = np.array([seq[start:start + n_steps] for start in starts])
    y = np.array([seq[start + n_steps] for start in starts])
    return X, y

def shiftSequence(seq, n_steps):
    """Cut a sequence into sliding windows paired with the window one step later.

    Args:
        seq: Sequence or array indexed along its first axis.
        n_steps: Window length.

    Returns:
        Tuple ``(X, y)`` of numpy arrays: ``X[k]`` is ``seq[k:k + n_steps]`` and
        ``y[k]`` is ``seq[k + 1:k + n_steps + 1]``. Both are empty when the
        sequence is not longer than ``n_steps``.
    """
    # The shifted window ends at index start + n_steps, which must exist, and
    # the start itself never runs past the end of the sequence.
    n_windows = min(len(seq), len(seq) - n_steps)
    starts = range(n_windows)

    X = np.array([seq[start:start + n_steps] for start in starts])
    y = np.array([seq[start + 1:start + n_steps + 1] for start in starts])
    return X, y

# Following the structure laid out in github.com/locuslab/TCN
from keras.models import Sequential
def createTemporalConvNetwork(n_layers, n_sensors, look_back, n_outputs, kernel_size=2, dropout=0.2):
    """Build an (uncompiled) stack of dilated causal 1-D convolutions.

    Every level except the last consists of two Conv1D layers, each followed by
    dropout; the dilation rate doubles from one level to the next. The last
    level is a single Conv1D layer without dropout.

    Args:
        n_layers: Number of levels, including the final single-convolution level.
        n_sensors: Number of input features per time step.
        look_back: Number of time steps per input window.
        n_outputs: Filter count per level; ``n_outputs[-1]`` is used for the last level.
        kernel_size: Convolution kernel size shared by all layers.
        dropout: Dropout rate applied after each non-final convolution.

    Returns:
        The assembled ``Sequential`` model.
    """
    def causal_conv(filters, level):
        # ReLU convolution with causal padding, dilated by 2**level.
        return layers.Conv1D(filters, kernel_size=kernel_size, padding='causal',
                             activation='relu', dilation_rate=2**level)

    modelTCN = Sequential()
    modelTCN.add(layers.InputLayer((look_back, n_sensors)))

    for level in range(n_layers - 1):
        for _ in range(2):
            modelTCN.add(causal_conv(n_outputs[level], level))
            modelTCN.add(layers.Dropout(dropout))

    modelTCN.add(causal_conv(n_outputs[-1], n_layers - 1))
    return modelTCN

# Following the structure laid out in github.com/locuslab/TCN
def createLSTMNetwork(n_sensors, look_back, n_outputs):
    """Build an (uncompiled) two-layer LSTM model with a dense output layer.

    Args:
        n_sensors: Number of input features per time step.
        look_back: Number of time steps per input window.
        n_outputs: Number of units of the final Dense layer.

    Returns:
        The assembled ``Sequential`` model.
    """
    modelLSTM = Sequential()
    for layer in (
        layers.InputLayer((look_back, n_sensors)),
        # The first LSTM passes its full sequence on to the second one.
        layers.LSTM(128, return_sequences=True),
        layers.LSTM(128),
        layers.Dense(n_outputs),
    ):
        modelLSTM.add(layer)
    return modelLSTM

In [None]:
def experimentTCN(diff_inputs, samples, averaging, look_back):
    """Train and score TCN forecasters for several targets and input-set sizes.

    Reads the module-level ``data`` array. For every target column in
    ``samples`` and every count in ``diff_inputs``, the model input consists of
    that many columns most correlated with the target. ``averaging`` models are
    trained per configuration and their test scores are averaged.

    Args:
        diff_inputs: Iterable of input-column counts to compare.
        samples: Iterable of target column indices into ``data``.
        averaging: Number of training repetitions per configuration.
        look_back: Window length in time steps.

    Returns:
        Tuple ``(RMSE_scores, MAE_scores)``; both arrays have shape
        ``(len(samples), len(diff_inputs))``.
    """
    # Chronological split: first 60 % train, next 15 % validation, rest test.
    num_of_steps = data.shape[0] - look_back
    train_size = 0.6
    val_size = 0.15
    num_train = int(num_of_steps * train_size)
    num_val = int(num_of_steps * val_size)

    # Filter counts for the six TCN levels (identical for every run).
    layer_widths = [64, 128, 128, 128, 64, 1]

    RMSE_scores = np.zeros((len(samples), len(diff_inputs)))
    MAE_scores = np.zeros((len(samples), len(diff_inputs)))
    for sensId, sensor in enumerate(samples):

        # Targets: the sensor's own windows shifted one step ahead.
        _, y = shiftSequence(data[:, sensor], look_back)
        # NOTE(review): the scalers here are fitted on the complete series, so
        # validation/test statistics influence the scaling — consider fitting
        # on the training portion only.
        y_scaler = MinMaxScaler()
        y = y_scaler.fit_transform(y)
        y_train, y_val, y_test = y[:num_train], y[num_train:num_train+num_val], y[num_train+num_val:]

        for idx, inputs in enumerate(diff_inputs):
            correlated_data = getMostCorrelated(sensor, inputs, data)
            scaler = MinMaxScaler()
            correlated_data = scaler.fit_transform(correlated_data)
            x, _ = shiftSequence(correlated_data, look_back)
            x_train, x_val, x_test = x[:num_train], x[num_train:num_train+num_val], x[num_train+num_val:]

            scores_rmse = np.zeros(averaging)
            scores_mae = np.zeros(averaging)
            for av in range(averaging):
                # Many models are built in this loop; release the state Keras
                # keeps for earlier ones so memory does not grow with each run.
                keras.backend.clear_session()
                model = createTemporalConvNetwork(6, inputs, look_back, layer_widths, kernel_size=3)
                model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.001), metrics=[keras.metrics.RootMeanSquaredError()],)
                cb = [keras.callbacks.EarlyStopping(patience=5)]
                model.fit(x=x_train, y=y_train, validation_data=(x_val, y_val), batch_size=32, epochs=500, callbacks=cb)
                rmse, mae = getScoresConv(model, x_test, y_test, y_scaler)
                print(f'{rmse:.2f} RMSE')
                scores_rmse[av] = rmse
                scores_mae[av] = mae
            RMSE_scores[sensId, idx] = np.average(scores_rmse)
            MAE_scores[sensId, idx] = np.average(scores_mae)
    return RMSE_scores, MAE_scores


In [None]:
def experimentLSTM(diff_inputs, samples, averaging, look_back):
    """Train and score LSTM forecasters for several targets and input-set sizes.

    Reads the module-level ``data`` array. For every target column in
    ``samples`` and every count in ``diff_inputs``, the model input consists of
    that many columns most correlated with the target, and the target is the
    value following each input window. ``averaging`` models are trained per
    configuration and their test scores are averaged.

    Args:
        diff_inputs: Iterable of input-column counts to compare.
        samples: Iterable of target column indices into ``data``.
        averaging: Number of training repetitions per configuration.
        look_back: Window length in time steps.

    Returns:
        Tuple ``(RMSE_scores, MAE_scores)``; both arrays have shape
        ``(len(samples), len(diff_inputs))``.
    """
    # Chronological split: first 60 % train, next 15 % validation, rest test.
    num_of_steps = data.shape[0] - look_back
    train_size = 0.6
    val_size = 0.15
    num_train = int(num_of_steps * train_size)
    num_val = int(num_of_steps * val_size)

    RMSE_scores = np.zeros((len(samples), len(diff_inputs)))
    MAE_scores = np.zeros((len(samples), len(diff_inputs)))
    for sensId, sensor in enumerate(samples):

        # Targets: the single value that follows each window.
        _, y = splitSequence(data[:, sensor], look_back)
        # NOTE(review): the scalers here are fitted on the complete series, so
        # validation/test statistics influence the scaling — consider fitting
        # on the training portion only.
        y_scaler = StandardScaler()
        y = y_scaler.fit_transform(y.reshape(-1, 1))
        y_train, y_val, y_test = y[:num_train], y[num_train:num_train+num_val], y[num_train+num_val:]

        for idx, inputs in enumerate(diff_inputs):
            correlated_data = getMostCorrelated(sensor, inputs, data)
            scaler = StandardScaler()
            correlated_data = scaler.fit_transform(correlated_data)
            x, _ = splitSequence(correlated_data, look_back)
            x_train, x_val, x_test = x[:num_train], x[num_train:num_train+num_val], x[num_train+num_val:]

            scores_rmse = np.zeros(averaging)
            scores_mae = np.zeros(averaging)
            for av in range(averaging):
                # Many models are built in this loop; release the state Keras
                # keeps for earlier ones so memory does not grow with each run.
                keras.backend.clear_session()
                model = createLSTMNetwork(inputs, look_back, 1)
                model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.001), metrics=[keras.metrics.RootMeanSquaredError()],)
                cb = [keras.callbacks.EarlyStopping(patience=5)]
                model.fit(x=x_train, y=y_train, validation_data=(x_val, y_val), batch_size=32, epochs=500, callbacks=cb)
                rmse, mae = getScores(model, x_test, y_test, y_scaler)
                print(f'{rmse:.2f} RMSE')
                scores_rmse[av] = rmse
                scores_mae[av] = mae
            RMSE_scores[sensId, idx] = np.average(scores_rmse)
            MAE_scores[sensId, idx] = np.average(scores_mae)
    return RMSE_scores, MAE_scores


In [None]:
# Target sensors, given as column indices into `data`.
outputs = [0, 23, 78]
# Numbers of most-correlated input columns to compare.
inputs = [14, 16, 18, 20]
# Both experiments: 10 training repetitions per configuration, look-back of 96 steps.
rmse_tcn, mae_tcn = experimentTCN(inputs, outputs, 10, 96)
rmse_lstm, mae_lstm = experimentLSTM(inputs, outputs, 10, 96)

In [None]:
# Display the averaged score matrices (rows: target sensors, columns: input counts).
rmse_tcn, mae_tcn, rmse_lstm, mae_lstm

In [None]:
# NOTE(review): no later cell visible here reads these two values (`outputs` is
# reassigned before its next use) — confirm whether this cell is a leftover.
outputs = [0, 23, 78]
inputs = [1, 2, 4, 6, 8, 10, 12]


In [None]:
# Stand-alone dataset preparation for a single sensor, following the same steps
# as the body of experimentLSTM (next-step target, standardised, chronological split).
look_back = 96
num_of_steps = data.shape[0] - look_back  # number of (window, target) pairs produced below
train_size = 0.6
val_size = 0.15
num_train = int(num_of_steps * train_size)
num_val = int(num_of_steps * val_size)

sensor = 0
# Target: the value that follows each look-back window of the chosen sensor.
_, y = splitSequence(data[:, sensor], look_back)
y_scaler = StandardScaler()
y = y_scaler.fit_transform(y.reshape(-1, 1))  # the scaler expects a 2-D column
y_train, y_val, y_test = y[:num_train], y[num_train:num_train+num_val], y[num_train+num_val:]

# Input: the single most correlated column (presumably the sensor's own series,
# since a column correlates perfectly with itself — verify if NaNs are present).
correlated_data = getMostCorrelated(sensor, 1, data)
scaler = StandardScaler()
correlated_data = scaler.fit_transform(correlated_data)
x, _ = splitSequence(correlated_data, look_back)
x_train, x_val, x_test = x[:num_train], x[num_train:num_train+num_val], x[num_train+num_val:]

In [None]:
def getScores(model, x, y, scaler):
    """Return ``(rmse, mae)`` of the model's predictions in unscaled units.

    NOTE(review): a function with this name and the same statements is already
    defined earlier in the notebook; this definition replaces it when run.

    Args:
        model: Object exposing ``predict(x)``.
        x: Model inputs.
        y: Scaled ground-truth targets matching the model output.
        scaler: Fitted scaler whose ``inverse_transform`` undoes the target scaling.
    """
    restored_pred = scaler.inverse_transform(model.predict(x))
    restored_true = scaler.inverse_transform(y)

    squared_error = mean_squared_error(restored_true, restored_pred)
    absolute_error = mean_absolute_error(restored_true, restored_pred)
    return np.sqrt(squared_error), absolute_error

def getScoresConv(model, x, y, scaler):
    """Return ``(rmse, mae)`` for the last time step of sequence predictions.

    The last entry along the second axis is taken from both the predictions and
    the targets *before* the scaling is undone.

    NOTE(review): this replaces the earlier ``getScoresConv`` of the same name,
    which inverse-transforms first and slices afterwards; callers written for
    the earlier version may not work with this one.

    Args:
        model: Object exposing ``predict(x)``.
        x: Model inputs.
        y: Scaled targets, indexable as ``y[:, -1]``.
        scaler: Fitted scaler whose ``inverse_transform`` undoes the target scaling.
    """
    predicts = model.predict(x)

    final_pred = scaler.inverse_transform(predicts[:, -1])
    final_true = scaler.inverse_transform(y[:, -1])

    rmse = np.sqrt(mean_squared_error(final_true, final_pred))
    mae = mean_absolute_error(final_true, final_pred)
    return rmse, mae


In [None]:
def experimentTCN_output(outputs, averaging, look_back):
    """Train and score TCNs that forecast several columns at once.

    Reads the module-level ``data`` array. All columns of ``data`` are the
    model input; for every entry ``output`` of ``outputs`` the first ``output``
    columns are the targets. ``averaging`` models are trained per entry and
    their test scores are averaged.

    Args:
        outputs: Iterable of target-column counts to compare.
        averaging: Number of training repetitions per entry.
        look_back: Window length in time steps.

    Returns:
        Tuple ``(RMSE_scores, MAE_scores)``; both arrays have length ``len(outputs)``.
    """
    # Chronological split: first 60 % train, next 15 % validation, rest test.
    num_of_steps = data.shape[0] - look_back
    train_size = 0.6
    val_size = 0.15
    num_train = int(num_of_steps * train_size)
    num_val = int(num_of_steps * val_size)

    RMSE_scores = np.zeros(len(outputs))
    MAE_scores = np.zeros(len(outputs))

    # NOTE(review): unlike the targets, the inputs are used unscaled here —
    # confirm that this is intended.
    x, _ = shiftSequence(data, look_back)
    x_train, x_val, x_test = x[:num_train], x[num_train:num_train+num_val], x[num_train+num_val:]

    for idx, output in enumerate(outputs):

        y_scaler = MinMaxScaler()
        y = y_scaler.fit_transform(data[:, :output])
        _, y = shiftSequence(y, look_back)
        y_train, y_val, y_test = y[:num_train], y[num_train:num_train+num_val], y[num_train+num_val:]

        # Filter counts for the six TCN levels; the last one equals the number
        # of target columns. Kept under its own name so the `outputs` argument
        # being iterated is not overwritten.
        layer_widths = [64, 128, 128, 128, 64, output]

        scores_rmse = np.zeros(averaging)
        scores_mae = np.zeros(averaging)
        for av in range(averaging):
            # Many models are built in this loop; release the state Keras
            # keeps for earlier ones so memory does not grow with each run.
            keras.backend.clear_session()
            model = createTemporalConvNetwork(6, data.shape[1], look_back, layer_widths, kernel_size=3)
            model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.001), metrics=[keras.metrics.RootMeanSquaredError()],)
            cb = [keras.callbacks.EarlyStopping(patience=10)]
            model.fit(x=x_train, y=y_train, validation_data=(x_val, y_val), batch_size=32, epochs=500, callbacks=cb)
            rmse, mae = getScoresConv(model, x_test, y_test, y_scaler)
            print(f'{rmse:.2f} RMSE')
            scores_rmse[av] = rmse
            scores_mae[av] = mae
        RMSE_scores[idx] = np.average(scores_rmse)
        MAE_scores[idx] = np.average(scores_mae)
    return RMSE_scores, MAE_scores

In [None]:
# One configuration: forecast the first column of `data` only,
# with 10 training repetitions and a look-back of 96 steps.
outputs = [1]
rmse_tcn, mae_tcn = experimentTCN_output(outputs, 10, 96)

In [None]:
# Mean over all entries of `data`; presumably a reference scale for the error
# values reported by the experiments — confirm.
np.average(data)