In [5]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error

from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.losses import MeanSquaredError
from keras.optimizers import Adam

In [6]:
DATA_PATH = 'data_cars/'
all_files = os.listdir(DATA_PATH)

all_dataframes = []
for index, file in enumerate(all_files):
    print(f"Reading file: {file}")
    file_name = file.split('.')[0]
    df = pd.read_csv(DATA_PATH + file, sep=';')

    df['date'] = pd.to_datetime(df[file_name], format='%Y-%m-%d %H:%M')
    df = df.drop(columns=[file_name])

    df = df.set_index('date')
    df.columns = [f"{file_name}_{col}" for col in df.columns if col != 'date']
    all_dataframes.append(df)
    print(f"Finished reading file: {file}, shape = {df.shape}")

combined_df = pd.concat(all_dataframes, axis=1)
combined_df.fillna(method='ffill', inplace=True)
combined_df['hour'] = combined_df.index.hour
combined_df['day_of_week'] = combined_df.index.dayofweek

data = np.array(combined_df, dtype=float)[:, :-2]

Reading file: K120.csv
Finished reading file: K120.csv, shape = (2880, 7)
Reading file: K134.csv
Finished reading file: K134.csv, shape = (2880, 7)
Reading file: K140.csv
Finished reading file: K140.csv, shape = (2880, 5)
Reading file: K159.csv
Finished reading file: K159.csv, shape = (2880, 11)
Reading file: K405.csv
Finished reading file: K405.csv, shape = (2880, 19)
Reading file: K406.csv
Finished reading file: K406.csv, shape = (2880, 8)
Reading file: K701.csv
Finished reading file: K701.csv, shape = (2880, 7)
Reading file: K702.csv
Finished reading file: K702.csv, shape = (2880, 8)
Reading file: K703.csv
Finished reading file: K703.csv, shape = (2880, 10)
Reading file: K709.csv
Finished reading file: K709.csv, shape = (2880, 17)
Reading file: K711.csv
Finished reading file: K711.csv, shape = (2880, 31)


In [51]:
from keras import layers

def getMostCorrelated(sensor, no_extraSensors, data):
    corr = np.corrcoef(data.T)
    best = np.argsort(corr[sensor])[-no_extraSensors:]
    return data[:, best]

def getScore(model, x, y, scaler):
    predicts = model.predict(x)
    scaled_predict, y_scaled = scaler.inverse_transform(predicts), scaler.inverse_transform(y)
    return np.sqrt(mean_squared_error(y_scaled, scaled_predict))

def splitSequence(seq, n_steps):

    #Declare X and y as empty list
    X = []
    y = []

    for i in range(len(seq)):
        #get the last index
        lastIndex = i + n_steps

        #if lastIndex is greater than length of sequence then break
        if lastIndex > len(seq) - 1:
            break

        # Create input and output sequence
        # Last 2 columns are time of day and day of week
        seq_X, seq_y = seq[i:lastIndex], seq[lastIndex]

        #append seq_X, seq_y in X and y list
        X.append(seq_X)
        y.append(seq_y)
        #Convert X and y into numpy array
    X = np.array(X)
    y = np.array(y)

    return X,y

#Following structure layed out in github.com/locuslab/TCN
def createTemporalConvNetwork(n_layers, n_sensors, look_back, n_outputs, kernel_size=2, dropout=0.2):
    modelTCN = keras.models.Sequential()
    modelTCN.add(layers.InputLayer((look_back, n_sensors)))
    for i in range(n_layers):
        modelTCN.add(layers.Conv1D(look_back, kernel_size=kernel_size, padding='causal', activation='relu', dilation_rate=2**i))
        modelTCN.add(layers.Dropout(dropout))
        modelTCN.add(layers.Conv1D(look_back, kernel_size=kernel_size, padding='causal', activation='relu', dilation_rate=2**i))
        modelTCN.add(layers.Dropout(dropout))
    modelTCN.add(layers.Flatten())
    modelTCN.add(layers.Dense(n_outputs))
    return modelTCN

def experiment(diff_inputs, samples, averaging, look_back):
    num_of_steps = data.shape[0] - look_back
    train_size = 0.6
    val_size = 0.15
    num_train = int(num_of_steps * train_size)
    num_val = int(num_of_steps * val_size)

    sample_scores = np.zeros((len(samples), len(diff_inputs)))
    for sensId, sensor in enumerate(samples):
        
        _, y = splitSequence(data[:, sensor], look_back)
        y_scaler = StandardScaler()
        y = y_scaler.fit_transform(y.reshape(-1, 1))
        y_train, y_val, y_test = y[:num_train], y[num_train:num_train+num_val], y[num_train+num_val:]

        for idx, inputs in enumerate(diff_inputs):
            correlated_data = getMostCorrelated(sensor, inputs, data)
            scaler = StandardScaler()
            correlated_data = scaler.fit_transform(correlated_data)
            x, _ = splitSequence(correlated_data, look_back)
            x_train, x_val, x_test = x[:num_train], x[num_train:num_train+num_val], x[num_train+num_val:]
            
            scores = np.zeros(averaging)
            for av in range(averaging):
                model = createTemporalConvNetwork(4, inputs, look_back, 1)
                model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.001), metrics=[keras.metrics.RootMeanSquaredError()],)
                cb = [keras.callbacks.EarlyStopping(patience=10)]
                model.fit(x=x_train, y=y_train, validation_data=(x_val, y_val), batch_size=32, epochs=500, callbacks=cb)
                score = getScore(model, x_test, y_test, y_scaler)
                print(f'{score:.2f} RMSE')
                scores[av] = score
            sample_scores[sensId, idx] = np.average(scores)
    return sample_scores


In [52]:
outputs = np.arange(data.shape[1])
inputs = [1]
scores = experiment(inputs, outputs, 2, 24)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
6.53 RMSE
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
6.57 RMSE
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoc

In [53]:
print(scores)
np.average(scores)

[[ 6.55406249]
 [ 5.43284666]
 [13.02749269]
 [ 6.80977752]
 [10.03409706]
 [ 9.22977876]
 [ 9.60522078]
 [ 6.40726261]
 [ 8.9119003 ]
 [ 2.5201399 ]
 [11.79182015]
 [ 7.00403095]
 [10.33886323]
 [ 7.46551908]
 [ 3.7755721 ]
 [ 5.74390692]
 [ 8.48507949]
 [14.57766814]
 [ 9.04655491]
 [11.19549441]
 [ 6.0600859 ]
 [ 5.32694044]
 [10.4701678 ]
 [ 7.14396459]
 [ 8.19059455]
 [ 7.01960964]
 [ 5.86767765]
 [ 6.95110246]
 [ 6.41028528]
 [ 3.28810204]
 [11.18442495]
 [11.75714067]
 [ 8.64851168]
 [ 8.57717656]
 [ 9.71979888]
 [ 8.33631291]
 [ 4.36135183]
 [11.836498  ]
 [11.83345532]
 [ 5.73187232]
 [ 5.03436858]
 [ 7.27718233]
 [ 9.88689335]
 [ 8.68483915]
 [ 3.73563796]
 [ 5.25912926]
 [10.80657538]
 [12.18269379]
 [ 3.41603193]
 [ 6.91007431]
 [16.36632472]
 [10.43754758]
 [ 9.88681119]
 [ 4.50855814]
 [ 7.45300183]
 [ 3.42831424]
 [ 3.3833534 ]
 [ 3.26406888]
 [14.18514012]
 [15.87960565]
 [ 2.04310153]
 [13.16991038]
 [14.76230623]
 [ 3.20030644]
 [ 3.27910983]
 [ 5.032609  ]
 [ 5.10760

8.690154558742892