In [None]:
'''
COMPSCI 760 Group Project: Extending NCP to supervised learning - traffic dataset
Dataset found at https://archive.ics.uci.edu/ml/machine-learning-databases/00492/Metro_Interstate_Traffic_Volume.csv.gz
Original NCP repo: https://github.com/mlech26l/keras-ncp
Original LTC repo: https://github.com/raminmh/liquid_time_constant_networks
'''

In [None]:
import pandas
import numpy as np
import tensorflow as tf
from tensorflow import keras
from kerasncp import wirings
from kerasncp.tf import LTCCell
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw traffic CSV into a DataFrame.
# The path below is machine-specific: change the directory here if needed.
traffic_data = pandas.read_csv(
    filepath_or_buffer='/Users/adrianchoi/Desktop/760/ncp-time-series/data/Metro_Interstate_Traffic_Volume.csv'
)

In [None]:
# Print column dtypes and non-null counts of the raw table.
traffic_data.info()

In [None]:
# Summary statistics of the raw table (displayed as the cell output).
traffic_data.describe()

In [None]:
import matplotlib.pyplot as plt
import datetime
import scipy.stats

In [None]:
# Build the per-record input features and the standardised regression target.

# Holiday flag: 1.0 on rows that carry a holiday name, 0.0 otherwise.
# read_csv never yields Python `None` objects (cells come back as strings or
# NaN), so comparing the column against `None` matches nothing and would make
# this feature constant zero. Both the literal text "None" and NaN are treated
# as "no holiday" here.
holiday_col = traffic_data["holiday"]
holiday = (holiday_col.notna() & (holiday_col != "None")).values.astype(np.float32)

temp = traffic_data["temp"].values.astype(np.float32)
temp -= np.mean(temp)  # centre on the mean over the whole dataset (no scaling)
rain = traffic_data["rain_1h"].values.astype(np.float32)
snow = traffic_data["snow_1h"].values.astype(np.float32)
clouds = traffic_data["clouds_all"].values.astype(np.float32)

# Timestamps are text in the form "2012-10-02 13:00:00".
date_time = traffic_data["date_time"].values
date_time = [datetime.datetime.strptime(d,"%Y-%m-%d %H:%M:%S") for d in date_time]
# datetime.weekday(): 0 = Monday ... 6 = Sunday
weekday = np.array([d.weekday() for d in date_time]).astype(np.float32)
# Hour of day mapped through sin(pi * hour / 24): 0 at midnight, 1 at 12:00.
noon = np.array([d.hour for d in date_time]).astype(np.float32)
noon = np.sin(noon*np.pi/24)

# Shape (n_records, 7); the column order is the order of this list.
features = np.stack([holiday,temp,rain,snow,clouds,weekday,noon],axis=-1)

# Target: traffic volume standardised to zero mean and unit variance.
traffic_volume = traffic_data["traffic_volume"].values.astype(np.float32)
traffic_volume -= np.mean(traffic_volume)
traffic_volume /= np.std(traffic_volume)

In [None]:
# Kernel density estimate (approximate pdf) of the standardised traffic volume
pandas.Series(traffic_volume).plot(kind="kde")

In [None]:
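# Uncomment to write the feature matrix to disk as CSV (np.savetxt writes no header row by default):
#np.savetxt("preprocessed_traffic_data.csv", features, delimiter=",")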

In [None]:
# change the directory here if needed
# NOTE(review): this file is presumably the np.savetxt dump of `features`
# (see the commented-out savetxt cell above), which has no header row. Without
# header=None pandas would consume the first sample as column names. The names
# below follow the stacking order used to build `features` -- confirm against
# the file on disk.
preprocessed_traffic_data = pandas.read_csv(
    '/Users/adrianchoi/Desktop/760/ncp-time-series/preprocessed_traffic_data.csv',
    header=None,
    names=["holiday", "temp", "rain", "snow", "clouds", "weekday", "noon"],
)

In [None]:
# Summary statistics of the preprocessed feature table (displayed as the cell output).
preprocessed_traffic_data.describe()

In [None]:
def cut_in_sequences(x,y,seq_len,inc=1):
    """Slice two aligned arrays into fixed-length, possibly overlapping windows.

    Windows of `seq_len` consecutive rows are taken at start offsets
    0, inc, 2*inc, ... for as long as a full window fits, including the
    window that ends exactly on the last row.

    Args:
        x: array indexed by time step along axis 0.
        y: array aligned with `x` along axis 0.
        seq_len: number of time steps per window.
        inc: stride between consecutive window starts.

    Returns:
        Tuple (windows_x, windows_y). The windows are stacked along axis 1,
        so each result is time-major: shape (seq_len, n_windows, ...).

    Raises:
        ValueError: if `x` has fewer than `seq_len` rows, so no window fits.
    """
    n_steps = x.shape[0]
    if n_steps < seq_len:
        raise ValueError(
            "need at least seq_len={} time steps, got {}".format(seq_len, n_steps)
        )

    # The last valid start is n_steps - seq_len; range() excludes its stop
    # value, hence the +1 (without it the final full window is dropped).
    starts = range(0, n_steps - seq_len + 1, inc)
    sequences_x = [x[s:s + seq_len] for s in starts]
    sequences_y = [y[s:s + seq_len] for s in starts]

    return np.stack(sequences_x,axis=1),np.stack(sequences_y,axis=1)

In [None]:
# data preprocessing
# cut_in_sequences stacks its windows along axis 1, i.e. it returns time-major
# arrays (seq_len, n_sequences, ...). Swap the first two axes so sequences come
# first, the batch-major layout a Keras RNN layer consumes by default.
all_x,all_y = cut_in_sequences(features,traffic_volume,32,inc=4)
all_x = np.swapaxes(all_x,0,1)
all_y = np.swapaxes(all_y,0,1)

# Sequences now live on axis 0, so both the count and the split must use that
# axis. Counting and indexing axis 1 would shuffle and split the 32 time steps
# of every sequence instead of the sequences themselves.
total_seqs = all_x.shape[0]
print("Total number of training sequences: {}".format(total_seqs))

# Fixed seed keeps the 10% validation / 15% test / 75% train split reproducible.
permutation = np.random.RandomState(23489).permutation(total_seqs)
valid_size = int(0.1*total_seqs)
test_size = int(0.15*total_seqs)
valid_idx = permutation[:valid_size]
test_idx = permutation[valid_size:valid_size+test_size]
train_idx = permutation[valid_size+test_size:]

valid_x = all_x[valid_idx]
valid_y = all_y[valid_idx]
test_x = all_x[test_idx]
test_y = all_y[test_idx]
train_x = all_x[train_idx]
train_y = all_y[train_idx]



In [None]:
# cut the data in a sequence of length 32
# The complete, unsplit set of windows is kept under its own names.
# Assigning it to train_x/train_y here would replace the training split made
# in the preprocessing cell with the full dataset, so every validation and
# test sequence would also be trained on.
sequences_x,sequences_y = cut_in_sequences(features,traffic_volume,32,inc=4)
# time-major (seq_len, n_sequences, ...) -> batch-major (n_sequences, seq_len, ...)
sequences_x = np.swapaxes(sequences_x,0,1)
sequences_y = np.swapaxes(sequences_y,0,1)

In [None]:
# Grid searched by the hyperparameter sweep.
# Candidate neuron counts:
neurons = [
    3, 6, 8, 10, 12, 14, 16,
]

# Candidate sparsity levels.
# higher sparsity is likely to cause overfitting
sparsity = [
    0.1, 0.2, 0.3, 0.6, 0.8,
]

# Training schedule: number of epochs and mini-batch size for each fit.
epochs_n, batch_size_n = 20, 16

In [None]:
# Experiment with hyperparameters - number of neurons, sparsity
# For every (neuron count, sparsity) pair: build a randomly wired LTC model,
# train it, and plot its training/validation loss curves.
for n_neurons in neurons:
    for sparsity_level in sparsity:
        # The import cell only brings `wirings` into scope
        # (`from kerasncp import wirings`); the bare name `kerasncp` is never
        # imported, so `kerasncp.wirings.Random` would raise NameError.
        arch = wirings.Random(n_neurons, 1, sparsity_level=sparsity_level)
        rnn_cell = LTCCell(arch)
        model = tf.keras.models.Sequential(
            [
                # variable-length sequences of 7 features per time step
                tf.keras.Input((None, 7)),
                # one prediction per time step
                tf.keras.layers.RNN(rnn_cell, return_sequences=True),
            ]
        )
        model.compile(
            optimizer=tf.keras.optimizers.Adam(0.01,epsilon=1e-08), loss=tf.keras.losses.MeanSquaredError()
        )

        traffic_ncp_history = model.fit(x=train_x, y=train_y, batch_size=batch_size_n, epochs=epochs_n, validation_data=(valid_x,valid_y))

        loss_train = traffic_ncp_history.history['loss']
        loss_val = traffic_ncp_history.history['val_loss']
        # x-axis follows the number of recorded epochs
        epochs = range(1,len(loss_train)+1)
        plt.plot(epochs, loss_train, 'g', label='Training loss')
        plt.plot(epochs, loss_val, 'b', label='validation loss')
        # Put the configuration in the title so the many plots produced by the
        # sweep can be told apart.
        plt.title('Training and Validation MSE loss (neurons={}, sparsity={})'.format(n_neurons, sparsity_level))
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.show()