## Importing things 

In [1]:
import sys
sys.path.append('../misc')

from MoviaBusDataset import MoviaBusDataset
from BaseNetwork import BaseNetwork
import numpy as np
from FNN import FNN
from Seq2Seq import Seq2Seq
import pandas as pd


%load_ext autoreload
%autoreload 2


## Load Data

In [2]:
previous_timesteps = 6
prediction_steps = 6
batch_size = 25

train = MoviaBusDataset('../data/train', interpolation=True, 
                        prev_timesteps=previous_timesteps, 
                        max_future_time_steps=prediction_steps, 
                        timeofday = True)
train2 = MoviaBusDataset('../data/train', interpolation=True, 
                        prev_timesteps=previous_timesteps, 
                        max_future_time_steps=prediction_steps, 
                        )

validation = MoviaBusDataset('../data/validation', interpolation=True, 
                       prev_timesteps=previous_timesteps, 
                       max_future_time_steps=prediction_steps, 
                       timeofday = True)
validation2 = MoviaBusDataset('../data/validation', interpolation=True, 
                       prev_timesteps=previous_timesteps, 
                       max_future_time_steps=prediction_steps)


test = MoviaBusDataset('../data/test', interpolation=True, 
                       prev_timesteps=previous_timesteps, 
                       max_future_time_steps=prediction_steps, 
                       timeofday = True)
test2 = MoviaBusDataset('../data/test', interpolation=True, 
                       prev_timesteps=previous_timesteps, 
                       max_future_time_steps=prediction_steps)


In [3]:
train_all = pd.concat(train.dataframes,sort=False).unstack().reset_index().rename(columns={0:"Speed"})
train_road_mean = train_all.pivot(index='Time', columns='LinkRef',values='Speed').groupby(['TimeOfDay']).mean()

train.remove_trend()
validation.remove_trend(train._historical_average)
test.remove_trend(train._historical_average)

train2.remove_trend()
validation2.remove_trend(train2._historical_average)
test2.remove_trend(train2._historical_average)

train2.normalize(individual_roads=False)
validation2.normalize(train2.mean, train2.std)
test2.normalize(train2.mean, train2.std)

In [4]:
stats = pd.read_csv('../stats.csv',index_col=['LinkRef'])

In [5]:
stats_ = stats

## FNN

In [7]:
net_FNN = FNN(num_hidden=20)
net_FNN.load('FNN.pt')
net_FNN.cuda()
validation_scores=[net_FNN.get_MAE_score(validation,timestep=i) for i in range(1,prediction_steps+1)]
print('Validation score:\n',validation_scores)
print('\nMean validation score for all timesteps:\n',sum(validation_scores)/len(validation_scores))

test_scores=[net_FNN.get_MAE_score(test,timestep=i) for i in range(1,prediction_steps+1)]
print('\nTest score:\n',test_scores)
print('\nMean test score for all timesteps:\n',sum(test_scores)/len(test_scores))

Validation score:
 [1.468803882598877, 1.508021354675293, 1.522831678390503, 1.5315278768539429, 1.5421165227890015, 1.5539194345474243]

Mean validation score for all timesteps:
 1.5212034583091736

Test score:
 [1.4647096395492554, 1.5006835460662842, 1.5135947465896606, 1.5224456787109375, 1.5305625200271606, 1.535524606704712]

Mean test score for all timesteps:
 1.5112534562746684


In [8]:
stats_['fnn_t1'],stats_['fnn_t2'],stats_['fnn_t3'],stats_['fnn_t4'],stats_['fnn_t5'],stats_['fnn_t6'] = [net_FNN.get_MAE_score(test,timestep=i,individual_roads=True) for i in range(1,prediction_steps+1)]
#data_norm = (stats_.sub(stats_.mean(axis=0,numeric_only=True),axis=1))/stats_.std(axis=0)
#data_norm.corr()

In [None]:
net_FNN.visualize_road(test, timesteps=1, road=17)

## Seq2Seq

In [9]:
net_s2s = Seq2Seq()
net_s2s._BaseNetwork__target_to_net = True
net_s2s.load('Sequence2Sequence.pt')
net_s2s.cuda()
validation_scores=[net_s2s.get_MAE_score(validation2,timestep=i) for i in range(1,prediction_steps+1)]
print('Validation score:\n',validation_scores)
print('\nMean validation score for all timesteps:\n',sum(validation_scores)/len(validation_scores))

test_scores=[net_s2s.get_MAE_score(test2,timestep=i) for i in range(1,prediction_steps+1)]
print('\nTest score:\n',test_scores)
print('\nMean test score for all timesteps:\n',sum(test_scores)/len(test_scores))

Validation score:
 [1.521698236465454, 1.525720477104187, 1.529951810836792, 1.534590244293213, 1.5416491031646729, 1.5448389053344727]

Mean validation score for all timesteps:
 1.5330747961997986

Test score:
 [1.5241392850875854, 1.5242527723312378, 1.5267033576965332, 1.5291039943695068, 1.5355304479599, 1.5400067567825317]

Mean test score for all timesteps:
 1.5299561023712158


In [16]:
stats_['s2s_t1'],stats_['s2s_t2'],stats_['s2s_t3'],stats_['s2s_t4'],stats_['s2s_t5'],stats_['s2s_t6'] = [net_s2s.get_MAE_score(validation2,timestep=i,individual_roads=True) for i in range(1,prediction_steps+1)]
data_norm = (stats_.sub(stats_.mean(axis=0,numeric_only=True),axis=1))/stats_.std(axis=0)
data_norm.corr()

Unnamed: 0,freq,stds,mean_diff,mean_diff_diff,mean,stops,fnn_t1,fnn_t2,fnn_t3,fnn_t4,...,s2s_t3,s2s_t4,s2s_t5,s2s_t6,dcrnn_t1,dcrnn_t2,dcrnn_t3,dcrnn_t4,dcrnn_t5,dcrnn_t6
freq,1.0,-0.087368,-0.032244,-0.002021,-0.446185,0.254136,-0.068827,-0.085869,-0.087271,-0.088893,...,-0.094766,-0.093034,-0.094675,-0.094497,-0.065675,-0.084169,-0.088062,-0.088447,-0.086994,-0.085084
stds,-0.087368,1.0,0.92887,0.921349,-0.246023,-0.038347,0.891044,0.889269,0.886464,0.884445,...,0.892153,0.890747,0.889874,0.89247,0.891102,0.890613,0.884936,0.881849,0.879358,0.875735
mean_diff,-0.032244,0.92887,1.0,0.997995,-0.227146,-0.060936,0.923915,0.911123,0.905254,0.901409,...,0.895516,0.894402,0.89153,0.893874,0.920307,0.907128,0.89757,0.891641,0.887004,0.882044
mean_diff_diff,-0.002021,0.921349,0.997995,1.0,-0.24972,-0.046977,0.915683,0.901038,0.894837,0.891115,...,0.886054,0.884832,0.881858,0.884012,0.912779,0.897677,0.887855,0.881956,0.877448,0.872488
mean,-0.446185,-0.246023,-0.227146,-0.24972,1.0,-0.511415,-0.148131,-0.133038,-0.128511,-0.12506,...,-0.111422,-0.110112,-0.11058,-0.108754,-0.149132,-0.133469,-0.12797,-0.123911,-0.123934,-0.126178
stops,0.254136,-0.038347,-0.060936,-0.046977,-0.511415,1.0,-0.157394,-0.161376,-0.165144,-0.166607,...,-0.14353,-0.142279,-0.136176,-0.132618,-0.151201,-0.152065,-0.15319,-0.153959,-0.153052,-0.150581
fnn_t1,-0.068827,0.891044,0.923915,0.915683,-0.148131,-0.157394,1.0,0.997742,0.995929,0.994213,...,0.954657,0.954791,0.953685,0.954442,0.995475,0.990596,0.986081,0.983194,0.98042,0.97736
fnn_t2,-0.085869,0.889269,0.911123,0.901038,-0.133038,-0.161376,0.997742,1.0,0.999222,0.998103,...,0.955639,0.956443,0.956132,0.956975,0.993766,0.993123,0.990086,0.987978,0.985679,0.983191
fnn_t3,-0.087271,0.886464,0.905254,0.894837,-0.128511,-0.165144,0.995929,0.999222,1.0,0.999251,...,0.954298,0.955426,0.955566,0.95658,0.993102,0.993883,0.991933,0.990307,0.988258,0.986124
fnn_t4,-0.088893,0.884445,0.901409,0.891115,-0.12506,-0.166607,0.994213,0.998103,0.999251,1.0,...,0.953631,0.954944,0.955225,0.956307,0.992007,0.993272,0.991726,0.99079,0.989153,0.98718


# Visualize all predictions 

In [None]:
road_nr = 59
time_steps_nr = 1
time,out_s2s,tar = net_s2s.visualize_road(test2, timesteps=time_steps_nr, road=road_nr,return_values=True)
_,out_fnn,_ = net_FNN.visualize_road(test, timesteps=time_steps_nr, road=road_nr,return_values=True)
with np.load('../dcrnn.npz') as data:
    # also contains groundtruth
    out_dcrnn = data['predictions'][:,:,road_nr]
    
net_dcrnn = out_dcrnn[time_steps_nr-1,0:90]+train_road_mean.values[6:-6,16]

In [None]:
import matplotlib.pyplot as plt
%matplotlib notebook
plt.plot(time[0:90],out_s2s[0:90], label='Prediction_s2s')
plt.plot(time[0:90],tar[0:90], label='Truth')
plt.plot(time[0:90],out_fnn[0:90], label='Prediction_fnn')
plt.plot(time[0:90],net_dcrnn,label='Prediction_dcrnn')
plt.legend()
plt.xlabel('Time of day [MM:DD:HH]')
plt.ylabel('Mean speed [m/2]')
plt.rcParams["figure.figsize"] = [10,4]
plt.show()

## DCRNN 

In [15]:
with np.load('../dcrnn.npz') as dcrnn_data:
    # also contains groundtruth
    out_dcrnn = np.abs(dcrnn_data['predictions'] - dcrnn_data['groundtruth'])
    stats_['dcrnn_t1'],stats_['dcrnn_t2'],stats_['dcrnn_t3'],stats_['dcrnn_t4'],stats_['dcrnn_t5'],stats_['dcrnn_t6'] = out_dcrnn.mean(axis=1)

In [14]:
out_dcrnn.mean(axis=1).shape

(6, 192)

In [None]:

len(time)
102*3-8*3
train_road_mean
%matplotlib notebook
plt.plot(time[0:90],train_road_mean.values[6:-6,16])
plt.plot(time[0:90],tar[0:90], label='Prediction')
plt.legend()
plt.show()

## Plotting

In [None]:
%matplotlib notebook
plt.plot(time,out_dcrnn[1,6:-1])

In [None]:
len(time)

In [None]:
len(out_dcrnn[1,6:96]+train_road_mean.values[6:-6,16])

In [None]:
%matplotlib notebook
plt.plot(time[0:90],out_dcrnn[1,6:96]+train_road_mean.values[6:-6,16])

In [20]:
data_norm = (stats_.sub(stats_.mean(axis=0,numeric_only=True),axis=1))/stats_.std(axis=0)
corr = data_norm.corr()
arr = ['freq','stds','mean_diff','mean','stops','fnn_t6','s2s_t6','dcrnn_t6']
print(corr.to_latex(float_format="{:,.2f}".format, columns=arr,))

\begin{tabular}{lrrrrrrrr}
\toprule
{} &  freq &  stds &  mean\_diff &  mean &  stops &  fnn\_t6 &  s2s\_t6 &  dcrnn\_t6 \\
\midrule
freq           &  1.00 & -0.09 &      -0.03 & -0.45 &   0.25 &   -0.09 &   -0.09 &     -0.09 \\
stds           & -0.09 &  1.00 &       0.93 & -0.25 &  -0.04 &    0.88 &    0.89 &      0.88 \\
mean\_diff      & -0.03 &  0.93 &       1.00 & -0.23 &  -0.06 &    0.89 &    0.89 &      0.88 \\
mean\_diff\_diff & -0.00 &  0.92 &       1.00 & -0.25 &  -0.05 &    0.88 &    0.88 &      0.87 \\
mean           & -0.45 & -0.25 &      -0.23 &  1.00 &  -0.51 &   -0.12 &   -0.11 &     -0.13 \\
stops          &  0.25 & -0.04 &      -0.06 & -0.51 &   1.00 &   -0.17 &   -0.13 &     -0.15 \\
fnn\_t1         & -0.07 &  0.89 &       0.92 & -0.15 &  -0.16 &    0.99 &    0.95 &      0.98 \\
fnn\_t2         & -0.09 &  0.89 &       0.91 & -0.13 &  -0.16 &    0.99 &    0.96 &      0.98 \\
fnn\_t3         & -0.09 &  0.89 &       0.91 & -0.13 &  -0.17 &    1.00 &    0.96 &      0.99 