In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up automatically before each cell executes.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("../")
sys.path.append("../misc")
import pandas as pd
from misc.MoviaBusDataset import MoviaBusDataset
from misc.data_loader import load_network, adjacency_matrix
import pickle
import os
import yaml

In [3]:
#df = pd.read_hdf("../DCRNN/data/df_highway_2012_4mon_sample.h5")
# Working directory for the Movia experiment inside the DCRNN checkout.
base_dir = '../DCRNN/movia'

# exist_ok=True makes this cell idempotent and avoids the check-then-create
# race of `if not os.path.exists(...)`; re-running the cell is a no-op.
os.makedirs(base_dir, exist_ok=True)
for sub_dir in ('out', 'data', 'model'):
    os.makedirs(os.path.join(base_dir, sub_dir), exist_ok=True)

In [4]:
# The three splits are loaded with identical options, declared once here.
# NOTE(review): agg_time=10 presumably means 10-minute bins (the windowing cell
# below assumes 10*6 = 60 min) -- confirm against MoviaBusDataset.
dataset_options = dict(interpolation=True, agg_time=10)

movia_train = MoviaBusDataset('../data/train/', **dataset_options)
movia_val = MoviaBusDataset('../data/validation/', **dataset_options)
movia_test = MoviaBusDataset('../data/test/', **dataset_options)

# Disabled experiments, kept for reference:
#movia_dataset.remove_trend()
#data_file = os.path.join(base_dir,'movia.h5')
#all_data.to_hdf(data_file,key='df',format='table')


In [5]:
#road_network = load_network()
# Store the training split's adjacency matrix (cast to float32) as a
# three-element list. The training cell unpacks the file as `_, _, adj_mx`,
# so the two leading -1 entries are only placeholders.
adj_mx_path = os.path.join(base_dir, 'adj_mx.pkl')
adj_mx = movia_train.adjacency_matrix.astype('float32')
with open(adj_mx_path, 'wb') as pkl_file:
    pickle.dump([-1, -1, adj_mx], pkl_file, protocol=2)

In [6]:
##TAKEN FROM generate_training_data.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import numpy as np
import os
import pandas as pd


def generate_graph_seq2seq_io_data(
        df, x_offsets, y_offsets, add_time_in_day=True, add_day_in_week=False, scaler=None
):
    """
    Slice a time-indexed frame into sliding-window (input, target) samples.

    For every anchor step ``t`` (the index of the last observed sample) the
    rows ``t + x_offsets`` form one input window and the rows ``t + y_offsets``
    form the matching target window.

    :param df: DataFrame with one row per time step and one column per node.
        The index must be datetime-like when a time feature is requested.
    :param x_offsets: integer offsets, relative to ``t``, of the input steps
        (array or list), e.g. ``np.arange(-5, 1)``.
    :param y_offsets: integer offsets, relative to ``t``, of the target steps.
    :param add_time_in_day: append the elapsed fraction of the day, in [0, 1),
        as an extra feature channel.
    :param add_day_in_week: append a 7-channel one-hot day-of-week encoding.
    :param scaler: unused; kept so existing call sites keep working.
    :return: tuple ``(x, y)`` where
        x: (epoch_size, input_length, num_nodes, input_dim)
        y: (epoch_size, output_length, num_nodes, output_dim)
    :raises ValueError: if ``df`` has too few rows to fit a single window.
    """
    # Accept plain lists as well as arrays; `t + offsets` below needs an ndarray.
    x_offsets = np.asarray(x_offsets)
    y_offsets = np.asarray(y_offsets)

    num_samples, num_nodes = df.shape

    # t is the index of the last observation.
    min_t = abs(min(x_offsets))
    # Exclusive upper bound. The previous abs() around this difference turned a
    # negative bound (frame shorter than the horizon) into a positive one,
    # which then failed with an IndexError or an opaque np.stack error.
    max_t = num_samples - abs(max(y_offsets))
    if max_t <= min_t:
        raise ValueError(
            "Not enough samples to build a window: got %d rows, need at least %d"
            % (num_samples, min_t + abs(max(y_offsets)) + 1)
        )

    # Feature channel 0 is the raw signal: (num_samples, num_nodes, 1).
    feature_blocks = [np.expand_dims(df.values, axis=-1)]
    if add_time_in_day:
        timestamps = df.index.values
        # Timestamp minus its own midnight, expressed as a fraction of one day.
        # This is float64, so the concatenated result is promoted to float64.
        day_fraction = (timestamps - timestamps.astype("datetime64[D]")) / np.timedelta64(1, "D")
        # Broadcast the per-step value to every node: (num_samples, num_nodes, 1).
        feature_blocks.append(np.tile(day_fraction, [1, num_nodes, 1]).transpose((2, 1, 0)))
    if add_day_in_week:
        day_one_hot = np.zeros(shape=(num_samples, num_nodes, 7))
        day_one_hot[np.arange(num_samples), :, np.asarray(df.index.dayofweek)] = 1
        feature_blocks.append(day_one_hot)

    data = np.concatenate(feature_blocks, axis=-1)

    anchors = range(min_t, max_t)
    x = np.stack([data[t + x_offsets, ...] for t in anchors], axis=0)
    y = np.stack([data[t + y_offsets, ...] for t in anchors], axis=0)
    return x, y


# Adapted from generate_train_val_test() in DCRNN's generate_training_data.py.
# Offsets are relative to t, the latest observed sample (offset 0).
# The upstream script uses 12 steps of 5 min; with 10-minute bins, 6 steps
# cover the same 60 minutes of history and of predictions.
x_offsets = np.arange(-5, 1, 1)  # input steps t-5 ... t
y_offsets = np.arange(1, 7, 1)   # target steps t+1 ... t+6


def _make_windows(split_df):
    """Turn one split's frame into (x, y) sliding-window arrays.

    x: (num_samples, input_length, num_nodes, input_dim)
    y: (num_samples, output_length, num_nodes, output_dim)
    """
    return generate_graph_seq2seq_io_data(
        split_df,
        x_offsets=x_offsets,
        y_offsets=y_offsets,
        add_time_in_day=True,
        add_day_in_week=False,
    )


# NOTE(review): pd.concat stacks every frame in `.dataframes` end to end, so a
# window can span the boundary between two frames -- confirm the frames are
# contiguous in time, otherwise window each frame separately.
movia_train_df = pd.concat(movia_train.dataframes).astype('float32')
movia_val_df = pd.concat(movia_val.dataframes).astype('float32')
movia_test_df = pd.concat(movia_test.dataframes).astype('float32')

x_train, y_train = _make_windows(movia_train_df)
x_val, y_val = _make_windows(movia_val_df)
x_test, y_test = _make_windows(movia_test_df)

# Explicit table of splits instead of looking the arrays up through locals().
splits = (
    ("train", x_train, y_train),
    ("val", x_val, y_val),
    ("test", x_test, y_test),
)
for cat, _x, _y in splits:
    print(cat, "x: ", _x.shape, "y:", _y.shape)
    np.savez_compressed(
        os.path.join(base_dir, 'data', "%s.npz" % cat),
        x=_x,
        y=_y,
        # Offsets are stored as column vectors of shape (length, 1).
        x_offsets=x_offsets.reshape(list(x_offsets.shape) + [1]),
        y_offsets=y_offsets.reshape(list(y_offsets.shape) + [1]),
    )


train x:  (1429, 6, 192, 2) y: (1429, 6, 192, 2)
val x:  (181, 6, 192, 2) y: (181, 6, 192, 2)
test x:  (277, 6, 192, 2) y: (277, 6, 192, 2)


In [9]:
# Settings are grouped per section and then assembled into the nested mapping
# that is written to config.yml and passed to DCRNNSupervisor(**config).
# Relative paths here are resolved after the training cell changes into the
# parent directory of base_dir.
data_config = {
    'batch_size': 64,
    'dataset_dir': 'movia/data',
    'test_batch_size': 64,
    'val_batch_size': 64,
    'graph_pkl_filename': 'movia/adj_mx.pkl',
}

model_config = {
    'cl_decay_steps': 2000,
    'filter_type': 'dual_random_walk',
    'horizon': 6,
    'input_dim': 2,
    'l1_decay': 0,
    'max_diffusion_step': 2,
    # One graph node per row of the adjacency matrix.
    'num_nodes': len(adj_mx),
    'num_rnn_layers': 2,
    'output_dim': 1,
    'rnn_units': 64,
    'seq_len': 6,
    'use_curriculum_learning': True,
}

train_config = {
    'base_lr': 0.01,
    'dropout': 0,
    'epoch': 0,
    'epochs': 100,
    'epsilon': 1.0e-3,
    'global_step': 0,
    'lr_decay_ratio': 0.1,
    'max_grad_norm': 5,
    'max_to_keep': 100,
    'min_learning_rate': 2.0e-06,
    'optimizer': 'adam',
    'patience': 50,
    'steps': [20, 30, 40, 50],
    'test_every_n_epochs': 10,
}

config = {
    'base_dir': 'data/model',
    'log_level': 'INFO',
    'data': data_config,
    'model': model_config,
    'train': train_config,
}

config_path = os.path.join(base_dir, 'config.yml')
with open(config_path, 'w') as outfile:
    yaml.dump(config, outfile, default_flow_style=False)

In [8]:
# Troubleshooting: if this cell fails with
#   ValueError: Variable rnn/basic_rnn_cell/kernel already exists, disallowed.
#   Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope?
# restart the kernel and run the notebook again.

# Change into the parent of base_dir first: the imports below and the relative
# 'movia/...' paths are resolved from there.
os.chdir(os.path.join(base_dir, '..'))
# Equivalent command-line run, kept for reference:
#%run run_demo.py --config_filename movia/config.yml --output_filename movia/out/out.npz
import tensorflow as tf
import numpy as np

from lib.utils import load_graph_data
from model.dcrnn_supervisor import DCRNNSupervisor

use_cpu_only = False
graph_pkl_filename = 'movia/adj_mx.pkl'
predictions_path = os.path.join(base_dir, 'out.npz')

# Hide every GPU from TensorFlow only when a CPU-only run is requested.
device_options = {'device_count': {'GPU': 0}} if use_cpu_only else {}
tf_config = tf.ConfigProto(**device_options)
tf_config.gpu_options.allow_growth = True

# The pickle holds three entries; only the adjacency matrix is used here.
_, _, adj_mx = load_graph_data(graph_pkl_filename)
supervisor = None
with tf.Session(config=tf_config) as sess:
    supervisor = DCRNNSupervisor(adj_mx=adj_mx, **config)
    supervisor.train(sess)
    # To evaluate a stored checkpoint instead of the freshly trained model:
    #supervisor.load(sess, config['train']['model_filename'])
    outputs = supervisor.evaluate(sess)
    np.savez_compressed(predictions_path, **outputs)
    print('Predictions saved as {}.'.format(predictions_path))

2018-12-10 18:49:36,040 - INFO - Log directory: data/model/dcrnn_DR_2_h_6_64-64_lr_0.01_bs_64_1210184936/
2018-12-10 18:49:36,044 - INFO - {'base_dir': 'data/model', 'log_level': 'INFO', 'data': {'batch_size': 64, 'dataset_dir': 'movia/data', 'test_batch_size': 64, 'val_batch_size': 64, 'graph_pkl_filename': 'movia/adj_mx.pkl'}, 'model': {'cl_decay_steps': 2000, 'filter_type': 'dual_random_walk', 'horizon': 6, 'input_dim': 2, 'l1_decay': 0, 'max_diffusion_step': 2, 'num_nodes': 192, 'num_rnn_layers': 2, 'output_dim': 1, 'rnn_units': 64, 'seq_len': 6, 'use_curriculum_learning': True}, 'train': {'base_lr': 0.01, 'dropout': 0, 'epoch': 0, 'epochs': 100, 'epsilon': 0.001, 'global_step': 0, 'lr_decay_ratio': 0.1, 'max_grad_norm': 5, 'max_to_keep': 100, 'min_learning_rate': 2e-06, 'optimizer': 'adam', 'patience': 50, 'steps': [20, 30, 40, 50], 'test_every_n_epochs': 10}}
2018-12-10 18:49:36,255 - INFO - ('x_train', (1429, 6, 192, 2))
2018-12-10 18:49:36,256 - INFO - ('y_train', (1429, 6, 192

  d_inv = np.power(d, -1).flatten()


2018-12-10 18:49:43,099 - INFO - Total number of trainable parameters: 372352
2018-12-10 18:49:43,963 - INFO - Start training ...
2018-12-10 18:49:51,691 - INFO - Epoch [0/100] (23) train_mae: 1.7498, val_mae: 1.6571 lr:0.010000 7.6s
2018-12-10 18:49:51,878 - INFO - Val loss decrease from inf to 1.6571, saving to data/model/dcrnn_DR_2_h_6_64-64_lr_0.01_bs_64_1210184936/models-1.6571-23
2018-12-10 18:49:56,501 - INFO - Epoch [1/100] (46) train_mae: 1.5049, val_mae: 1.6073 lr:0.010000 4.6s
2018-12-10 18:49:56,567 - INFO - Val loss decrease from 1.6571 to 1.6073, saving to data/model/dcrnn_DR_2_h_6_64-64_lr_0.01_bs_64_1210184936/models-1.6073-46
2018-12-10 18:50:01,658 - INFO - Epoch [2/100] (69) train_mae: 1.4913, val_mae: 1.6018 lr:0.010000 5.1s
2018-12-10 18:50:01,723 - INFO - Val loss decrease from 1.6073 to 1.6018, saving to data/model/dcrnn_DR_2_h_6_64-64_lr_0.01_bs_64_1210184936/models-1.6018-69
2018-12-10 18:50:06,753 - INFO - Epoch [3/100] (92) train_mae: 1.4873, val_mae: 1.5941 

2018-12-10 18:52:36,743 - INFO - Epoch [29/100] (690) train_mae: 1.4282, val_mae: 1.5544 lr:0.001000 7.4s
2018-12-10 18:52:38,357 - INFO - Horizon 01, MAE: 1.48, MAPE: 0.4393, RMSE: 2.06
2018-12-10 18:52:38,360 - INFO - Horizon 02, MAE: 1.53, MAPE: 0.4649, RMSE: 2.14
2018-12-10 18:52:38,363 - INFO - Horizon 03, MAE: 1.55, MAPE: 0.4764, RMSE: 2.18
2018-12-10 18:52:38,366 - INFO - Horizon 04, MAE: 1.57, MAPE: 0.4882, RMSE: 2.21
2018-12-10 18:52:38,369 - INFO - Horizon 05, MAE: 1.59, MAPE: 0.4987, RMSE: 2.24
2018-12-10 18:52:38,371 - INFO - Horizon 06, MAE: 1.60, MAPE: 0.5075, RMSE: 2.27
2018-12-10 18:52:38,437 - INFO - Val loss decrease from 1.5547 to 1.5544, saving to data/model/dcrnn_DR_2_h_6_64-64_lr_0.01_bs_64_1210184936/models-1.5544-690
2018-12-10 18:52:45,938 - INFO - Epoch [30/100] (713) train_mae: 1.4274, val_mae: 1.5565 lr:0.000100 7.5s
2018-12-10 18:52:55,009 - INFO - Epoch [31/100] (736) train_mae: 1.4272, val_mae: 1.5561 lr:0.000100 9.1s
2018-12-10 18:53:03,444 - INFO - Epoc

2018-12-10 18:58:50,137 - INFO - Horizon 03, MAE: 1.55, MAPE: 0.4792, RMSE: 2.18
2018-12-10 18:58:50,140 - INFO - Horizon 04, MAE: 1.57, MAPE: 0.4917, RMSE: 2.21
2018-12-10 18:58:50,142 - INFO - Horizon 05, MAE: 1.59, MAPE: 0.5029, RMSE: 2.24
2018-12-10 18:58:50,145 - INFO - Horizon 06, MAE: 1.60, MAPE: 0.5120, RMSE: 2.27
2018-12-10 18:58:56,942 - INFO - Epoch [80/100] (1863) train_mae: 1.4264, val_mae: 1.5556 lr:0.000002 6.8s
2018-12-10 18:58:58,147 - INFO - Horizon 01, MAE: 1.48, MAPE: 0.4400, RMSE: 2.07
2018-12-10 18:58:58,150 - INFO - Horizon 02, MAE: 1.53, MAPE: 0.4668, RMSE: 2.14
2018-12-10 18:58:58,154 - INFO - Horizon 03, MAE: 1.55, MAPE: 0.4792, RMSE: 2.18
2018-12-10 18:58:58,157 - INFO - Horizon 04, MAE: 1.57, MAPE: 0.4917, RMSE: 2.21
2018-12-10 18:58:58,160 - INFO - Horizon 05, MAE: 1.59, MAPE: 0.5029, RMSE: 2.24
2018-12-10 18:58:58,163 - INFO - Horizon 06, MAE: 1.60, MAPE: 0.5120, RMSE: 2.27
Predictions saved as ../DCRNN/movia/out.npz.


### diff 2 best
### dropout 0.2, 0.5, 0: no effect
### 1, 3 layers: no effect
### 128 nodes: no effect
### laplacian worse


### diff 1
```
2018-12-10 19:18:20,415 - INFO - Horizon 01, MAE: 1.50, MAPE: 0.4415, RMSE: 2.08
2018-12-10 19:18:20,418 - INFO - Horizon 02, MAE: 1.57, MAPE: 0.4702, RMSE: 2.17
2018-12-10 19:18:20,421 - INFO - Horizon 03, MAE: 1.60, MAPE: 0.4847, RMSE: 2.22
2018-12-10 19:18:20,423 - INFO - Horizon 04, MAE: 1.63, MAPE: 0.5004, RMSE: 2.26
2018-12-10 19:18:20,426 - INFO - Horizon 05, MAE: 1.65, MAPE: 0.5073, RMSE: 2.30
2018-12-10 19:18:20,429 - INFO - Horizon 06, MAE: 1.66, MAPE: 0.5103, RMSE: 2.32
```

### diff 2
```
2018-12-10 18:58:56,944 - WARNING - Early stopping at epoch: 80
2018-12-10 18:58:58,147 - INFO - Horizon 01, MAE: 1.48, MAPE: 0.4400, RMSE: 2.07
2018-12-10 18:58:58,150 - INFO - Horizon 02, MAE: 1.53, MAPE: 0.4668, RMSE: 2.14
2018-12-10 18:58:58,154 - INFO - Horizon 03, MAE: 1.55, MAPE: 0.4792, RMSE: 2.18
2018-12-10 18:58:58,157 - INFO - Horizon 04, MAE: 1.57, MAPE: 0.4917, RMSE: 2.21
2018-12-10 18:58:58,160 - INFO - Horizon 05, MAE: 1.59, MAPE: 0.5029, RMSE: 2.24
2018-12-10 18:58:58,163 - INFO - Horizon 06, MAE: 1.60, MAPE: 0.5120, RMSE: 2.27
```

### diff 2 - 3 layers
```
2018-12-10 20:41:26,454 - INFO - Horizon 01, MAE: 1.48, MAPE: 0.4389, RMSE: 2.07
2018-12-10 20:41:26,457 - INFO - Horizon 02, MAE: 1.52, MAPE: 0.4667, RMSE: 2.13
2018-12-10 20:41:26,460 - INFO - Horizon 03, MAE: 1.54, MAPE: 0.4764, RMSE: 2.17
2018-12-10 20:41:26,462 - INFO - Horizon 04, MAE: 1.56, MAPE: 0.4856, RMSE: 2.21
2018-12-10 20:41:26,465 - INFO - Horizon 05, MAE: 1.58, MAPE: 0.4960, RMSE: 2.23
2018-12-10 20:41:26,467 - INFO - Horizon 06, MAE: 1.59, MAPE: 0.5079, RMSE: 2.26
```

### diff 1  layer -- no effect
```
2018-12-10 20:58:01,709 - INFO - Horizon 01, MAE: 1.49, MAPE: 0.4421, RMSE: 2.07
2018-12-10 20:58:01,712 - INFO - Horizon 02, MAE: 1.54, MAPE: 0.4706, RMSE: 2.15
2018-12-10 20:58:01,715 - INFO - Horizon 03, MAE: 1.56, MAPE: 0.4851, RMSE: 2.18
2018-12-10 20:58:01,717 - INFO - Horizon 04, MAE: 1.58, MAPE: 0.5014, RMSE: 2.22
2018-12-10 20:58:01,720 - INFO - Horizon 05, MAE: 1.60, MAPE: 0.5108, RMSE: 2.25
2018-12-10 20:58:01,722 - INFO - Horizon 06, MAE: 1.61, MAPE: 0.5207, RMSE: 2.27
```

### diff 2 dropout 0.2
```
2018-12-10 19:47:35,831 - INFO - Horizon 01, MAE: 1.48, MAPE: 0.4364, RMSE: 2.07
2018-12-10 19:47:35,834 - INFO - Horizon 02, MAE: 1.53, MAPE: 0.4638, RMSE: 2.14
2018-12-10 19:47:35,837 - INFO - Horizon 03, MAE: 1.54, MAPE: 0.4804, RMSE: 2.17
2018-12-10 19:47:35,839 - INFO - Horizon 04, MAE: 1.56, MAPE: 0.4963, RMSE: 2.21
2018-12-10 19:47:35,842 - INFO - Horizon 05, MAE: 1.58, MAPE: 0.5057, RMSE: 2.24
2018-12-10 19:47:35,845 - INFO - Horizon 06, MAE: 1.59, MAPE: 0.5155, RMSE: 2.26
```
### diff 2 dropout 0.5
```
2018-12-10 19:47:29,754 - INFO - Horizon 01, MAE: 1.48, MAPE: 0.4453, RMSE: 2.07
2018-12-10 19:47:29,756 - INFO - Horizon 02, MAE: 1.53, MAPE: 0.4678, RMSE: 2.14
2018-12-10 19:47:29,759 - INFO - Horizon 03, MAE: 1.55, MAPE: 0.4828, RMSE: 2.17
2018-12-10 19:47:29,762 - INFO - Horizon 04, MAE: 1.57, MAPE: 0.4977, RMSE: 2.21
2018-12-10 19:47:29,764 - INFO - Horizon 05, MAE: 1.58, MAPE: 0.5072, RMSE: 2.24
2018-12-10 19:47:29,766 - INFO - Horizon 06, MAE: 1.60, MAPE: 0.5154, RMSE: 2.26
```
### diff 3
```
2018-12-10 19:01:31,102 - WARNING - Early stopping at epoch: 58
2018-12-10 19:01:32,378 - INFO - Horizon 01, MAE: 1.59, MAPE: 0.5304, RMSE: 2.21
2018-12-10 19:01:32,381 - INFO - Horizon 02, MAE: 1.60, MAPE: 0.5254, RMSE: 2.25
2018-12-10 19:01:32,384 - INFO - Horizon 03, MAE: 1.61, MAPE: 0.5227, RMSE: 2.27
2018-12-10 19:01:32,386 - INFO - Horizon 04, MAE: 1.62, MAPE: 0.5412, RMSE: 2.29
2018-12-10 19:01:32,389 - INFO - Horizon 05, MAE: 1.63, MAPE: 0.5460, RMSE: 2.32
2018-12-10 19:01:32,391 - INFO - Horizon 06, MAE: 1.63, MAPE: 0.5442, RMSE: 2.33
```


###  diff 2 128 nodes
```
2018-12-10 21:18:15,171 - WARNING - Early stopping at epoch: 80
2018-12-10 21:18:16,450 - INFO - Horizon 01, MAE: 1.48, MAPE: 0.4415, RMSE: 2.07
2018-12-10 21:18:16,453 - INFO - Horizon 02, MAE: 1.53, MAPE: 0.4724, RMSE: 2.14
2018-12-10 21:18:16,456 - INFO - Horizon 03, MAE: 1.55, MAPE: 0.4848, RMSE: 2.18
2018-12-10 21:18:16,458 - INFO - Horizon 04, MAE: 1.56, MAPE: 0.4965, RMSE: 2.21
2018-12-10 21:18:16,461 - INFO - Horizon 05, MAE: 1.58, MAPE: 0.5045, RMSE: 2.24
2018-12-10 21:18:16,463 - INFO - Horizon 06, MAE: 1.59, MAPE: 0.5146, RMSE: 2.26
```


### diff2 laplacian 
```
2018-12-10 21:34:08,644 - INFO - Horizon 01, MAE: 1.50, MAPE: 0.4444, RMSE: 2.09
2018-12-10 21:34:08,647 - INFO - Horizon 02, MAE: 1.57, MAPE: 0.4723, RMSE: 2.18
2018-12-10 21:34:08,650 - INFO - Horizon 03, MAE: 1.60, MAPE: 0.4856, RMSE: 2.22
2018-12-10 21:34:08,652 - INFO - Horizon 04, MAE: 1.63, MAPE: 0.5040, RMSE: 2.26
2018-12-10 21:34:08,654 - INFO - Horizon 05, MAE: 1.65, MAPE: 0.5113, RMSE: 2.30
2018-12-10 21:34:08,657 - INFO - Horizon 06, MAE: 1.67, MAPE: 0.5134, RMSE: 2.33
```