In [1]:
import sys
sys.path.append("../")
import pandas as pd
from misc.MoviaBusDataset import MoviaBusDataset
from misc.data_loader import load_network, adjacency_matrix
import pickle
import os
import yaml

In [2]:
#df = pd.read_hdf("../DCRNN/data/df_highway_2012_4mon_sample.h5")
# Working folder for this experiment; the notebook writes its HDF5 data,
# adjacency pickle and YAML config here.
base_dir = '../DCRNN/movia'
# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(base_dir, exist_ok=True)
for subdir in ('out', 'data', 'model'):
    os.makedirs(os.path.join(base_dir, subdir), exist_ok=True)

In [3]:

# Stack every dataframe exposed by the dataset into one frame, drop the
# 'TimeOfDay' column and cast everything that remains to float32.
movia_dataset = MoviaBusDataset('../data/train', interpolation=True)
all_data = (
    pd.concat(movia_dataset.dataframes)
    .drop(['TimeOfDay'], axis=1)
    .astype('float32')
)

# Store the combined frame under key 'df' in table format.
data_file = os.path.join(base_dir, 'movia.h5')
all_data.to_hdf(data_file, key='df', format='table')


In [4]:
# Derive the float32 adjacency matrix from the loaded road network.
road_network = load_network()
adj_mx = adjacency_matrix(road_network).astype('float32')

# The pickle holds a three-element list; the training cell unpacks it as
# `_, _, adj_mx`, so the first two slots are just -1 placeholders.
adj_pkl_path = os.path.join(base_dir, 'adj_mx.pkl')
with open(adj_pkl_path, 'wb') as pkl_file:
    pickle.dump([-1, -1, adj_mx], pkl_file, protocol=2)

In [None]:
# Run DCRNN's generate_training_data.py on ../DCRNN/movia/movia.h5, with
# ../DCRNN/movia/data as the output directory. The captured output below
# reports the resulting train/val/test array shapes.
%run ../DCRNN/scripts/generate_training_data.py --output_dir ../DCRNN/movia/data --traffic_df_filename ../DCRNN/movia/movia.h5

Generating training data
x shape:  (170, 12, 194, 2) , y shape:  (170, 12, 194, 2)
train x:  (119, 12, 194, 2) y: (119, 12, 194, 2)
val x:  (17, 12, 194, 2) y: (17, 12, 194, 2)
test x:  (34, 12, 194, 2) y: (34, 12, 194, 2)


In [None]:
# Batch sizes and input locations. The paths are relative; the training
# cell changes into the parent of base_dir before using this config.
data_config = dict(
    batch_size=64,
    dataset_dir='movia/data',
    test_batch_size=64,
    val_batch_size=64,
    graph_pkl_filename='movia/adj_mx.pkl',
)

# Model settings; num_nodes is the number of columns in the exported frame.
model_config = dict(
    cl_decay_steps=2000,
    filter_type='dual_random_walk',
    horizon=12,
    input_dim=2,
    l1_decay=0,
    max_diffusion_step=2,
    num_nodes=len(all_data.columns),
    num_rnn_layers=2,
    output_dim=1,
    rnn_units=64,
    seq_len=12,
    use_curriculum_learning=True,
)

# Training settings.
train_config = dict(
    base_lr=0.01,
    dropout=0,
    epoch=0,
    epochs=100,
    epsilon=1.0e-3,
    global_step=0,
    lr_decay_ratio=0.1,
    max_grad_norm=5,
    max_to_keep=100,
    min_learning_rate=2.0e-06,
    optimizer='adam',
    patience=50,
    steps=[20, 30, 40, 50],
    test_every_n_epochs=10,
)

# Full configuration, later passed to DCRNNSupervisor as keyword arguments.
config = dict(
    base_dir='data/model',
    log_level='INFO',
    data=data_config,
    model=model_config,
    train=train_config,
)

# Also persist the configuration as block-style YAML next to the data.
config_path = os.path.join(base_dir, 'config.yml')
with open(config_path, 'w') as config_file:
    yaml.dump(config, config_file, default_flow_style=False)

In [None]:
# If this cell fails with "ValueError: Variable rnn/basic_rnn_cell/kernel
# already exists, disallowed. Did you mean to set reuse=True or
# reuse=tf.AUTO_REUSE in VarScope?", try restarting the kernel.

# Move to the parent of base_dir: the relative paths used below and in
# `config` ('movia/...', 'data/model') are written relative to that folder.
os.chdir(os.path.join(base_dir,'..'))
# Script equivalent of this cell:
#%run run_demo.py --config_filename movia/config.yml --output_filename movia/out/out.npz
import numpy as np
import tensorflow as tf

# Imported after the chdir above, as in the original cell.
from lib.utils import load_graph_data
from model.dcrnn_supervisor import DCRNNSupervisor

use_cpu_only = False
graph_pkl_filename = 'movia/adj_mx.pkl'
# Target for the saved predictions. The original cell read this from an
# undefined `args` object (left over from run_demo.py) and used `np`
# without importing it, so it raised NameError after training finished.
output_filename = 'movia/out/out.npz'

# device_count={'GPU': 0} is the CPU-only variant of the session config.
if use_cpu_only:
    tf_config = tf.ConfigProto(device_count={'GPU': 0})
else:
    tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True

# Only the third item returned for the graph pickle is used here.
_, _, adj_mx = load_graph_data(graph_pkl_filename)
with tf.Session(config=tf_config) as sess:
    supervisor = DCRNNSupervisor(adj_mx=adj_mx, **config)
    supervisor.train(sess)
    #supervisor.load(sess, config['train']['model_filename'])
    outputs = supervisor.evaluate(sess)
    np.savez_compressed(output_filename, **outputs)
    print('Predictions saved as {}.'.format(output_filename))

2018-11-08 21:44:50,850 - INFO - Log directory: data/model/dcrnn_DR_2_h_12_64-64_lr_0.01_bs_64_1108214450/
2018-11-08 21:44:50,855 - INFO - {'base_dir': 'data/model', 'log_level': 'INFO', 'data': {'batch_size': 64, 'dataset_dir': 'movia/data', 'test_batch_size': 64, 'val_batch_size': 64, 'graph_pkl_filename': 'movia/adj_mx.pkl'}, 'model': {'cl_decay_steps': 2000, 'filter_type': 'dual_random_walk', 'horizon': 12, 'input_dim': 2, 'l1_decay': 0, 'max_diffusion_step': 2, 'num_nodes': 194, 'num_rnn_layers': 2, 'output_dim': 1, 'rnn_units': 64, 'seq_len': 12, 'use_curriculum_learning': True}, 'train': {'base_lr': 0.01, 'dropout': 0, 'epoch': 0, 'epochs': 100, 'epsilon': 0.001, 'global_step': 0, 'lr_decay_ratio': 0.1, 'max_grad_norm': 5, 'max_to_keep': 100, 'min_learning_rate': 2e-06, 'optimizer': 'adam', 'patience': 50, 'steps': [20, 30, 40, 50], 'test_every_n_epochs': 10}}
2018-11-08 21:44:50,953 - INFO - ('x_train', (119, 12, 194, 2))
2018-11-08 21:44:50,955 - INFO - ('y_train', (119, 12, 