## Imports

Make sure your computer has the right versions installed, this BRNN implementation will ONLY work in TF2.0, you might need to get Gast as well if you get an error related to that.

In [0]:
import tensorflow as tf
if tf.__version__ != '2.0.0-rc1':
  !pip install tensorflow-gpu==2.0.0-rc1
  !pip install tensorflow_probability==0.8.0-rc0
  !pip install numpy==1.17.2

Cloning into 'BayesianNN-DQR'...
remote: Enumerating objects: 101, done.[K
remote: Counting objects:   0% (1/101)[Kremote: Counting objects:   1% (2/101)[Kremote: Counting objects:   2% (3/101)[Kremote: Counting objects:   3% (4/101)[Kremote: Counting objects:   4% (5/101)[Kremote: Counting objects:   5% (6/101)[Kremote: Counting objects:   6% (7/101)[Kremote: Counting objects:   7% (8/101)[Kremote: Counting objects:   8% (9/101)[Kremote: Counting objects:   9% (10/101)[Kremote: Counting objects:  10% (11/101)[Kremote: Counting objects:  11% (12/101)[Kremote: Counting objects:  12% (13/101)[Kremote: Counting objects:  13% (14/101)[Kremote: Counting objects:  14% (15/101)[Kremote: Counting objects:  15% (16/101)[Kremote: Counting objects:  16% (17/101)[Kremote: Counting objects:  17% (18/101)[Kremote: Counting objects:  18% (19/101)[Kremote: Counting objects:  19% (20/101)[Kremote: Counting objects:  20% (21/101)[Kremote: Counting objects:  21%

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_probability as tfp
import math
from sklearn.metrics import mean_squared_error as eval_mse
from sklearn.metrics import median_absolute_error as eval_mae
from datetime import datetime
tfd = tfp.distributions
tfb = tfp.bijectors
import pandas as pd
pd.options.mode.chained_assignment = None

print("Tensorflow Probability Version .{}".format(tfp.__version__))
print("Tensorflow Version .{}".format(tf.__version__))
print("Numpy Version .{}".format(np.__version__))

Tensorflow Probability Version .0.8.0-rc0
Tensorflow Version .2.0.0-rc1
Numpy Version .1.17.2
Found GPU at: /device:GPU:0


If the following modules can't be loaded just take the functions from Github and paste them into this Notebook :-)

In [None]:
from BNN.prior import MixturePrior
from BNN.VariationalPosterior import VariationalPosterior
from BNN.cells.BayesianBasicCell import MinimalRNNCell
from BNN.cells.BayesianUntiedLSTM import BayesianLSTMCell_Untied
from BNN.cells.BayesianTiedLSTM import BayesianLSTMCellTied
from load import sort_and_order, skip_row, write_3d, skip_row, transform, fit_scale, roll, sort_links, tod_interval, split_df_with_val, tilted_loss_np_t

## Model

In [0]:
class BayesianRNN(tf.keras.Model):
    def __init__(self, num_units, num_links, batch_size, init, cell_type, prior, **kwargs):
        super(BayesianRNN, self).__init__(**kwargs)
        self.cell_type = cell_type
        self.init = init
        self.num_units_lst = num_units
        self.num_links = num_links
        self.batch_size = batch_size
        self.cell_prior = prior
        self.prior = prior
        self.build()
    
    def build(self):
        print("Building net...")
        self.cell_lst = []
        state_size = self.num_links
        for i, num_units in enumerate(self.num_units_lst):
          if self.cell_type == 'Basic':
              self.cell_lst.append(MinimalRNNCell(num_units, training=True, init=self.init, prior=self.cell_prior))
          elif self.cell_type == 'TiedLSTM':
              self.cell_lst.append(BayesianLSTMCellTied(num_units, training=True, init=self.init, prior=self.cell_prior))
          else:
              self.cell_lst.append(BayesianLSTMCell_Untied(num_units, training=True, init=self.init, prior=self.cell_prior))
          self.cell_lst[-1].initialise_cell(state_size)
          state_size = num_units
            
        self.weight_mu = self.add_weight(shape=(self.num_units_lst[-1],self.num_links),
                                 initializer=self.init,
                                 name='weight_mu')
        self.weight_rho = self.add_weight(shape=(self.num_units_lst[-1],self.num_links),
                                 initializer=self.init,
                                 name='weight_mu')
        self.bias_mu = self.add_weight(shape=(self.num_links,),
                                     initializer=self.init,
                                     name='bias_mu', trainable=True)
        self.bias_rho = self.add_weight(shape=(self.num_links,),
                                     initializer=self.init,
                                     name='bias_mu', trainable=True)
        self.weight_dist = VariationalPosterior(self.weight_mu, self.weight_rho) 
        self.bias_dist = VariationalPosterior(self.bias_mu, self.bias_rho)     
        print("  Output layer has been built (in:", self.num_units_lst[-1], ") (out:", 1, ")")

        ## The diagonal of the correlation matrix
        self.scale_prior = tfd.LKJ(dimension=self.num_links, concentration=10, input_output_cholesky=True)
        self.y_rho = self.add_weight(shape=(self.num_links*((self.num_links-1)/2 + 1),), 
                                     initializer='zeros',
                                     name='y_rho',
                                     trainable=True)
        self.built = True
    @property
    def y_std(self):
        cor = tfb.ScaleTriL(diag_bijector=tfb.Softplus(),
                            diag_shift=None)
        return cor.forward(self.y_rho)

    def call(self, batch_x, training, sampling):
        self.weight = self.weight_dist.sample(training, sampling)
        self.bias = self.bias_dist.sample(training, sampling)
        if training:
            self.log_prior_dense = sum_all(self.prior.log_prob(self.weight)) + sum_all(self.prior.log_prob(self.bias))
            self.log_variational_posterior_dense  = self.weight_dist.log_prob(self.weight) 
            self.log_variational_posterior_dense += self.bias_dist.log_prob(self.bias)
        for cell in self.cell_lst:
          cell.is_training = training
          cell.sampling = sampling

        inputs = tf.convert_to_tensor(batch_x)
        rnn = tf.keras.layers.RNN(self.cell_lst)
        ## RNN layer
        final_rnn_output = rnn(inputs)
        ## Dense layer
        self.outputs = tf.linalg.matmul(final_rnn_output, self.weight) + self.bias   
        return self.outputs
    
    def log_prior(self):
        return sum(sum_all(cell.log_prior) for cell in self.cell_lst) + sum_all(self.log_prior_dense) + sum_all(self.scale_prior.log_prob(self.y_std))
    
    def log_variational_posterior(self):
        return sum(sum_all(cell.log_variational_posterior) for cell in self.cell_lst) + sum_all(self.log_variational_posterior_dense)
    
    def elbo(self, batch_x, batch_y, batch_ind, num_batches,  training, sampling=True):
        output = self(batch_x, training, sampling)
        assert(batch_y.shape[1] == self.num_links)
        assert(output.shape == batch_y.shape)
        pred_dist = tfd.MultivariateNormalTriL(output, scale_tril=self.y_std)
        self.nll = -tf.math.reduce_sum(pred_dist.log_prob(batch_y))
        kl_weight = 2**(num_batches - batch_ind) / (2**num_batches - 1)
        return (self.log_variational_posterior() - self.log_prior())/num_batches + self.nll, sum_all((output - batch_y)**2) / self.batch_size

## Testing Across Test Partitions

In [0]:
num_units_basic_l10 = 6
num_units_basic_l20 = 10
num_units_tiedLSTM_l10 = 54
num_units_tiedLSTM_l20 = 66
num_units_untiedLSTM_l10 = 20
num_units_untiedLSTM_l20 = 30

param_lst = [num_units_basic_l10,
            num_units_basic_l20,
            num_units_tiedLSTM_l10,
            num_units_tiedLSTM_l20,
            num_units_untiedLSTM_l10,
            num_units_untiedLSTM_l20,]
mtypes = ['Basic', 'Basic', 'TiedLSTM', 'TiedLSTM', 'UntiedLSTM', 'UntiedLSTM'] 
lag_lst = [10,20,10,20,10,20] 
prior = MixturePrior(0.10, 1, np.exp(-6))

quantiles = np.array([0.005, 0.995, 0.025, 0.975, 0.05, 0.95, 0.10, 0.90, 0.20, 0.80, 0.30, 0.70, 0.40, 0.60])
pred_ints = np.array([0.99,         0.95,         0.90,       0.80,       0.60,       0.40,       0.20])

start = datetime.strptime('19/01/21', "%y/%m/%d")
end   = datetime.strptime('19/04/14', "%y/%m/%d")
period = (end - start).days
period_train_days = 7*4  ## Train on 4 weeks
period_val_days = 7      ## Validate training on 1 week
period_test_days =  7    ## Test on  1 week
advance_days = 7         ## Advance by 1 week
num_partitions = int((period-period_train_days-period_test_days)/advance_days)

init = 'uniform'
num_links = 16
preds = 1
batch_size = 80
epochs = 100
patience = 8
init_lr = 1e-2

In [0]:
mse = np.empty((num_partitions, len(mtypes)))
mae = np.empty((num_partitions, len(mtypes)))
mape = np.empty((num_partitions, len(mtypes)))
mse2 = np.empty((num_partitions, len(mtypes)))
mae2 = np.empty((num_partitions, len(mtypes)))
mape2 = np.empty((num_partitions, len(mtypes)))
icp = np.empty((int(len(quantiles)/2), num_partitions, len(mtypes)))
mil = np.empty((int(len(quantiles)/2), num_partitions, len(mtypes)))
tradeoff = np.empty((int(len(quantiles)/2), num_partitions, len(mtypes)))
tilt_loss = np.empty((len(quantiles), num_partitions, len(mtypes)))
time = np.empty((num_partitions, len(mtypes)))

icp_route = np.empty((int(len(quantiles)/2), num_partitions, len(mtypes)))
mil_route = np.empty((int(len(quantiles)/2), num_partitions, len(mtypes)))
tradeoff_route = np.empty((int(len(quantiles)/2), num_partitions, len(mtypes)))
mse_route = np.empty((num_partitions, len(mtypes)))
mae_route = np.empty((num_partitions, len(mtypes)))
mape_route = np.empty((num_partitions, len(mtypes)))
mse_route2 = np.empty((num_partitions, len(mtypes)))
mae_route2 = np.empty((num_partitions, len(mtypes)))
mape_route2 = np.empty((num_partitions, len(mtypes)))

for m, (mtype, units, lags) in enumerate(zip(mtypes, param_lst, lag_lst)):
  print("Bayesian net with {} cell and {} lags.".format(mtype, lags), end='')
  net = BayesianRNN([units], num_links, batch_size, init, mtype, prior)
  for part in range(num_partitions):
    print("   Partition {}/{}".format(part+1, num_partitions), end=' ')
    train_from = part*advance_days
    train_to = period_train_days + part*advance_days
    val_to = period_train_days + part*advance_days + period_val_days
    test_to = period_train_days + period_val_days + part*advance_days + period_test_days

    train_ind = np.arange(train_from, train_to)
    val_ind = np.arange(train_to, val_to)
    test_ind = np.arange(val_to, test_to)
    print("Training on weeks {}".format(np.arange(int((train_ind[6]+1)/7),
                                                  int((train_ind[-1]+1)/7)+1)), end=' ')
    print("validating training on week {}".format(int((val_ind[-1]+1)/7)), end=' ')
    keep_train = range(int(2297920*train_ind[ 0]/period), int(2297920*train_ind[-1]/period))
    keep_val = range(int(2297920*train_ind[-1]/period)+1, int(2297920*val_ind[-1]/period))
    keep_test = range(int(2297920*val_ind[-1]/period)+1, int(2297920*test_ind[-1]/period))

    ## Load the part of the dataset we need for training, validation, testing
    data_train = pd.read_csv('data/link_travel_time_local.csv.gz', compression='gzip', 
                              parse_dates = True, index_col = 0,
                              skiprows = lambda x: skip_row(x, keep_train))
    data_val = pd.read_csv('data/link_travel_time_local.csv.gz', compression='gzip', 
                              parse_dates = True, index_col = 0,
								              skiprows = lambda x: skip_row(x, keep_val))
    data_test  = pd.read_csv('data/link_travel_time_local.csv.gz', compression='gzip',
                              parse_dates = True, index_col = 0,
                              skiprows = lambda x: skip_row(x, keep_test))
    ## Sort data by links and add categorical columns TOD, Weekday
    data_train, order = sort_and_order(data_train)
    data_val, order = sort_and_order(data_val)
    data_test, order = sort_and_order(data_test)
    
    ## Transform datasets using the mean and std for train and val set.
    means_df_train, scales_df_train = fit_scale(pd.concat([data_train,data_val]), order)
    ts_train_df, mean_train_df, scale_train_df = transform(data_train, 
                                                            means_df_train, 
                                                            scales_df_train, 
                                                            order,
                                                            freq = '15min')
    ts_val_df, mean_val_df, scale_val_df = transform(data_val, 
                                                      means_df_train, 
                                                      scales_df_train, 
                                                      order,
                                                      freq = '15min')
    ts_test_df, mean_test_df, scale_test_df = transform(data_test, 
                                                        means_df_train, 
                                                        scales_df_train, 
                                                        order,
                                                        freq = '15min')
    ## Roll data into timeseries format
    X_train, y_train, y_ix_train, y_mean_train, y_std_train = roll(ts_train_df.index, 
                                                                    ts_train_df.values,
                                                                    mean_train_df.values,
                                                                    scale_train_df.values,
                                                                    lags, 
                                                                    preds)
    X_val, y_val, y_ix_val, y_mean_val, y_std_val = roll(ts_val_df.index, 
                                                        ts_val_df.values,
                                                        mean_val_df.values,
                                                        scale_val_df.values,
                                                        lags, 
                                                        preds)
    X_test, y_test, y_ix_test, y_mean_test, y_std_test = roll(ts_test_df.index, 
                                                              ts_test_df.values, 
                                                              mean_test_df.values,
                                                              scale_test_df.values,
                                                              lags, 
                                                              preds)
    num_batch_train = int(X_train.shape[0]/batch_size)
    num_batch_test = int(X_test.shape[0]/batch_size)

    data_train = tf.data.Dataset.from_tensor_slices((X_train, 
                                                    y_train)).shuffle(1000).batch(batch_size, drop_remainder=True)
    data_test = tf.data.Dataset.from_tensor_slices((X_test, 
                                                    y_test)).batch(batch_size, drop_remainder=True)
    drop_train = len(y_train) - num_batch_train*batch_size
    drop_test = len(y_test) - num_batch_test*batch_size
    X_train, y_train, y_ix_train, y_mean_train, y_std_train = drop_remainder(X_train, y_train, y_ix_train, y_mean_train, y_std_train, drop_train)
    X_test, y_test, y_ix_test, y_mean_test, y_std_test = drop_remainder(X_test, y_test, y_ix_test, y_mean_test, y_std_test, drop_test)
        
    t1 = datetime.now()
    ## Initialise weights using last partition
    if part > 0:
      init_lr = 1e-2
      net.set_weights(best_weights)

    best_elbo = 10000000000000000000000
    best_mse = 10000000000000000000000
    lr = init_lr
    for epoch in range(epochs):
      ## Training
      elbo_sum, mse_sum = train_step(net, data_train, lr)
      mse_avg_train = mse_sum.numpy() / num_batch_train
      elbo_avg = elbo_sum.numpy() / num_batch_train
      ## Validation
      elbo_test, mse_sum = val_loss(net, data_test)
      mse_avg_test = mse_sum.numpy() / num_batch_test
      elbo_avg_test = elbo_test.numpy() / num_batch_test
      
      if elbo_avg_test < best_elbo:
          best_weights = net.get_weights()
          best_elbo = elbo_avg_test
          patience_counter = 0
      elif mse_avg_test < best_mse:
          best_weights = net.get_weights()
          best_mse = mse_avg_test
          patience_counter = 0
      patience_counter += 1
      
      if patience_counter > patience:
          print("Early stopping after {} epochs".format(epoch))
          break
      if patience_counter >= int(patience/2):
          lr = init_lr*np.exp(-0.1*epoch)
    ## Set weights back to the best model
    net.set_weights(best_weights)

    t2 = datetime.now()
    time[part, m] = (t2-t1).seconds

    print("      Testing on week {}".format(int((test_ind[-1]+1)/7)))
    nsamples = 50
    y_pred = np.empty((nsamples, y_test.shape[0], y_test.shape[2]))
    y_pred_mean_param = np.empty((y_test.shape[0], y_test.shape[2]))
    T = 0
    for x_batch, _ in data_test:
        x = tf.cast(x_batch,tf.float32)
        y_pred_mean_param[T*batch_size:(T+1)*batch_size] = net(x, training=False, sampling=False)
        for samp in range(nsamples):
          out = net(x, training=False, sampling=True)
          y_pred[samp, T*batch_size:(T+1)*batch_size] = tfd.MultivariateNormalTriL(out, scale_tril=net.y_std).sample()
        T = T+1
    y_pred_all = np.sum(y_pred*y_std_test[:,0] + y_mean_test[:,0], axis=2)
    y_pred_mean_param = y_pred_mean_param*y_std_test[:,0,:] + y_mean_test[:,0,:]
    for i in range(int(len(quantiles)/2)):
      icp_lnks = np.zeros(num_links)
      mil_lnks = np.zeros(num_links) 
      tradeoff_lnks = np.zeros(num_links)  
      for lnk in range(num_links):
        q1 = np.quantile(y_pred[:,:,lnk], quantiles[2*i],   axis=0)
        q2 = np.quantile(y_pred[:,:,lnk], quantiles[2*i+1], axis=0)
        q1_back = q1*y_std_test[:,0,lnk] + y_mean_test[:,0,lnk]
        q2_back = q2*y_std_test[:,0,lnk] + y_mean_test[:,0,lnk]
        icp_lnks[lnk] = 1-(np.sum(y_test[:,0,lnk] < q1)+np.sum(y_test[:,0,lnk] > q2))/len(y_test)
        mil_lnks[lnk] = np.sum(np.maximum(0, q2_back - q1_back)) / len(y_test)
        tradeoff_lnks[lnk] = np.abs(icp_lnks[lnk] - pred_ints[i])*mil_lnks[lnk]
      icp[i, part, m] = np.mean(icp_lnks)
      mil[i, part, m] = np.mean(mil_lnks)
      tradeoff[i, part, m] = np.mean(tradeoff)

    mse_lnks = np.zeros(num_links)
    mae_lnks = np.zeros(num_links) 
    mape_lnks = np.zeros(num_links)
    mse_lnks2 = np.zeros(num_links)
    mae_lnks2 = np.zeros(num_links) 
    mape_lnks2 = np.zeros(num_links)
    for lnk in range(num_links):
      y_mean_lnk = np.mean(y_pred[:,:,lnk], axis=0)*y_std_test[:,0,lnk] + y_mean_test[:,0,lnk]
      y_true_lnk = y_test[:,0,lnk]*y_std_test[:,0,lnk] + y_mean_test[:,0,lnk]
      mse_lnks[lnk] = eval_mse(y_mean_lnk/60, y_true_lnk/60)
      mae_lnks[lnk] = eval_mae(y_mean_lnk/60, y_true_lnk/60)
      mape_lnks[lnk] = np.mean(np.abs((y_mean_lnk/60 - y_true_lnk/60)/(y_true_lnk/60)))
      mse_lnks2[lnk] = eval_mse(y_pred_mean_param[:,lnk]/60, y_true_lnk/60)
      mae_lnks2[lnk] = eval_mae(y_pred_mean_param[:,lnk]/60, y_true_lnk/60)
      mape_lnks2[lnk] = np.mean(np.abs((y_pred_mean_param[:,lnk]/60 - y_true_lnk/60)/(y_true_lnk/60)))
    mse[part, m] = np.mean(mse_lnks)
    mae[part, m] = np.mean(mae_lnks)
    mape[part, m] = np.mean(mape_lnks)
    mse2[part, m] = np.mean(mse_lnks2)
    mae2[part, m] = np.mean(mae_lnks2)
    mape2[part, m] = np.mean(mape_lnks2)
    for q, quan in enumerate(quantiles):
      quan_pred = np.quantile(y_pred, quan, axis=0)
      tilt_loss[q, part, m] = tilted_loss_np_t(quan, np.squeeze(y_test), quan_pred)
    
    y_test_all = np.sum(y_test[:,0]*y_std_test[:,0] + y_mean_test[:,0], axis=1)
    for i in range(int(len(quantiles)/2)):
      q1_all = np.quantile(y_pred_all, quantiles[2*i],   axis=0)
      q2_all = np.quantile(y_pred_all, quantiles[2*i+1], axis=0)
      icp_route[i, part, m] = 1-(np.sum(y_test_all < q1_all)+np.sum(y_test_all > q2_all))/len(y_test)
      mil_route[i, part, m] = np.sum(np.maximum(0, q2_all - q1_all)) / len(y_test)
      tradeoff_route[i, part, m] = np.abs(icp_route[i, part, m] - pred_ints[i])*mil_route[i, part, m]

    y_mean_all = np.mean(y_pred_all, axis=0)
    mse_route[part, m] = eval_mse(y_mean_all/60, y_test_all/60)
    mape_route[part, m] = np.mean(np.abs((y_mean_all/60 - y_test_all/60)/(y_test_all/60)))
    mae_route[part, m] = eval_mae(y_mean_all/60, y_test_all/60)
    mse_route2[part, m] = eval_mse(np.sum(y_pred_mean_param,axis=1)/60, y_test_all/60)
    mape_route2[part, m] = np.mean(np.abs((np.sum(y_pred_mean_param,axis=1)/60 - y_test_all/60)/(y_test_all/60)))
    mae_route2[part, m] = eval_mae(np.sum(y_pred_mean_param,axis=1)/60, y_test_all/60)

    ## Print results
    print("    Quantile errors (avg link)  ICP (0.99) %.3f, MIL (0.99) %.3f" % (icp[0,part,m], mil[0,part,m]), end = ' ')
    print("ICP (0.95) %.3f, MIL (0.95) %.3f" % (icp[1,part,m], mil[1,part,m]), end = ' ')
    print("ICP (0.90) %.3f, MIL (0.90) %.3f" % (icp[2,part,m], mil[2,part,m]), end = ' ')
    print("ICP (0.80) %.3f, MIL (0.80) %.3f" % (icp[3,part,m], mil[3,part,m]), end = ' ')
    print("ICP (0.60) %.3f, MIL (0.60) %.3f" % (icp[4,part,m], mil[4,part,m]), end = ' ')
    print("ICP (0.40) %.3f, MIL (0.40) %.3f" % (icp[5,part,m], mil[5,part,m]), end = ' ')
    print("ICP (0.20) %.3f, MIL (0.20) %.3f" % (icp[6,part,m], mil[6,part,m]))

    print("    Quantile errors (route)    ICP (0.99) %.3f, MIL (0.99) %.3f" % (icp_route[0,part,m], mil_route[0,part,m]), end = ' ')
    print("ICP (0.95) %.3f, MIL (0.95) %.3f" % (icp_route[1,part,m], mil_route[1,part,m]), end = ' ')
    print("ICP (0.90) %.3f, MIL (0.90) %.3f" % (icp_route[2,part,m], mil_route[2,part,m]), end = ' ')
    print("ICP (0.80) %.3f, MIL (0.80) %.3f" % (icp_route[3,part,m], mil_route[3,part,m]), end = ' ')
    print("ICP (0.60) %.3f, MIL (0.60) %.3f" % (icp_route[4,part,m], mil_route[4,part,m]), end = ' ')
    print("ICP (0.40) %.3f, MIL (0.40) %.3f" % (icp_route[5,part,m], mil_route[5,part,m]), end = ' ')
    print("ICP (0.20) %.3f, MIL (0.20) %.3f" % (icp_route[6,part,m], mil_route[6,part,m]))

    print(" Tilted loss for quantile:", end='')
    for q, quan in enumerate(quantiles):
      print("({}): ".format(quan), end='')
      print("%.2f " % tilt_loss[q,part,m], end = '')
    print("")
    print("    Mean errors by sampling (avg link) MSE %.3f MAE %.3f MAPE %.3f" % (mse[part,m], mae[part,m], mape[part,m]))
    print("    Mean errors by sampling (route)    MSE %.3f MAE %.3f MAPE %.3f" % (mse_route[part,m], mae_route[part,m], mape_route[part,m]))
    print("    Mean errors by mean parameters (avg link) MSE %.3f MAE %.3f MAPE %.3f" % (mse2[part,m], mae2[part,m], mape2[part,m]))
    print("    Mean errors by mean parameters (route)    MSE %.3f MAE %.3f MAPE %.3f" % (mse_route2[part,m], mape_route2[part,m], mape_route2[part,m]))
  print("Times for model (seconds)")
  print(time[:,m])
  print("")

Bayesian net with Basic cell and 10 lags.Building net...
  Basic cell has been built (in: 16 ) (out: 6 )
  Output layer has been built (in: 6 ) (out: 1 )
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Early stopping after 40 epochs
      Testing on week 6
    Quantile errors (avg link)  ICP (0.99) 0.964, MIL (0.99) 76.845 ICP (0.95) 0.942, MIL (0.95) 64.356 ICP (0.90) 0.917, MIL (0.90) 55.466 ICP (0.80) 0.853, MIL (0.80) 43.916 ICP (0.60) 0.693, MIL (0.60) 29.155 ICP (0.40) 0.491, MIL (0.40) 18.262 ICP (0.20) 0.258, MIL (0.20) 8.855
    Quantile errors (route)    ICP (0.99) 0.984, MIL (0.99) 433.174 ICP (0.95) 0.976, MIL (0.95) 362.900 ICP (0.90) 0.952, MIL (0.90) 311.847 ICP (0.80) 0.901, MIL (0.80) 246.204 ICP (0.60) 0.743, MIL (0.60) 163.575 ICP (0.40) 0.547, MIL (0.40) 102.571 ICP (0.20) 0.274, MIL (0.20) 49.197
 Tilted loss for quantile:(0.005): 0.01 (0.995): 0.06 (0.025): 0.06 (0.975): 0.11 (0.05): 0.11 (0.95): 0.17 (0.1): 0.17 (0.9)