In [1]:
import numpy as np
import pandas as pd

In [2]:
def masked_rmse(y_pred, y_true, null_val=0):
    """Root-mean-square error over the entries of ``y_true`` that are not missing.

    Entries of ``y_true`` equal to ``null_val`` (or NaN entries, when
    ``null_val`` is NaN) receive weight zero.  The remaining entries are
    re-weighted by ``1 / mean(mask)`` so that the plain mean taken over *all*
    entries is the mean over the kept entries (up to float32 rounding of the
    weights).

    :param y_pred: predicted values, broadcastable against ``y_true``.
    :param y_true: ground-truth values.
    :param null_val: sentinel marking missing ground truth.  Defaults to 0,
        which is the value this function previously hard-coded; pass
        ``np.nan`` to mask NaN entries instead (same convention as
        ``masked_mape``).
    :return: scalar RMSE; 0.0 when every entry is masked.
    """
    # Masked positions and a fully masked input produce 0/0 or 0*inf on
    # purpose; silence the warnings and let nan_to_num zero them out below.
    with np.errstate(divide="ignore", invalid="ignore"):
        if np.isnan(null_val):
            mask = ~np.isnan(y_true)
        else:
            mask = np.not_equal(y_true, null_val)
        mask = mask.astype(np.float32)
        mask /= np.mean(mask)
        # Squaring already discards the sign, so no abs() is needed first.
        sq_err = np.square(y_pred - y_true)
        sq_err = np.nan_to_num(sq_err * mask)
        return np.sqrt(np.mean(sq_err))

def masked_mae(y_pred, y_true, null_val=0):
    """Mean absolute error over the entries of ``y_true`` that are not missing.

    Entries of ``y_true`` equal to ``null_val`` (or NaN entries, when
    ``null_val`` is NaN) receive weight zero.  The remaining entries are
    re-weighted by ``1 / mean(mask)`` so that the plain mean taken over *all*
    entries is the mean over the kept entries (up to float32 rounding of the
    weights).

    :param y_pred: predicted values, broadcastable against ``y_true``.
    :param y_true: ground-truth values.
    :param null_val: sentinel marking missing ground truth.  Defaults to 0,
        which is the value this function previously hard-coded; pass
        ``np.nan`` to mask NaN entries instead (same convention as
        ``masked_mape``).
    :return: scalar MAE; 0.0 when every entry is masked.
    """
    # Masked positions and a fully masked input produce 0/0 on purpose;
    # silence the warnings and let nan_to_num zero them out below.
    with np.errstate(divide="ignore", invalid="ignore"):
        if np.isnan(null_val):
            mask = ~np.isnan(y_true)
        else:
            mask = np.not_equal(y_true, null_val)
        mask = mask.astype(np.float32)
        mask /= np.mean(mask)
        abs_err = np.abs(y_pred - y_true)
        abs_err = np.nan_to_num(abs_err * mask)
        return np.mean(abs_err)

def masked_mape(y_pred, y_true, null_val=0):
    """Mean absolute percentage error over the non-missing entries of ``y_true``.

    The result is a fraction (0.16 means 16 %), not multiplied by 100.

    :param y_pred: predicted values, broadcastable against ``y_true``.
    :param y_true: ground-truth values; also the denominator of the ratio.
    :param null_val: sentinel marking missing ground truth; ``np.nan`` masks
        NaN entries, any other value masks entries equal to it.
    :return: scalar MAPE; 0.0 when every entry is masked.
    """
    # Division by masked zeros yields inf/NaN by design; the warnings are
    # suppressed and those positions are zeroed by nan_to_num.
    with np.errstate(divide="ignore", invalid="ignore"):
        valid = ~np.isnan(y_true) if np.isnan(null_val) else np.not_equal(y_true, null_val)
        weights = valid.astype("float32")
        # Rescale so that averaging over every entry equals averaging over kept ones.
        weights /= np.mean(weights)
        residual = (y_pred - y_true).astype("float32")
        ratio = np.abs(np.divide(residual, y_true))
        weighted = np.nan_to_num(weights * ratio)
        return np.mean(weighted)

In [3]:
def generate_graph_seq2seq_io_data(data, x_offsets, y_offsets):
    """Slice a time-major array into aligned input/target windows.

    For every anchor step ``t`` the input window is ``data[t + x_offsets]``
    and the target window is ``data[t + y_offsets]``.  Anchors run from
    ``abs(min(x_offsets))`` up to, but not including,
    ``abs(data.shape[0] - abs(max(y_offsets)))``.

    :param data: array indexed by time along axis 0.
    :param x_offsets: integer NumPy array of input offsets relative to ``t``.
    :param y_offsets: integer NumPy array of target offsets relative to ``t``.
    :return: ``(x, y)`` where
        x has shape ``(num_windows, len(x_offsets), *data.shape[1:])`` and
        y has shape ``(num_windows, len(y_offsets), *data.shape[1:])``.
    """
    total_steps = data.shape[0]
    # t is the index of the last observation inside the input window.
    first_anchor = abs(min(x_offsets))
    stop_anchor = abs(total_steps - abs(max(y_offsets)))  # exclusive
    anchors = range(first_anchor, stop_anchor)

    x = np.stack([data[t + x_offsets, ...] for t in anchors], axis=0)
    y = np.stack([data[t + y_offsets, ...] for t in anchors], axis=0)
    return x, y

In [4]:
def generate_train_val_test(data, test_ratio=0.2):
    """Window ``data`` into seq2seq samples and return the held-out test split.

    Each sample uses 12 input steps (offsets -11..0) and 12 target steps
    (offsets 1..12).  The split is chronological: the last
    ``round(num_samples * test_ratio)`` samples form the test set.  Only the
    test split is returned; the earlier (train/validation) samples are not
    materialised.

    :param data: array indexed by time along axis 0, as accepted by
        ``generate_graph_seq2seq_io_data``.
    :param test_ratio: fraction of samples, taken from the end, used for
        testing.  Must lie in [0, 1].  Defaults to 0.2.
    :return: ``(x_test, y_test)``.
    :raises ValueError: if ``test_ratio`` is outside [0, 1].
    """
    if not 0 <= test_ratio <= 1:
        raise ValueError("test_ratio must be between 0 and 1, got %r" % (test_ratio,))

    # Offsets are relative to the anchor step; 0 is the latest observed sample.
    x_offsets = np.arange(-11, 1, 1)
    # Predict the next 12 steps.
    y_offsets = np.arange(1, 13, 1)
    # x: (num_samples, input_length, num_nodes, input_dim)
    # y: (num_samples, output_length, num_nodes, output_dim)
    x, y = generate_graph_seq2seq_io_data(data, x_offsets=x_offsets, y_offsets=y_offsets)

    print("x shape: ", x.shape, ", y shape: ", y.shape)

    num_samples = x.shape[0]
    num_test = round(num_samples * test_ratio)
    # Slice from an explicit start index: ``x[-num_test:]`` would return the
    # WHOLE array when num_test rounds down to 0, because -0 == 0.
    test_start = num_samples - num_test
    x_test, y_test = x[test_start:], y[test_start:]

    return x_test, y_test

#     for cat in ["train", "val", "test"]:
#         _x, _y = locals()["x_" + cat], locals()["y_" + cat]
#         print(cat, "x: ", _x.shape, "y:", _y.shape)
#         np.savez_compressed(
#             os.path.join(args.output_dir, "%s.npz" % cat),
#             x=_x,
#             y=_y,
#             x_offsets=x_offsets.reshape(list(x_offsets.shape) + [1]),
#             y_offsets=y_offsets.reshape(list(y_offsets.shape) + [1]),
#         )

In [5]:
# Load the METR-LA table from a gzip-compressed CSV; the path is relative to
# the notebook's working directory.
# NOTE(review): speed_y looks like a per-weekdaytime reference speed that was
# merged in upstream — confirm against the script that produced this file.
df = pd.read_csv('./METRLA/metr-la.csv.gz')
df

Unnamed: 0,timestamp,sensorid,speed,weekdaytime,speed_y
0,2012-03-01 00:00:00,773869,64.375000,0.428784,63.132937
1,2012-03-01 00:00:00,767541,67.625000,0.428784,64.867063
2,2012-03-01 00:00:00,767542,67.125000,0.428784,66.170635
3,2012-03-01 00:00:00,717447,61.500000,0.428784,61.943452
4,2012-03-01 00:00:00,717446,66.875000,0.428784,65.241071
...,...,...,...,...,...
7094299,2012-06-27 23:55:00,717592,66.444444,0.428288,62.377595
7094300,2012-06-27 23:55:00,717595,68.444444,0.428288,66.349054
7094301,2012-06-27 23:55:00,772168,63.555556,0.428288,64.207723
7094302,2012-06-27 23:55:00,718141,68.666667,0.428288,66.785409


In [6]:
# Keep three columns; their order fixes the last axis used by later cells:
# index 0 = speed, 1 = weekdaytime, 2 = speed_y.
data = df[['speed', 'weekdaytime', 'speed_y']].values
# Fold the long table into (time step, sensor, feature) with 207 rows per step.
# NOTE(review): assumes rows are ordered by timestamp and every timestamp has
# exactly 207 sensors in a consistent order — confirm before reusing.
data = data.reshape(-1, 207, 3)
data.shape

(34272, 207, 3)

In [7]:
# Window the series and keep only the test split; the function also prints
# the shapes of the full windowed arrays.
x_test, y_test = generate_train_val_test(data)
print('x_test.shape, y_test.shape', x_test.shape, y_test.shape)

x shape:  (34249, 12, 207, 3) , y shape:  (34249, 12, 207, 3)
x_test.shape, y_test.shape (6850, 12, 207, 3) (6850, 12, 207, 3)


In [8]:
# Both arrays come from the target windows: feature 2 (speed_y) is treated as
# the prediction and feature 0 (speed) as the ground truth.  The 2:3 / 0:1
# slices keep the trailing feature axis instead of dropping it.
y_pred, y_true = y_test[:,:,:,2:3], y_test[:,:,:,0:1]
print('y_pred.shape, y_true.shape', y_pred.shape, y_true.shape)

y_pred.shape, y_true.shape (6850, 12, 207, 1) (6850, 12, 207, 1)


In [9]:
def _mae_mape_rmse(pred, true):
    """Return ``(MAE, MAPE, RMSE)`` of ``pred`` against ``true`` using the masked metrics."""
    return masked_mae(pred, true), masked_mape(pred, true), masked_rmse(pred, true)


# Bring the horizon axis to the front so that indexing axis 0 selects a step:
# (sample, step, node, feature) -> (step, sample, node, feature).
ys_true = np.transpose(y_true, (1, 0, 2, 3))
ys_pred = np.transpose(y_pred, (1, 0, 2, 3))

# All steps together.
mae, mape, rmse = _mae_mape_rmse(ys_pred, ys_true)
# Individual steps: indices 2, 5 and 11 are the 3rd, 6th and 12th step,
# reported below as the 15, 30 and 60 minute horizons.
mae_3, mape_3, rmse_3 = _mae_mape_rmse(ys_pred[2:3], ys_true[2:3])
mae_6, mape_6, rmse_6 = _mae_mape_rmse(ys_pred[5:6], ys_true[5:6])
mae_12, mape_12, rmse_12 = _mae_mape_rmse(ys_pred[11:12], ys_true[11:12])

print('Horizon overall: mae: {:.4f}, mape: {:.4f}, rmse: {:.4f}'.format(mae, mape, rmse))
print('Horizon 15mins: mae: {:.4f}, mape: {:.4f}, rmse: {:.4f}'.format(mae_3, mape_3, rmse_3))
print('Horizon 30mins: mae: {:.4f}, mape: {:.4f}, rmse: {:.4f}'.format(mae_6, mape_6, rmse_6))
print('Horizon 60mins: mae: {:.4f}, mape: {:.4f}, rmse: {:.4f}'.format(mae_12, mape_12, rmse_12))

Horizon overall: mae: 6.8307, mape: 0.1640, rmse: 9.5298
Horizon 15mins: mae: 6.8308, mape: 0.1640, rmse: 9.5299
Horizon 30mins: mae: 6.8307, mape: 0.1640, rmse: 9.5298
Horizon 60mins: mae: 6.8306, mape: 0.1640, rmse: 9.5297
