In [1]:
import pandas as pd
import numpy as np
import torch

In [2]:
def masked_rmse(y_pred, y_true, null_val=0):
    """Root-mean-square error over the entries of ``y_true`` that are not null.

    Args:
        y_pred: numpy array of predictions, broadcastable against ``y_true``.
        y_true: numpy array of ground-truth values.
        null_val: label value that marks a missing observation. Defaults to 0
            (the previous hard-coded behaviour). Pass ``np.nan`` to mask NaN
            labels instead, mirroring ``masked_mape``.

    Returns:
        The RMSE computed only over non-null labels. Returns 0.0 when every
        label is null (the 0/0 weights become NaN and are zeroed out).
    """
    with np.errstate(divide="ignore", invalid="ignore"):
        if np.isnan(null_val):
            mask = ~np.isnan(y_true)
        else:
            mask = np.not_equal(y_true, null_val)
        mask = mask.astype(np.float32)
        # Rescale so that the plain mean below equals the mean over valid entries.
        mask /= np.mean(mask)
        # np.square already discards the sign, so no abs() is needed first.
        squared_error = np.square(y_pred - y_true)
        # NaN products (NaN labels, or NaN weights when nothing is valid) count as 0.
        squared_error = np.nan_to_num(squared_error * mask)
        return np.sqrt(np.mean(squared_error))

def masked_mae(y_pred, y_true, null_val=0):
    """Mean absolute error over the entries of ``y_true`` that are not null.

    Args:
        y_pred: numpy array of predictions, broadcastable against ``y_true``.
        y_true: numpy array of ground-truth values.
        null_val: label value that marks a missing observation. Defaults to 0
            (the previous hard-coded behaviour). Pass ``np.nan`` to mask NaN
            labels instead, mirroring ``masked_mape``.

    Returns:
        The MAE computed only over non-null labels. Returns 0.0 when every
        label is null (the 0/0 weights become NaN and are zeroed out).
    """
    with np.errstate(divide="ignore", invalid="ignore"):
        if np.isnan(null_val):
            mask = ~np.isnan(y_true)
        else:
            mask = np.not_equal(y_true, null_val)
        mask = mask.astype(np.float32)
        # Rescale so that the plain mean below equals the mean over valid entries.
        mask /= np.mean(mask)
        abs_error = np.abs(y_pred - y_true)
        # NaN products (NaN labels, or NaN weights when nothing is valid) count as 0.
        abs_error = np.nan_to_num(abs_error * mask)
        return np.mean(abs_error)

def masked_mape(y_pred, y_true, null_val=0):
    """Mean absolute percentage error (as a fraction) over non-null labels.

    Args:
        y_pred: numpy array of predictions.
        y_true: numpy array of ground-truth values; also the divisor.
        null_val: label value treated as missing. When it is NaN, NaN labels
            are masked; otherwise labels equal to ``null_val`` are masked.

    Returns:
        Mean of ``|y_pred - y_true| / |y_true|`` over the unmasked entries.

    Note:
        An unmasked label of 0 produces ``inf``; ``np.nan_to_num`` then turns
        it into a very large finite number rather than dropping it.
    """
    with np.errstate(divide="ignore", invalid="ignore"):
        valid = ~np.isnan(y_true) if np.isnan(null_val) else np.not_equal(y_true, null_val)
        weights = valid.astype("float32")
        # Normalise so the plain mean at the end averages over valid entries only.
        weights /= np.mean(weights)
        residual = (y_pred - y_true).astype("float32")
        pct_error = np.abs(np.divide(residual, y_true))
        # 0 * inf and 0 / 0 give NaN; those entries contribute nothing.
        return np.mean(np.nan_to_num(weights * pct_error))

In [3]:
def masked_mae_loss(y_pred, y_true):
    """Torch mean absolute error that ignores positions where ``y_true`` is 0.

    Args:
        y_pred: tensor of predictions.
        y_true: tensor of labels; entries equal to 0 are treated as missing.

    Returns:
        Scalar tensor: the MAE averaged over the non-zero labels.
    """
    weights = y_true.ne(0).float()
    # Normalise so the plain mean at the end averages over valid entries only.
    weights = weights / weights.mean()
    weighted_error = (y_pred - y_true).abs() * weights
    # Zero out NaNs in place (same effect as the `x != x` NaN test).
    weighted_error[torch.isnan(weighted_error)] = 0
    return weighted_error.mean()

def masked_mape_loss(y_pred, y_true):
    """Torch mean absolute percentage error (as a fraction) ignoring zero labels.

    Args:
        y_pred: tensor of predictions.
        y_true: tensor of labels; entries equal to 0 are treated as missing.

    Returns:
        Scalar tensor: mean of ``|y_true - y_pred| / |y_true|`` over the
        non-zero labels.
    """
    weights = y_true.ne(0).float()
    # Normalise so the plain mean at the end averages over valid entries only.
    weights = weights / weights.mean()
    relative_error = ((y_true - y_pred) / y_true).abs()
    weighted_error = relative_error * weights
    # Division by a zero label gives inf/NaN; times a 0 weight that is NaN.
    # Zero those in place (same effect as the `x != x` NaN test).
    weighted_error[torch.isnan(weighted_error)] = 0
    return weighted_error.mean()

def masked_rmse_loss(y_pred, y_true):
    """Torch root-mean-square error that ignores positions where ``y_true`` is 0.

    Args:
        y_pred: tensor of predictions.
        y_true: tensor of labels; entries equal to 0 are treated as missing.

    Returns:
        Scalar tensor: the RMSE over the non-zero labels.
    """
    weights = y_true.ne(0).float()
    # Normalise so the plain mean at the end averages over valid entries only.
    weights = weights / weights.mean()
    weighted_sq_error = (y_true - y_pred).pow(2) * weights
    # Zero out NaNs in place (same effect as the `x != x` NaN test).
    weighted_sq_error[torch.isnan(weighted_sq_error)] = 0
    return weighted_sq_error.mean().sqrt()

In [4]:
# Load the gzipped METR-LA CSV (path is relative to the notebook's working directory).
# The bare name on the last line makes the notebook render a preview of the frame.
df_plus = pd.read_csv('../METRLA/metr-la.csv.gz')
df_plus

Unnamed: 0,timestamp,sensorid,speed,weekdaytime,speed_y
0,2012-03-01 00:00:00,773869,64.375000,0.428784,63.132937
1,2012-03-01 00:00:00,767541,67.625000,0.428784,64.867063
2,2012-03-01 00:00:00,767542,67.125000,0.428784,66.170635
3,2012-03-01 00:00:00,717447,61.500000,0.428784,61.943452
4,2012-03-01 00:00:00,717446,66.875000,0.428784,65.241071
...,...,...,...,...,...
7094299,2012-06-27 23:55:00,717592,66.444444,0.428288,62.377595
7094300,2012-06-27 23:55:00,717595,68.444444,0.428288,66.349054
7094301,2012-06-27 23:55:00,772168,63.555556,0.428288,64.207723
7094302,2012-06-27 23:55:00,718141,68.666667,0.428288,66.785409


In [5]:
def correct_method(data, num_nodes=207, num_test=6854):
    """Evaluate the historical-average baseline with both numpy and torch metrics.

    The 'speed_y' column is used as the prediction and 'speed' as the label.
    Both columns are reshaped to ``(time, num_nodes)``, which assumes every
    run of ``num_nodes`` consecutive rows belongs to one time step (as the
    preview of the loaded frame suggests: identical timestamps, different
    sensor ids on adjacent rows).

    Args:
        data: table with 'speed' and 'speed_y' columns whose row count is a
            multiple of ``num_nodes`` (the reshape raises otherwise).
        num_nodes: number of sensors per time step. Defaults to 207.
        num_test: number of trailing time steps to evaluate. Defaults to 6854.

    Raises:
        ValueError: if ``num_test`` is not positive; a slice of ``[-0:]``
            would silently select every time step.

    Prints the evaluated shapes, the numpy metrics, and a tab-separated table
    of the torch metrics. The same overall metrics are printed for every
    horizon because the values do not depend on ``horizon``.
    """
    if num_test < 1:
        raise ValueError('num_test must be a positive integer, got %r' % (num_test,))

    y_pred = data['speed_y'].values.reshape(-1, num_nodes)[-num_test:]
    y_true = data['speed'].values.reshape(-1, num_nodes)[-num_test:]
    print(y_pred.shape, y_true.shape)

    mae = masked_mae(y_pred, y_true)
    mape = masked_mape(y_pred, y_true)
    rmse = masked_rmse(y_pred, y_true)
    print('Historical Average ---- Our method --- Numpy Loss')
    print('Horizon overall: mae: {:.4f}, mape: {:.4f}, rmse: {:.4f}'.format(mae, mape, rmse))

    # Repeat the evaluation with the torch losses as a cross-check.
    test_preds, test_labels = torch.from_numpy(y_pred), torch.from_numpy(y_true)
    rmse = masked_rmse_loss(test_preds, test_labels)
    mape = masked_mape_loss(test_preds, test_labels)
    mae = masked_mae_loss(test_preds, test_labels)
    print('Historical Average ---- Our method --- Torch Loss')
    print('\t'.join(['Model', 'Horizon', 'MAE', 'RMSE', 'MAPE']))
    for horizon in [1, 3, 6, 12]:
        line = 'HA\t%d\t%.2f\t%.2f\t%.2f' % (horizon, mae, rmse, mape * 100)
        print(line)
        
# Run the reference evaluation on the loaded table; it prints the shapes and both metric tables.
correct_method(df_plus)

(6854, 207) (6854, 207)
Historical Average ---- Our method --- Numpy Loss
Horizon overall: mae: 4.1456, mape: 0.1290, rmse: 7.7728
Historical Average ---- Our method --- Torch Loss
Model	Horizon	MAE	RMSE	MAPE
HA	1	4.15	7.77	12.90
HA	3	4.15	7.77	12.90
HA	6	4.15	7.77	12.90
HA	12	4.15	7.77	12.90


In [6]:
def get_history_average(data):
    """Historical-average evaluation with a DELIBERATELY WRONG reshape.

    Kept as a demonstration of the pitfall: the columns are reshaped as if the
    rows were stored sensor-by-sensor (``(num_nodes, time, 1)``) and then
    transposed. The preview of the loaded frame shows adjacent rows sharing a
    timestamp with different sensor ids, so this interpretation mixes values
    from different sensors and time steps. The printed shape still looks
    right, which is what makes the mistake easy to miss.

    Args:
        data: table with 'speed' (labels) and 'speed_y' (predictions) columns.

    Prints the reshaped array shapes, the test-split size, and a tab-separated
    table of torch MAE / RMSE / MAPE over the last 20% of the (mis-ordered)
    time axis.
    """
    num_nodes = 207
    num_samples = data.shape[0] // num_nodes

    # wrong !!!  (node-major reshape applied to rows that are not node-major)
    speed_cube = data['speed'].values.reshape(num_nodes, -1, 1).transpose(1, 0, 2)
    speedy_cube = data['speed_y'].values.reshape(num_nodes, -1, 1).transpose(1, 0, 2)
    print(speed_cube.shape, speedy_cube.shape)

    num_test = round(num_samples * 0.2)  # 6854
    print('num_test', num_test)
    test_preds = torch.from_numpy(speedy_cube[-num_test:])
    test_labels = torch.from_numpy(speed_cube[-num_test:])

    rmse = masked_rmse_loss(test_preds, test_labels)
    mape = masked_mape_loss(test_preds, test_labels)
    mae = masked_mae_loss(test_preds, test_labels)
    print('Historical Average ---- Our method')
    print('\t'.join(['Model', 'Horizon', 'MAE', 'RMSE', 'MAPE']))
    # The metrics do not depend on the horizon, so every row repeats the same numbers.
    for horizon in (1, 3, 6, 12):
        print('HA\t%d\t%.2f\t%.2f\t%.2f' % (horizon, mae, rmse, mape * 100))
        
# Run the variant whose reshape is marked wrong, to show how the metrics shift.
get_history_average(df_plus)

(34272, 207, 1) (34272, 207, 1)
num_test 6854
Historical Average ---- Our method
Model	Horizon	MAE	RMSE	MAPE
HA	1	3.83	7.13	11.40
HA	3	3.83	7.13	11.40
HA	6	3.83	7.13	11.40
HA	12	3.83	7.13	11.40


In [7]:
def get_history_average(data, num_nodes=207, test_ratio=0.2):
    """Historical-average evaluation with the correct (time-major) reshape.

    The 'speed_y' column is used as the prediction and 'speed' as the label.
    Both are reshaped to ``(time, num_nodes, 1)``, so every run of
    ``num_nodes`` consecutive rows is treated as one time step.

    Args:
        data: table with 'speed' and 'speed_y' columns whose row count is a
            multiple of ``num_nodes`` (the reshape raises otherwise).
        num_nodes: number of sensors per time step. Defaults to 207.
        test_ratio: fraction of trailing time steps used for evaluation.
            Defaults to 0.2.

    Raises:
        ValueError: if the test split rounds to fewer than one time step; a
            slice of ``[-0:]`` would silently select every time step.

    Prints the reshaped array shapes, the test-split size, and a tab-separated
    table of torch MAE / RMSE / MAPE. The same overall metrics are printed for
    every horizon because the values do not depend on ``horizon``.
    """
    num_samples = data.shape[0] // num_nodes

    data_speed = data['speed'].values.reshape(-1, num_nodes, 1)  # correct!!!!
    data_speedy = data['speed_y'].values.reshape(-1, num_nodes, 1)  # correct!!!!
    print(data_speed.shape, data_speedy.shape)

    num_test = round(num_samples * test_ratio)  # 6854 for 34272 samples at 0.2
    print('num_test', num_test)
    if num_test < 1:
        raise ValueError(
            'test split is empty: num_samples=%d, test_ratio=%r' % (num_samples, test_ratio)
        )

    preds, labels = data_speedy[-num_test:], data_speed[-num_test:]
    test_preds, test_labels = torch.from_numpy(preds), torch.from_numpy(labels)
    rmse = masked_rmse_loss(test_preds, test_labels)
    mape = masked_mape_loss(test_preds, test_labels)
    mae = masked_mae_loss(test_preds, test_labels)
    print('Historical Average ---- Our method')
    print('\t'.join(['Model', 'Horizon', 'MAE', 'RMSE', 'MAPE']))
    for horizon in [1, 3, 6, 12]:
        line = 'HA\t%d\t%.2f\t%.2f\t%.2f' % (horizon, mae, rmse, mape * 100)
        print(line)
        
# Run the corrected variant (this definition shadows the earlier get_history_average).
get_history_average(df_plus)

(34272, 207, 1) (34272, 207, 1)
num_test 6854
Historical Average ---- Our method
Model	Horizon	MAE	RMSE	MAPE
HA	1	4.15	7.77	12.90
HA	3	4.15	7.77	12.90
HA	6	4.15	7.77	12.90
HA	12	4.15	7.77	12.90
