check if they are conducive to the paper's claims

In [1]:
import sys
sys.path.insert(0, '..')

In [2]:
from data_formatters.base import GenericDataFormatter, InputTypes, DataTypes
from data_formatters.electricity import ElectricityFormatter
from expt_settings.configs import ExperimentConfig
from libs.hyperparam_opt import HyperparamOptManager
from libs.tft_model import TemporalFusionTransformer
import libs.utils as utils
import os
import pandas as pd
from pandas import DataFrame, Series, Timestamp, Index
from tqdm import tqdm

In [84]:
import numpy as np
import tensorflow as tf
import random
import tensorflow.compat.v1 as tf1
from tensorflow.compat.v1 import Session, ConfigProto
from tensorflow.python.eager.context import PhysicalDevice
from typing import Dict, List, Union, Generator
from numpy import load, array_equal, allclose

In [4]:
if tf.test.gpu_device_name(): 
    print('Default GPU Device:{}'.format(tf.test.gpu_device_name()))
else:
    print("Please install GPU version of TF")

Default GPU Device:/device:GPU:0


In [5]:
gpu: List[PhysicalDevice] = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpu[0], True)

In [6]:
# Tensorflow setup
default_keras_session: Session = tf1.keras.backend.get_session()
tf_config: ConfigProto = utils.get_default_tensorflow_config(tf_device="gpu", gpu_id=0)

Selecting GPU ID=0


In [7]:
file_path: str = r'C:\Users\Lorenzo\PycharmProjects\TFT\outputs\data\electricity\data\hourly_electricity.csv'

In [8]:
raw_data: DataFrame = pd.read_csv(file_path)
raw_data['date'] = raw_data['date'].astype('datetime64[s]')  

In [9]:
config = ExperimentConfig('electricity', r'C:\Users\Lorenzo\PycharmProjects\TFT\outputs')

In [10]:
formatter: ElectricityFormatter = config.make_data_formatter()

In [11]:
data_csv_path: str = config.data_csv_path

# SPLIT DATA

In [12]:
train, valid, test = formatter.split_data(raw_data)

Formatting train-valid-test splits.
Setting scalers with training data...


In [13]:
column_definitions = formatter.get_column_definition()

In [14]:
train_samples, valid_samples = formatter.get_num_samples_for_calibration()

In [15]:
# Sets up default params
fixed_params: Dict = formatter.get_experiment_params()
params: Dict = formatter.get_default_model_params()
params["model_folder"]: str = os.path.join(config.model_folder, "fixed")
model_folder = os.path.join(config.model_folder, "fixed")

In [16]:
# Sets up hyperparam manager
print("*** Loading hyperparm manager ***")
opt_manager = HyperparamOptManager({k: [params[k]] for k in params},
                                   fixed_params, model_folder)

*** Loading hyperparm manager ***


In [17]:
model_folder: str = opt_manager.hyperparam_folder

# PREDICTION

In [18]:
print("*** Running tests ***")
tf1.reset_default_graph()
with tf.Graph().as_default(), tf1.Session(config=tf_config) as sess:
    tf1.keras.backend.set_session(sess)
    params: Dict = opt_manager.get_next_parameters()
    params['exp_name'] = 'electricity'
    params['data_folder'] = os.path.abspath(os.path.join(data_csv_path, os.pardir))
    model = TemporalFusionTransformer(params, use_cudnn=False)
    params.pop('exp_name', None)
    params.pop('data_folder', None)
    # load model
    model.load(opt_manager.hyperparam_folder, use_keras_loadings=True)
    
#     print("Computing best validation loss")
#     val_loss: Series = model.evaluate(valid)
        
    print("Computing test loss")
    output_map: Dict = model.predict(test, return_targets=True)
    print(f"Output map returned a dict with keys {output_map.get('p50').shape}")
    targets: DataFrame = formatter.format_predictions(output_map["targets"])
    p50_forecast: DataFrame = formatter.format_predictions(output_map["p50"])
    p90_forecast: DataFrame = formatter.format_predictions(output_map["p90"])
        
    # save all
    print("saving predictions and targets")
    targets.to_csv(os.path.join(opt_manager.hyperparam_folder, "targets.csv"), index=False)
    p50_forecast.to_csv(os.path.join(opt_manager.hyperparam_folder, "p50.csv"), index=False)
    p90_forecast.to_csv(os.path.join(opt_manager.hyperparam_folder, "p90.csv"), index=False)
        
    def extract_numerical_data(data: DataFrame) -> DataFrame:
        """Strips out forecast time and identifier columns."""
        return data[[
            col for col in data.columns
            if col not in {"forecast_time", "identifier"}
        ]]
    
    p50_loss = utils.numpy_normalised_quantile_loss(
            extract_numerical_data(targets), extract_numerical_data(p50_forecast),
            0.5)
    p90_loss = utils.numpy_normalised_quantile_loss(
        extract_numerical_data(targets), extract_numerical_data(p90_forecast),
        0.9)

    tf1.keras.backend.set_session(default_keras_session)

print()
print("Normalised Quantile Loss for Test Data: P50={}, P90={}".format(
    p50_loss.mean(), p90_loss.mean()))

*** Running tests ***
Resetting temp folder...
*** TemporalFusionTransformer params ***
# dropout_rate = 0.1
# hidden_layer_size = 160
# learning_rate = 0.001
# max_gradient_norm = 0.01
# minibatch_size = 64
# model_folder = C:\Users\Lorenzo\PycharmProjects\TFT\outputs\saved_models\electricity\fixed
# num_heads = 4
# stack_size = 1
# total_time_steps = 192
# num_encoder_steps = 168
# num_epochs = 100
# early_stopping_patience = 5
# multiprocessing_workers = 5
# column_definition = [('id', <DataTypes.REAL_VALUED: 0>, <InputTypes.ID: 4>), ('date', <DataTypes.DATE: 2>, <InputTypes.TIME: 5>), ('power_usage', <DataTypes.REAL_VALUED: 0>, <InputTypes.TARGET: 0>), ('hour', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('day_of_week', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('hours_from_start', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('categorical_id', <DataTypes.CATEGORICAL: 1>, <InputTypes.STATIC_INPUT: 3>)]
# input_size = 5
# output_size = 1
# 

Computing test loss




Output map returned a dict with keys (53505, 26)
saving predictions and targets

Normalised Quantile Loss for Test Data: P50=0.06195225913111236, P90=0.03087768990980622


# UNDERSTANDING SAVED PREDICTIONS

In [18]:
p50_forecast: DataFrame = pd.read_csv(os.path.join(opt_manager.hyperparam_folder, "p50.csv"))
p90_forecast: DataFrame = pd.read_csv(os.path.join(opt_manager.hyperparam_folder, "p90.csv"))
targets: DataFrame = pd.read_csv(os.path.join(opt_manager.hyperparam_folder, "targets.csv"))

In [19]:
p50_forecast.head()

Unnamed: 0,forecast_time,identifier,t+0,t+1,t+2,t+3,t+4,t+5,t+6,t+7,...,t+14,t+15,t+16,t+17,t+18,t+19,t+20,t+21,t+22,t+23
0,2014-08-31 23:00:00,MT_001,15.816484,15.426067,15.101756,15.059324,15.272384,15.324549,15.201153,14.856417,...,15.98492,15.557263,15.629363,16.073324,15.508624,15.737225,15.264067,15.972996,16.565828,15.223506
1,2014-09-01 00:00:00,MT_001,15.801899,15.549843,15.710974,15.848249,15.672518,15.450286,15.23031,15.300977,...,16.33889,16.242607,16.34614,15.416276,16.004236,15.562339,16.733631,16.430775,15.576667,16.253613
2,2014-09-01 01:00:00,MT_001,15.713786,15.589179,15.790873,15.57574,15.245438,15.128947,15.345088,16.095377,...,16.383362,16.368664,15.480453,16.182049,15.862064,17.132368,16.359085,15.914165,16.430271,15.312605
3,2014-09-01 02:00:00,MT_001,15.564478,15.574275,15.491813,15.173822,15.081623,15.429429,16.036526,16.302347,...,16.40362,15.569962,16.122433,15.804259,17.137283,16.35122,15.906891,16.543669,15.254826,14.968862
4,2014-09-01 03:00:00,MT_001,15.549872,15.505312,15.312562,15.243621,15.582037,16.077026,16.343262,16.521887,...,15.632707,16.093567,15.772758,17.056166,16.34317,15.808394,16.562973,15.204091,14.902324,14.493794


In [20]:
targets.head()

Unnamed: 0,forecast_time,identifier,t+0,t+1,t+2,t+3,t+4,t+5,t+6,t+7,...,t+14,t+15,t+16,t+17,t+18,t+19,t+20,t+21,t+22,t+23
0,2014-08-31 23:00:00,MT_001,16.180203,16.497462,16.180203,16.497462,16.180203,16.814721,16.497462,22.208122,...,14.911168,14.593909,14.911168,15.228426,16.180203,9.517766,2.85533,4.758883,16.497462,8.56599
1,2014-09-01 00:00:00,MT_001,16.497462,16.180203,16.497462,16.180203,16.814721,16.497462,22.208122,14.911168,...,14.593909,14.911168,15.228426,16.180203,9.517766,2.85533,4.758883,16.497462,8.56599,3.489848
2,2014-09-01 01:00:00,MT_001,16.180203,16.497462,16.180203,16.814721,16.497462,22.208122,14.911168,21.890863,...,14.911168,15.228426,16.180203,9.517766,2.85533,4.758883,16.497462,8.56599,3.489848,3.489848
3,2014-09-01 02:00:00,MT_001,16.497462,16.180203,16.814721,16.497462,22.208122,14.911168,21.890863,14.593909,...,15.228426,16.180203,9.517766,2.85533,4.758883,16.497462,8.56599,3.489848,3.489848,3.807107
4,2014-09-01 03:00:00,MT_001,16.180203,16.814721,16.497462,22.208122,14.911168,21.890863,14.593909,14.911168,...,16.180203,9.517766,2.85533,4.758883,16.497462,8.56599,3.489848,3.489848,3.807107,3.489848


In [21]:
p90_forecast.head()

Unnamed: 0,forecast_time,identifier,t+0,t+1,t+2,t+3,t+4,t+5,t+6,t+7,...,t+14,t+15,t+16,t+17,t+18,t+19,t+20,t+21,t+22,t+23
0,2014-08-31 23:00:00,MT_001,19.484957,19.966455,20.021095,20.073578,20.247953,20.45572,20.458849,20.715023,...,20.16673,19.837234,20.098585,20.942125,20.916756,21.400381,20.641865,21.257446,20.518513,19.297764
1,2014-09-01 00:00:00,MT_001,19.708284,19.499857,19.807434,20.135532,20.403917,20.499739,20.539919,20.89277,...,20.51899,20.470705,20.802544,20.35914,21.617046,20.859167,21.741335,20.676088,19.746414,20.406603
2,2014-09-01 01:00:00,MT_001,19.994995,19.905256,20.260685,20.4511,20.369469,20.475649,20.877966,21.58272,...,20.631052,20.753662,20.240032,21.627714,20.893682,21.687847,20.452843,19.897978,20.597872,20.448656
3,2014-09-01 02:00:00,MT_001,19.872185,19.995995,20.374674,20.39554,20.37204,20.982529,21.356647,21.735025,...,20.818357,20.456345,21.66926,20.94167,21.758282,20.560837,19.971897,20.747753,20.36276,20.168312
4,2014-09-01 03:00:00,MT_001,19.82572,20.178839,20.556164,20.439846,21.130474,21.349077,21.730026,21.786488,...,20.578382,21.732712,21.00582,21.820227,20.648443,19.998306,20.82715,20.363838,20.12765,20.186728


CHECK IF TARGETS CORRESPONDS TO RAW DATA

In [103]:
def test_targets(targets: DataFrame, raw_data: DataFrame, timestamp: str, identifier: str):
    """

    :param targets: targets DataFrame returned by predict method
    :param raw_data: original dataset
    :param timestamp: (str) random timestamp in dataset
    :param identifier:
    :return: None
    """
    # pick random timestamp and identifier
#     timestamp: str = random.choice(targets['forecast_time'])
#     identifier: str = random.choice(targets['identifier'])
    target_data: ndarray = targets[(targets['forecast_time'] == timestamp) & (targets['identifier'] == identifier)].iloc[:,
           2:].values
    raw: ndarray = raw_data[(raw_data['date'] >= pd.Timestamp(timestamp) + pd.Timedelta(hours=1)) & (
                                                         raw_data['date'] <= pd.Timestamp(timestamp) + pd.Timedelta(
                                                     hours=24)) & (raw_data['id'] == identifier)].iloc[:,
                                         1].values

    assert allclose(target_data, raw)

In [104]:
targets['identifier'].unique()[0]

'MT_001'

In [105]:
for ts in tqdm(targets['forecast_time'].unique()):
    for i in targets['identifier'].unique():
        test_targets(targets, raw_data, ts, i)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 145/145 [1:55:26<00:00, 47.77s/it]


In [96]:
identifier = random.choice(targets['identifier'])
identifier

'MT_290'

In [22]:
timestamp: str = '2014-08-31 23:00:00'

In [74]:
a = targets[(targets['forecast_time']==timestamp) & (targets['identifier']=='MT_001')].iloc[:, 2:].values

In [75]:
a, b

(array([[16.18020305, 16.49746193, 16.18020305, 16.49746193, 16.18020305,
         16.81472081, 16.49746193, 22.20812183, 14.91116751, 21.89086294,
         14.59390863, 14.91116751, 14.59390863, 14.59390863, 14.91116751,
         14.59390863, 14.91116751, 15.2284264 , 16.18020305,  9.5177665 ,
          2.85532995,  4.75888325, 16.49746193,  8.56598985]]),
 array([16.18020305, 16.49746193, 16.18020305, 16.49746193, 16.18020305,
        16.81472081, 16.49746193, 22.20812183, 14.91116751, 21.89086294,
        14.59390863, 14.91116751, 14.59390863, 14.59390863, 14.91116751,
        14.59390863, 14.91116751, 15.2284264 , 16.18020305,  9.5177665 ,
         2.85532995,  4.75888325, 16.49746193,  8.56598985]))

In [76]:
b = raw_data[(raw_data['date']>=pd.Timestamp(timestamp)+pd.Timedelta(hours=1)) & (raw_data['date']<=pd.Timestamp(timestamp)+pd.Timedelta(hours=24)) & (raw_data['id']=='MT_001')].iloc[:,1].values

In [83]:
assert allclose(a,b, rtol=1e-50, atol=1e-100)

In [30]:
assert targets[(targets['forecast_time']==timestamp) & (targets['identifier']=='MT_001')].iloc[:, 2:].values.all() == raw_data[(raw_data['date']>=pd.Timestamp(timestamp)+pd.Timedelta(hours=1)) & (raw_data['date']<=pd.Timestamp(timestamp)+pd.Timedelta(hours=24)) & (raw_data['id']=='MT_001')].iloc[:,1].values.all()

In [None]:
# for i in tqdm(zip(targets['forecast_time'].values, targets['identifier'].values)):
#     assert targets[(targets['forecast_time']==i[0]) & (targets['identifier']==i[1])].iloc[:, 2:].values.tolist().sort() == raw_data[(raw_data['date']>=pd.Timestamp(i[0])+pd.Timedelta(hours=1)) & (raw_data['date']<=pd.Timestamp(i[0])+pd.Timedelta(hours=24)) & (raw_data['id']==i[1])].iloc[:,1].values.tolist().sort()
    

In [31]:
def checker(row: Series, targets: DataFrame, raw_data: DataFrame):
#     print(row[0], row[1])
    assert targets[(targets['forecast_time']==row[0]) & (targets['identifier']==row[1])].iloc[:, 2:].values.tolist().sort() == raw_data[(raw_data['date']>=pd.Timestamp(row[0])+pd.Timedelta(hours=1)) & (raw_data['date']<=pd.Timestamp(row[0])+pd.Timedelta(hours=24)) & (raw_data['id']==row[1])].iloc[:,1].values.tolist().sort()
    

In [33]:
# targets.apply(checker, axis=1, args=(targets, raw_data))

PIVOT TARGET AND PREDS 

In [213]:
def pivot(df: DataFrame) -> DataFrame:
    df['forecast_time'] = df['forecast_time'].astype('datetime64[s]')
    df_pivot: DataFrame = df.pivot(index=['forecast_time'], columns='identifier', values=df.columns[2:].tolist())
    df_piv2 = df_pivot.stack(level=0)
    # drop multilevel index
    df_piv2 = df_piv2.reset_index(level=1, drop=False)
    # set level as a int (hourly timedelta)
    df_piv2['level_1'] = df_piv2['level_1'].str.replace('t+', '')
    df_piv2['level_1'] = df_piv2['level_1'].astype('int64')
    df_piv2['level_1'] = df_piv2['level_1'] + 1
    df_piv2.reset_index(drop=False, inplace=True)

    def add_hour(row: Series) -> Series:
        row['forecast_time'] = row['forecast_time'] + pd.Timedelta(hours=row['level_1'])

        return row

    df_piv2 = df_piv2.apply(add_hour, axis=1)
    # sort by forecast_time
    df_piv2.sort_values(by=['forecast_time'], ascending=True, inplace=True)
    df_piv2.drop(['level_1'], axis=1, inplace=True)
    # drop duplicates hours
    df_piv2.drop_duplicates(subset=['forecast_time'], inplace=True)
    df_piv2.columns.name = None
    df_piv2.reset_index(drop=True, inplace=True)

    return df_piv2

In [214]:
targets_piv = pivot(targets)

  


In [217]:
targets_piv.head()

Unnamed: 0,forecast_time,MT_001,MT_002,MT_003,MT_004,MT_005,MT_006,MT_007,MT_008,MT_009,...,MT_361,MT_362,MT_363,MT_364,MT_365,MT_366,MT_367,MT_368,MT_369,MT_370
0,2014-09-01 00:00:00,16.180203,33.783784,1.737619,128.556911,46.341463,159.970238,3.957038,218.013468,40.646853,...,101.713062,46350.0,1000.0,2255.681818,41.0691,9.947338,608.867428,49.66611,766.312317,23324.324324
1,2014-09-01 01:00:00,16.497462,27.382646,1.737619,104.674797,42.378049,125.744048,3.250424,191.919192,37.15035,...,55.496074,18600.0,695.147679,1153.409091,31.616688,8.484494,618.744513,49.248748,720.857771,23324.324324
2,2014-09-01 02:00:00,16.180203,24.715505,1.737619,94.512195,40.243902,117.559524,3.250424,182.659933,38.461538,...,51.570307,17725.0,712.025316,1113.636364,31.290743,5.412522,565.188762,52.587646,733.687683,23554.054054
3,2014-09-01 03:00:00,16.497462,24.537696,1.520417,89.430894,37.5,116.071429,3.250424,222.222222,38.898601,...,51.034975,15050.0,704.64135,1073.863636,31.616688,5.266238,544.117647,50.918197,713.343109,21662.162162
4,2014-09-01 04:00:00,16.180203,24.893314,1.737619,94.004065,36.280488,121.279762,3.109101,201.178451,36.713287,...,51.570307,13225.0,704.64135,982.954545,31.290743,7.753072,538.410887,49.66611,720.307918,23729.72973


In [273]:
# timestamp_upper: str = '2014-09-01 00:00:00'
# timestamp_lower: str = (targets_piv['forecast_time'].max() - pd.Timedelta(hours=23)).strftime('%Y-%m-%d %H:%M:%S')

In [274]:
# raw_data[(raw_data['date']>=pd.Timestamp('2014-09-01 14:00:00')) & (raw_data['date']<=pd.Timestamp('2014-09-01 14:00:00')+pd.Timedelta(hours=23)) & (raw_data['id']=='MT_001')].iloc[:,1].values.tolist()

In [275]:
# targets_piv[(targets_piv['forecast_time']>=pd.Timestamp('2014-09-01 14:00:00')) & (targets_piv['forecast_time']<=pd.Timestamp('2014-09-01 14:00:00')+pd.Timedelta(hours=23))].loc[:, 'MT_001'].values.tolist()

In [270]:
# for timestamp in pd.date_range(start=pd.Timestamp('2014-09-01 00:00:00'), end=(targets_piv['forecast_time'].max() - pd.Timedelta(hours=23)),
#                                                   freq=pd.offsets.Hour(1)):
#     if raw_data[(raw_data['date']>=pd.Timestamp(timestamp)) & (raw_data['date']<=pd.Timestamp(timestamp)+pd.Timedelta(hours=23)) & (raw_data['id']=='MT_001')].iloc[:,1].values.tolist() == targets_piv[(targets_piv['forecast_time']>=pd.Timestamp(timestamp)) & (targets_piv['forecast_time']<=pd.Timestamp(timestamp)+pd.Timedelta(hours=23))].loc[:, 'MT_001'].values.tolist():
#         continue
#     else:
#         print(timestamp)
        


In [269]:
# targets_piv['forecast_time'].max() - pd.Timedelta(hours=23)

In [276]:
p50_piv: DataFrame = pivot(p50_forecast)
p90_piv: DataFrame = pivot(p90_forecast)

  
  


In [277]:
p50_piv.head()

Unnamed: 0,forecast_time,MT_001,MT_002,MT_003,MT_004,MT_005,MT_006,MT_007,MT_008,MT_009,...,MT_361,MT_362,MT_363,MT_364,MT_365,MT_366,MT_367,MT_368,MT_369,MT_370
0,2014-09-01 00:00:00,15.816484,32.333202,1.726487,119.80411,47.519485,143.74661,3.985144,225.68738,41.00396,...,96.095055,29567.742,1002.459,2253.4258,62.161964,6.966114,643.72375,42.859077,786.4482,21632.58
1,2014-09-01 01:00:00,15.426067,27.259977,1.725155,99.02534,42.097904,122.17685,3.487472,202.56679,41.436615,...,45.153713,20238.367,754.63086,1081.788,24.938011,5.047535,599.5548,41.065666,751.6471,21268.16
2,2014-09-01 02:00:00,15.549843,26.116793,1.544381,91.94444,38.116226,117.20415,3.252636,198.64682,39.6288,...,48.42848,17006.703,686.2129,1046.3827,23.602146,4.682081,551.8011,42.157806,759.3985,21629.219
3,2014-09-01 03:00:00,15.710974,25.81147,1.721075,87.46027,37.20029,117.28565,3.228712,217.45206,38.95031,...,44.07217,13547.113,654.6166,970.2458,22.339233,5.593839,538.3987,45.283806,762.0596,21552.654
4,2014-09-01 04:00:00,15.574275,25.7357,1.731947,85.2646,37.07412,114.15558,3.097564,207.18054,37.340942,...,48.840267,11509.521,653.2987,901.4543,22.922176,5.812411,539.22546,49.05198,738.7145,21566.703


In [278]:
p50_piv.shape

(168, 370)

In [287]:
t_mt_001 = targets[targets['identifier']=='MT_001']

In [294]:
# check if date ranges of targets and targets_piv are the same
assert t_mt_001['forecast_time'].max() + pd.Timedelta(hours=24) == targets_piv['forecast_time'].max()
assert t_mt_001['forecast_time'].min() + pd.Timedelta(hours=1) == targets_piv['forecast_time'].min()

# CALCULATE MAPE

In [295]:
targets_piv_1 = targets_piv[['forecast_time', 'MT_001']]
p50_piv_1 = p50_piv[['forecast_time', 'MT_001']]

In [315]:
def rolling_mape_multitarget(targets_df: DataFrame, preds_df: DataFrame, hours_mape: int) -> DataFrame:
    count = 0
    for mt in targets_df.columns[1:]:
        if count == 0:
            df_mape: DataFrame = pd.DataFrame(data={'forecast_time': preds_df['forecast_time'], 'true': targets_df[mt], 'preds': preds_df[mt]})
            df_mape['abs(Pred-true)']: Series = np.abs(df_mape['preds'] - df_mape['true'])
            d: List = []

            for i in range(0, df_mape.shape[0] - hours_mape):
                a: int = sum(df_mape['abs(Pred-true)'][i:i + hours_mape])
                b: int = sum(df_mape['true'][i:i + hours_mape])
                c: float = 100 * a / b
                d.append(c)

            # prendere la data del inizio di intervallo
            p: List = []
            for i in range(0, df_mape.shape[0] - hours_mape):
                f: Union[str, Timestamp] = df_mape['forecast_time'].iloc[i]
                p.append(f)
            assert len(p) == len(d)

            df_mape_final: DataFrame = DataFrame(data={'time': p, f'mape_{mt}': d})
            count += 1

        else:
            df_mape: DataFrame = pd.DataFrame(data={'forecast_time': preds_df['forecast_time'], 'true': targets_df[mt], 'preds': preds_df[mt]})
            df_mape['abs(Pred-true)']: Series = np.abs(df_mape['preds'] - df_mape['true'])
            d: List = []

            for i in range(0, df_mape.shape[0] - hours_mape):
                a: int = sum(df_mape['abs(Pred-true)'][i:i + hours_mape])
                b: int = sum(df_mape['true'][i:i + hours_mape])
                c: float = 100 * a / b
                d.append(c)

            # prendere la data del inizio di intervallo
            p: List = []
            for i in range(0, df_mape.shape[0] - hours_mape):
                f: Union[str, Timestamp] = df_mape['forecast_time'].iloc[i]
                p.append(f)
            assert len(p) == len(d)

            df_mape_interim: DataFrame = DataFrame(data={'time': p, f'mape_{mt}': d})
            df_mape_final[f'mape_{mt}'] = df_mape_interim[f'mape_{mt}']
            count += 1
            
    return df_mape_final

In [329]:
df_mape_50: DataFrame = rolling_mape_multitarget(targets_piv, p50_piv, 84)

In [330]:
df_mape_50

Unnamed: 0,time,mape_MT_001,mape_MT_002,mape_MT_003,mape_MT_004,mape_MT_005,mape_MT_006,mape_MT_007,mape_MT_008,mape_MT_009,...,mape_MT_361,mape_MT_362,mape_MT_363,mape_MT_364,mape_MT_365,mape_MT_366,mape_MT_367,mape_MT_368,mape_MT_369,mape_MT_370
0,2014-09-01 00:00:00,27.399685,5.410595,0.924466,7.122646,11.023991,6.244305,51.966402,5.011514,14.658034,...,6.904244,6.681340,7.112374,7.430763,30.236986,14.237464,6.969070,6.762520,6.279935,7.095542
1,2014-09-01 01:00:00,27.488703,5.379870,0.925396,7.076477,11.040743,6.155688,51.757438,5.001451,14.668354,...,6.903178,6.388825,7.093486,7.345950,30.337574,14.001739,7.018736,6.654484,6.246934,7.030532
2,2014-09-01 02:00:00,27.580430,5.413469,0.925799,7.261482,11.111679,6.117865,51.442258,5.091561,14.633045,...,6.834852,6.412199,6.964326,7.225537,30.523782,13.894295,7.125739,6.538520,6.208029,6.938292
3,2014-09-01 03:00:00,27.648387,5.360905,0.797433,7.288275,11.151026,6.178188,51.557599,5.040898,14.634852,...,6.852272,6.496354,6.883801,7.101978,30.711624,14.077132,7.211038,6.442904,6.160923,6.845971
4,2014-09-01 04:00:00,28.481666,5.377088,0.661130,7.283050,11.336411,6.168323,52.280230,5.058461,14.779916,...,6.780372,6.501928,6.864115,6.959664,31.058740,14.122682,7.303066,6.369201,6.079381,6.863536
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,2014-09-04 07:00:00,38.667629,3.790953,0.964825,6.206285,9.305120,5.293530,52.530985,4.300421,19.874050,...,7.828710,5.173309,4.310625,4.350362,17.382987,14.623089,6.046488,7.193053,4.291299,7.343512
80,2014-09-04 08:00:00,39.156419,3.835548,1.036389,6.375585,9.309598,5.312395,52.242511,4.296098,19.883452,...,7.805466,5.101711,4.226050,4.334227,16.658227,14.794264,6.057444,7.269065,4.369395,7.243498
81,2014-09-04 09:00:00,39.632950,3.844560,1.038138,6.569600,9.175175,5.292588,52.745278,4.372635,19.623948,...,7.758787,5.034820,4.223368,4.324391,16.457392,14.842459,6.056624,7.383698,4.425072,7.129757
82,2014-09-04 10:00:00,39.687712,3.849807,1.042647,6.465930,9.113501,5.163560,52.122535,4.479599,19.582228,...,7.735545,4.956230,4.256691,3.857614,15.988581,14.881184,6.033464,7.434248,4.498585,7.166027


In [331]:
df_mape_50.iloc[:, 1:].mean().mean()

7.589166441215918

In [332]:
df_mape_90: DataFrame = rolling_mape_multitarget(targets_piv, p90_piv, 84)

In [333]:
df_mape_90.iloc[:, 1:].mean().mean()

13.761469244165124