Attempt to apply the trained model to the test set

In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series, Timestamp
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from tensorflow.compat.v1 import Session, ConfigProto
from tensorflow.python.eager.context import PhysicalDevice
from typing import Dict, List, Union, Generator
import os
from numpy import load

In [2]:
import sys

# Put the parent directory on the import path (presumably so the project packages
# imported in the following cells resolve; the path is relative to the kernel's
# working directory). Guarded so that re-running this cell does not keep stacking
# duplicate '..' entries onto sys.path.
if '..' not in sys.path:
    sys.path.insert(0, '..')

In [3]:
from data_formatters.base import GenericDataFormatter, InputTypes, DataTypes
from data_formatters.erg_wind import ErgFormatter

In [4]:
from expt_settings.configs import ExperimentConfig
from libs.hyperparam_opt import HyperparamOptManager
from libs.tft_model import TemporalFusionTransformer
import libs.utils as utils

In [5]:
# Ask TensorFlow once for the default GPU device name; an empty result means
# no GPU device was found.
gpu_device_name = tf.test.gpu_device_name()
if not gpu_device_name:
    print("Please install GPU version of TF")
else:
    print('Default GPU Device:{}'.format(gpu_device_name))

Default GPU Device:/device:GPU:0


In [6]:
# Turn on memory growth for the first visible GPU so TensorFlow allocates GPU
# memory as needed instead of reserving it all up front.
gpu: List[PhysicalDevice] = tf.config.experimental.list_physical_devices('GPU')
# The list is empty on a machine without a visible GPU; indexing it
# unconditionally would abort the notebook with an IndexError.
if gpu:
    tf.config.experimental.set_memory_growth(gpu[0], True)

In [7]:
# Tensorflow setup
# Grab the Keras backend's current session before the prediction cell installs its
# own; this handle is what would be used to restore the backend afterwards.
default_keras_session: Session = tf1.keras.backend.get_session()
# Session configuration obtained from the project helper, requesting GPU 0
# (the helper logs "Selecting GPU ID=0").
tf_config: ConfigProto = utils.get_default_tensorflow_config(tf_device="gpu", gpu_id=0)

Selecting GPU ID=0


In [8]:
# Location of the input CSV.
# NOTE(review): absolute, machine-specific Windows path — the notebook cannot run on
# another machine without editing this line; consider deriving it from a configurable
# data directory.
file_path: str = r'C:\Users\Lorenzo\PycharmProjects\TFT\outputs\data\erg_wind\data\erg_7farms_final.csv'

In [9]:
# Read the whole CSV into memory. The preview of this frame shows an "Unnamed: 0"
# column — presumably an index that was written out when the file was saved.
raw_data: DataFrame = pd.read_csv(file_path)

In [10]:
# Preview the first rows of the freshly loaded (unscaled) data.
raw_data.head()

Unnamed: 0,energy_mw,time,Wind Speed,2m_devpoint [C],temperature [C],mean_sealev_pressure [hPa],surface pressure [hPa],precipitation [m],10_wind_speed,10_u_wind,...,days_from_start,id,hour,day,day_of_week,month,categorical_id,hours_from_start,categorical_day_of_week,categorical_hour
0,11.787686,2019-01-01 01:00:00,9.024996,-0.595344,3.746035,1023.987081,952.041642,0.017248,3.077197,-0.290841,...,0,BISACCIA2,1,1,1,1,BISACCIA2,0.0,1,1
1,12.321628,2019-01-01 02:00:00,9.115065,0.186824,4.068633,1023.939205,952.04334,0.066301,3.056552,-0.466334,...,0,BISACCIA2,2,1,1,1,BISACCIA2,1.0,1,2
2,12.21724,2019-01-01 03:00:00,8.807608,0.119856,3.750193,1023.588209,951.72926,0.053635,3.240812,-0.547045,...,0,BISACCIA2,3,1,1,1,BISACCIA2,2.0,1,3
3,12.117007,2019-01-01 04:00:00,9.551801,-0.312831,3.430814,1023.465573,951.629732,0.026092,3.616165,-0.753333,...,0,BISACCIA2,4,1,1,1,BISACCIA2,3.0,1,4
4,12.415503,2019-01-01 05:00:00,8.734134,-0.526966,3.453347,1023.853208,951.984117,0.016079,3.446475,-0.759338,...,0,BISACCIA2,5,1,1,1,BISACCIA2,4.0,1,5


In [11]:
# Convert the 'time' column to a datetime dtype (requested unit: seconds).
# NOTE: this overwrites the column in place, so `raw_data` no longer matches the
# preview displayed by the earlier cell once this has run.
raw_data['time'] = raw_data['time'].astype('datetime64[s]')

In [12]:
# Experiment configuration for 'erg_wind'; the second argument is presumably the
# root output folder — TODO confirm against ExperimentConfig.
# NOTE(review): another hard-coded absolute path tied to one machine.
config = ExperimentConfig('erg_wind', r'C:\Users\Lorenzo\PycharmProjects\TFT\outputs')

In [13]:
# Data formatter supplied by the experiment config; it is used below to split the
# data and to provide the experiment and default model parameters.
formatter: ErgFormatter = config.make_data_formatter()

In [14]:
# CSV path as reported by the config; the prediction cell passes its parent
# directory to the model as 'data_folder'.
data_csv_path: str = config.data_csv_path

# SPLIT DATA

In [15]:
# Partition the raw frame into train / validation / test sets. The call prints
# "Setting scalers with training data...", so scalers are set as part of this step.
train, valid, test = formatter.split_data(raw_data)

Setting scalers with training data...


In [16]:
# Column definitions as declared by the formatter.
# NOTE(review): not referenced again in the visible part of this notebook.
column_definitions = formatter.get_column_definition()

In [17]:
# Sample counts the formatter reports for calibration (train, validation).
# NOTE(review): neither value is used in the visible part of this notebook.
train_samples, valid_samples = formatter.get_num_samples_for_calibration()

In [18]:
# Sets up default params
fixed_params: Dict = formatter.get_experiment_params()
params: Dict = formatter.get_default_model_params()
# Build the "fixed" model directory once and use it both as the standalone
# `model_folder` variable and as the "model_folder" entry of the params dict.
model_folder = os.path.join(config.model_folder, "fixed")
params["model_folder"] = model_folder

In [19]:
# Sets up hyperparam manager
print("*** Loading hyperparm manager ***")
# Wrap every default value in a one-element list, so the manager is handed exactly
# one candidate per parameter.
single_value_ranges = {name: [value] for name, value in params.items()}
opt_manager = HyperparamOptManager(single_value_ranges, fixed_params, model_folder)

*** Loading hyperparm manager ***


In [20]:
# Rebind `model_folder` to the folder the hyperparameter manager reports.
# NOTE: this reuses the name assigned earlier when the default params were set up.
model_folder: str = opt_manager.hyperparam_folder

In [21]:
# Preview the first rows of the test partition returned by the split.
test.head()

Unnamed: 0,energy_mw,time,Wind Speed,2m_devpoint [C],temperature [C],mean_sealev_pressure [hPa],surface pressure [hPa],precipitation [m],10_wind_speed,10_u_wind,...,days_from_start,id,hour,day,day_of_week,month,categorical_id,hours_from_start,categorical_day_of_week,categorical_hour
7847,0.553991,2019-11-24 01:00:00,0.149984,-0.217145,-0.572498,-1.240157,-1.201925,-0.280909,-0.148862,-1.306867,...,327,BISACCIA2,-1.517353,24,1.508026,11,0,2.727674,6,1
7848,0.077031,2019-11-24 02:00:00,-0.068494,-0.210682,-0.562844,-1.287976,-1.267696,-0.202226,-0.131412,-1.420313,...,327,BISACCIA2,-1.37287,24,1.508026,11,0,2.728242,6,2
7849,-0.212918,2019-11-24 03:00:00,0.771623,-0.198735,-0.54939,-1.387889,-1.3785,-0.138043,-0.171952,-1.399401,...,327,BISACCIA2,-1.228386,24,1.508026,11,0,2.72881,6,3
7850,1.402098,2019-11-24 04:00:00,0.711172,-0.145757,-0.535209,-1.440861,-1.43047,0.165935,-0.231601,-1.485212,...,327,BISACCIA2,-1.083902,24,1.508026,11,0,2.729378,6,4
7851,0.670221,2019-11-24 05:00:00,0.330383,-0.067967,-0.540407,-1.458021,-1.463828,0.853539,-0.366567,-1.341374,...,327,BISACCIA2,-0.939418,24,1.508026,11,0,2.729947,6,5


In [22]:
# Earliest and latest timestamp present in the test partition.
test.time.min(), test.time.max()

(Timestamp('2019-11-24 01:00:00'), Timestamp('2019-12-31 23:00:00'))

WE EXPECT THE MODEL TO BE FED INPUTS STARTING FROM THE FOLLOWING TIMESTAMPS

In [23]:
# Show ten rows starting at positional row 168 — the same number as `num_encoder_steps`
# in the model parameters printed by the prediction cell (presumably the rows right
# after one full encoder window; TODO confirm).
# Columns are selected by position, so this depends on the CSV's column order.
test.iloc[168:178, [0,1,25]]

Unnamed: 0,energy_mw,time,hours_from_start
8015,-0.966229,2019-12-01 01:00:00,2.823135
8016,-0.966229,2019-12-01 02:00:00,2.823703
8017,-0.966229,2019-12-01 03:00:00,2.824272
8018,-0.966229,2019-12-01 04:00:00,2.82484
8019,-0.966229,2019-12-01 05:00:00,2.825408
8020,-0.966229,2019-12-01 06:00:00,2.825976
8021,-0.958126,2019-12-01 07:00:00,2.826545
8022,-0.799554,2019-12-01 08:00:00,2.827113
8023,-0.837053,2019-12-01 09:00:00,2.827681
8024,-0.958994,2019-12-01 10:00:00,2.828249


# PREDICTION

In [24]:
print("*** Running tests ***")
# Start from an empty default graph, then build and run the model inside a dedicated
# graph and session that are torn down when the `with` block exits.
tf1.reset_default_graph()
with tf.Graph().as_default(), tf1.Session(config=tf_config) as sess:
    # Point the Keras backend at the session created for this block.
    tf1.keras.backend.set_session(sess)
    try:
        params: Dict = opt_manager.get_next_parameters()
        # These two entries are added only for model construction and removed again
        # right after it (presumably so `params` keeps the key set the hyperparameter
        # manager handed out — TODO confirm).
        params['exp_name'] = 'erg_wind'
        params['data_folder'] = os.path.abspath(os.path.join(data_csv_path, os.pardir))
        model = TemporalFusionTransformer(params, use_cudnn=False)
        params.pop('exp_name', None)
        params.pop('data_folder', None)
        # load model
        model.load(opt_manager.hyperparam_folder, use_keras_loadings=True)

#         print("Computing best validation loss")
#         val_loss: Series = model.evaluate(valid)

        print("Computing test loss")
        output_map, combined = model.predict(test, return_targets=True)

        # Disabled post-processing, kept for reference: the "UNDERSTANDING SAVED
        # PREDICTIONS" cells read targets.csv / p50.csv / p90.csv, which this code
        # would write.
#         print(f"Output map returned a dict with keys {output_map.get('p50').shape}")
#         targets: DataFrame = formatter.format_predictions(output_map["targets"])
#         p50_forecast: DataFrame = formatter.format_predictions(output_map["p50"])
#         p90_forecast: DataFrame = formatter.format_predictions(output_map["p90"])

        # save all
#         print("saving predictions and targets")
#         targets.to_csv(os.path.join(opt_manager.hyperparam_folder, "targets.csv"), index=False)
#         p50_forecast.to_csv(os.path.join(opt_manager.hyperparam_folder, "p50.csv"), index=False)
#         p90_forecast.to_csv(os.path.join(opt_manager.hyperparam_folder, "p90.csv"), index=False)

#         def extract_numerical_data(data: DataFrame) -> DataFrame:
#             """Strips out forecast time and identifier columns."""
#             return data[[
#                 col for col in data.columns
#                 if col not in {"forecast_time", "identifier"}
#             ]]

#         p50_loss = utils.numpy_normalised_quantile_loss(
#                 extract_numerical_data(targets), extract_numerical_data(p50_forecast),
#                 0.5)
#         p90_loss = utils.numpy_normalised_quantile_loss(
#             extract_numerical_data(targets), extract_numerical_data(p90_forecast),
#             0.9)
    finally:
        # `sess` is closed when the `with` block exits; hand the Keras backend its
        # original session back (even if loading or prediction failed) so it is not
        # left bound to a closed session.
        tf1.keras.backend.set_session(default_keras_session)

# print()
# print("Normalised Quantile Loss for Test Data: P50={}, P90={}".format(
#     p50_loss.mean(), p90_loss.mean()))

*** Running tests ***
Resetting temp folder...
*** TemporalFusionTransformer params ***
# dropout_rate = 0.1
# hidden_layer_size = 160
# learning_rate = 0.001
# max_gradient_norm = 0.01
# minibatch_size = 64
# model_folder = C:\Users\Lorenzo\PycharmProjects\TFT\outputs\saved_models\erg_wind\fixed
# num_heads = 4
# stack_size = 1
# total_time_steps = 192
# num_encoder_steps = 168
# num_epochs = 100
# early_stopping_patience = 10
# multiprocessing_workers = 5
# column_definition = [('id', <DataTypes.REAL_VALUED: 0>, <InputTypes.ID: 4>), ('time', <DataTypes.DATE: 2>, <InputTypes.TIME: 5>), ('energy_mw', <DataTypes.REAL_VALUED: 0>, <InputTypes.TARGET: 0>), ('hour', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('day_of_week', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('hours_from_start', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('Wind Speed', <DataTypes.REAL_VALUED: 0>, <InputTypes.OBSERVED_INPUT: 1>), ('2m_devpoint [C]', <DataTypes.REAL_VALUED:

Computing test loss




In [26]:
# First entry along axis 0, every position along axis 1, index 0 on the last axis of
# the second value returned by `model.predict` (24 values in the displayed output).
# NOTE(review): what the last axis enumerates is not shown here — TODO confirm.
combined[0, :, 0]

array([-0.91762805, -0.91939265, -0.9187931 , -0.9183624 , -0.91801924,
       -0.9177746 , -0.91764355, -0.91762835, -0.9177374 , -0.9180009 ,
       -0.91849256, -0.9194091 , -0.92119604, -0.9244736 , -0.9221057 ,
       -0.91426367, -0.9037035 , -0.9077713 , -0.90940017, -0.91137105,
       -0.91556686, -0.92083484, -0.9259221 , -0.95263046], dtype=float32)

In [27]:
# Same slice of `combined` as the previous cell, for index 1 on the last axis.
combined[0, :, 1]

array([-0.9372621 , -0.93364024, -0.930244  , -0.92762583, -0.9255981 ,
       -0.9240468 , -0.92283016, -0.92180735, -0.9208341 , -0.91973263,
       -0.9182158 , -0.9156521 , -0.9102551 , -0.8969884 , -0.8697816 ,
       -0.8389054 , -0.8209944 , -0.82769865, -0.83464247, -0.83651286,
       -0.8338221 , -0.8299964 , -0.8347605 , -0.93628794], dtype=float32)

In [28]:
# Same slice of `combined` again, for index 2 on the last axis.
combined[0, :, 2]

array([-0.19817322, -0.21149379, -0.21110609, -0.20966315, -0.20838176,
       -0.20738016, -0.20658475, -0.20590994, -0.20527567, -0.2045997 ,
       -0.20380019, -0.20287356, -0.20216367, -0.20095007, -0.18831627,
       -0.16742766, -0.15738857, -0.14966673, -0.14510958, -0.1392355 ,
       -0.11780034, -0.09134372, -0.08285485, -0.35983786], dtype=float32)

In [None]:
# test.columns

In [29]:
# Load the array stored as 'labels.npy' inside the model's data folder.
labels_path = os.path.join(model.data_folder, 'labels.npy')
labels = np.load(labels_path)

In [31]:
# First entry of the loaded labels: all positions along axis 1, index 0 on the last
# axis — the same slicing used for `combined` above, presumably for a side-by-side
# comparison.
labels[0, :, 0]

array([ 0.89619397,  0.91390567,  0.8841601 , -0.10270699, -0.57446371,
       -0.65231392, -0.6737567 , -0.00690768, -0.19052638, -0.59388903,
       -0.46355291,  0.56612911, -0.17732474,  0.59771273,  1.41848404,
        2.33678076,  2.49458601,  1.982414  ,  1.8727362 ,  1.247928  ,
        1.09233788,  1.18142138,  0.03371621, -0.02206723])

In [None]:
# test.iloc[192:202, [0,20,21,24,23,2,3,4,5,6,7,8,9,10,11,12,13,14,15]]

# UNDERSTANDING SAVED PREDICTIONS

START BY CONVERTING THE FORECAST TIME INTO A PROPER DATETIME FORMAT

In [None]:
# Read the forecast and target tables back from the hyperparameter manager's folder.
# NOTE(review): the code that writes these CSV files is commented out in the
# prediction cell, so they must already exist from an earlier run.
predictions_dir = opt_manager.hyperparam_folder
p50_forecast: DataFrame = pd.read_csv(os.path.join(predictions_dir, "p50.csv"))
p90_forecast: DataFrame = pd.read_csv(os.path.join(predictions_dir, "p90.csv"))
targets: DataFrame = pd.read_csv(os.path.join(predictions_dir, "targets.csv"))

In [None]:
# Preview the first rows of the table read from p90.csv.
p90_forecast.head()

In [None]:
# Preview the first rows of the table read from targets.csv.
targets.head()

In [None]:
# Convert 'forecast_time' (read back from CSV) to a datetime dtype (requested unit:
# seconds). Overwrites the column in place.
targets['forecast_time'] = targets['forecast_time'].astype('datetime64[s]')

# CALCULATE ROLLING MAPE