In [1]:
import sys
# Put the parent directory (relative to the current working directory) first on
# the module search path.
# NOTE(review): presumably so the project packages imported in the following
# cells (data_formatters, expt_settings, libs, inference) resolve when the
# notebook is started from a sub-folder of the repo — confirm.
sys.path.insert(0, '..')

In [2]:
from data_formatters.base import GenericDataFormatter, InputTypes, DataTypes
from data_formatters.sorgenia_wind import SorgeniaFormatter
from expt_settings.configs import ExperimentConfig
from libs.hyperparam_opt import HyperparamOptManager
from libs.tft_model import TemporalFusionTransformer
import libs.utils as utils
import os
import pandas as pd
from pandas import DataFrame, Series, Timestamp, Index
from tqdm import tqdm

In [3]:
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from tensorflow.compat.v1 import Session, ConfigProto
from tensorflow.python.eager.context import PhysicalDevice
from typing import Dict, List, Union, Generator
from numpy import load
import plotly.graph_objects as go
from plotly.graph_objects import Figure

IMPORT FUNCTIONS TO TRANSFORM DF AND CALCULATE MAPE

In [4]:
from inference import mape
from inference import utils as infutils

In [5]:
# Ask TensorFlow for the default GPU device name once and reuse the answer for
# both the check and the message (an empty string means no GPU was found).
gpu_device_name = tf.test.gpu_device_name()
if gpu_device_name:
    print('Default GPU Device:{}'.format(gpu_device_name))
else:
    print("Please install GPU version of TF")

Default GPU Device:/device:GPU:0


In [6]:
# Enable on-demand GPU memory allocation for the first visible GPU.
# Guarded so that a machine without a GPU does not fail with IndexError here.
# NOTE(review): TensorFlow documents that memory growth has to be configured
# before the GPUs are initialised — confirm that the device query in the
# previous cell does not already initialise them on the TF version in use.
gpu: List[PhysicalDevice] = tf.config.experimental.list_physical_devices('GPU')
if gpu:
    tf.config.experimental.set_memory_growth(gpu[0], True)
else:
    print("No GPU visible to TensorFlow; skipping memory growth configuration")

In [7]:
# Tensorflow setup
# Keep a handle on the Keras session that is active right now;
# compute_predictions switches back to it once it is done with its own session.
default_keras_session: Session = tf1.keras.backend.get_session()
# Session configuration targeting GPU 0, built by the project helper
# (it prints "Selecting GPU ID=0" when called).
tf_config: ConfigProto = utils.get_default_tensorflow_config(tf_device="gpu", gpu_id=0)

Selecting GPU ID=0


In [8]:
# CSV file that is loaded into `raw_data`.
# NOTE(review): absolute, machine-specific Windows path — the notebook only
# runs on a machine where this exact file exists; consider deriving it from a
# configurable data directory.
file_path: str = r'C:\Users\Lorenzo\PycharmProjects\TFT\outputs\data\sorgenia_wind\data\sorgenia_wind\data\sorgenia_final_cop.csv'

In [9]:
# Load the CSV and convert its 'time' column with astype('datetime64[s]') in a
# single chained expression, so the frame is built once and never patched in place.
raw_data: DataFrame = pd.read_csv(file_path).assign(
    time=lambda frame: frame['time'].astype('datetime64[s]')
)

In [10]:
# Preview the first rows to sanity-check the loaded columns.
raw_data.head()

Unnamed: 0,plant_name_up,time,kwh,dew_point_2m_C,temperature_2m_C,msl_pressure_hPa,sfc_pressure_hPa,precipitation_1h_mm,wind_speed_mean_10m_1h_ms,wind_speed_mean_100m_1h_ms,...,days_from_start,id,hour,day,day_of_week,month,categorical_id,hours_from_start,categorical_day_of_week,categorical_hour
0,UP_MPNTLCDMRN_1,2019-01-01 00:00:00,3976.3125,2.8,3.4,1022.8,958.1,0.89,2.8,5.3,...,0,UP_MPNTLCDMRN_1,0,1,1,1,UP_MPNTLCDMRN_1,0.0,1,0
1,UP_MPNTLCDMRN_1,2019-01-01 01:00:00,3576.1875,0.1,4.6,1022.4,958.0,0.3,4.0,7.3,...,0,UP_MPNTLCDMRN_1,1,1,1,1,UP_MPNTLCDMRN_1,1.0,1,1
2,UP_MPNTLCDMRN_1,2019-01-01 02:00:00,3169.875,-1.0,4.5,1021.8,957.6,0.07,4.7,8.6,...,0,UP_MPNTLCDMRN_1,2,1,1,1,UP_MPNTLCDMRN_1,2.0,1,2
3,UP_MPNTLCDMRN_1,2019-01-01 03:00:00,4007.625,-1.0,4.8,1021.4,957.2,0.09,4.8,9.0,...,0,UP_MPNTLCDMRN_1,3,1,1,1,UP_MPNTLCDMRN_1,3.0,1,3
4,UP_MPNTLCDMRN_1,2019-01-01 04:00:00,4361.8125,0.6,4.4,1021.2,956.9,0.14,4.7,8.8,...,0,UP_MPNTLCDMRN_1,4,1,1,1,UP_MPNTLCDMRN_1,4.0,1,4


In [11]:
# Experiment configuration for the 'sorgenia_wind' experiment.
# NOTE(review): the second argument is an absolute, machine-specific path —
# presumably the root output folder; confirm against ExperimentConfig and
# consider making it configurable.
config = ExperimentConfig('sorgenia_wind', r'C:\Users\Lorenzo\PycharmProjects\TFT\outputs')

# START WITH MODEL1

Wind Forecasts as known input

In [12]:
# Data formatter supplied by the experiment config (annotated as SorgeniaFormatter).
formatter: SorgeniaFormatter = config.make_data_formatter()

In [13]:
# Path of the experiment's data CSV as reported by the config.
# compute_predictions reads this module-level name (it is not passed as an
# argument) to derive the 'data_folder' model parameter.
data_csv_path: str = config.data_csv_path

# SPLIT DATA

In [14]:
# Split the raw frame into train / validation / test partitions via the formatter.
# The call prints "Setting scalers with training data..." while it runs.
train, valid, test = formatter.split_data(raw_data)

Setting scalers with training data...


In [15]:
# Default experiment and model parameters come from the formatter; the model
# folder is pinned to the "fixed" sub-folder of the configured model folder.
fixed_params: Dict = formatter.get_experiment_params()
params: Dict = formatter.get_default_model_params()
model_folder = os.path.join(config.model_folder, "fixed")
params["model_folder"] = model_folder

In [16]:
# Hyperparameter manager whose search grid holds exactly one candidate per
# parameter: each default value is wrapped in a single-element list.
print("*** Loading hyperparm manager ***")
single_value_grid = {name: [value] for name, value in params.items()}
opt_manager = HyperparamOptManager(single_value_grid, fixed_params, model_folder)

*** Loading hyperparm manager ***


In [17]:
# NOTE(review): this name is not referenced by any other cell visible in the
# notebook (predictions are written to opt_manager.hyperparam_folder instead) —
# confirm whether it is still needed. It is also an absolute, machine-specific path.
predictions_path: str = r'C:\Users\Lorenzo\savedmodels_sorgenia_wind_known'

In [18]:
def compute_predictions(test: DataFrame, opt_manager: HyperparamOptManager, formatter: SorgeniaFormatter, tf_config: ConfigProto):
    """Load the saved TFT model, predict on ``test`` and report quantile losses.

    The function builds a fresh graph and session, restores the model stored in
    ``opt_manager.hyperparam_folder``, runs ``model.predict`` on ``test``, writes
    ``targets.csv``, ``p50.csv`` and ``p90.csv`` into that same folder and prints
    the mean normalised quantile loss for P50 and P90.

    Besides its arguments it reads two module-level names defined in earlier
    cells: ``data_csv_path`` (its parent directory becomes the ``data_folder``
    parameter) and ``default_keras_session`` (restored as the Keras session on
    the way out, even when prediction fails).

    Args:
        test: Data to predict on, passed to ``model.predict``.
        opt_manager: Provides the model parameters and the folder holding the
            saved model; predictions are saved into the same folder.
        formatter: Used to format the raw model outputs via ``format_predictions``.
        tf_config: Configuration for the TensorFlow session created here.

    Returns:
        None. Results are printed and written to CSV files.
    """
    print("*** Running tests ***")
    tf1.reset_default_graph()
    with tf.Graph().as_default(), tf1.Session(config=tf_config) as sess:
        tf1.keras.backend.set_session(sess)
        try:
            params: Dict = opt_manager.get_next_parameters()
            # These two entries are only needed to construct the model; they are
            # removed again right after so the dict handed out by the manager is
            # left as it was received.
            params['exp_name'] = 'sorgenia_wind'
            params['data_folder'] = os.path.abspath(os.path.join(data_csv_path, os.pardir))
            model = TemporalFusionTransformer(params, use_cudnn=False)
            params.pop('exp_name', None)
            params.pop('data_folder', None)
            # load model
            model.load(opt_manager.hyperparam_folder, use_keras_loadings=True)

            print("Computing test loss")
            output_map: Dict = model.predict(test, return_targets=True)
            # Despite its wording, this message prints the shape of the p50 output.
            print(f"Output map returned a dict with keys {output_map.get('p50').shape}")
            targets: DataFrame = formatter.format_predictions(output_map["targets"])
            p50_forecast: DataFrame = formatter.format_predictions(output_map["p50"])
            p90_forecast: DataFrame = formatter.format_predictions(output_map["p90"])

            # save all
            print("saving predictions and targets")
            targets.to_csv(os.path.join(opt_manager.hyperparam_folder, "targets.csv"), index=False)
            p50_forecast.to_csv(os.path.join(opt_manager.hyperparam_folder, "p50.csv"), index=False)
            p90_forecast.to_csv(os.path.join(opt_manager.hyperparam_folder, "p90.csv"), index=False)

            numeric_targets = _extract_numerical_data(targets)
            p50_loss = utils.numpy_normalised_quantile_loss(
                numeric_targets, _extract_numerical_data(p50_forecast), 0.5)
            p90_loss = utils.numpy_normalised_quantile_loss(
                numeric_targets, _extract_numerical_data(p90_forecast), 0.9)
        finally:
            # Always hand the Keras backend back to the notebook's default
            # session, otherwise a failure above would leave it pointing at the
            # session that is about to be closed.
            tf1.keras.backend.set_session(default_keras_session)

    print()
    print("Normalised Quantile Loss for Test Data: P50={}, P90={}".format(
        p50_loss.mean(), p90_loss.mean()))


def _extract_numerical_data(data: DataFrame) -> DataFrame:
    """Strips out forecast time and identifier columns."""
    excluded = {"forecast_time", "identifier"}
    return data[[col for col in data.columns if col not in excluded]]

In [19]:
# compute_predictions expects the test partition as its first argument; without
# it the call fails with a TypeError on a fresh kernel.
compute_predictions(test, opt_manager, formatter, tf_config)

*** Running tests ***
Resetting temp folder...
*** TemporalFusionTransformer params ***
# dropout_rate = 0.1
# hidden_layer_size = 160
# learning_rate = 0.001
# max_gradient_norm = 0.01
# minibatch_size = 64
# model_folder = C:\Users\Lorenzo\PycharmProjects\TFT\outputs\saved_models\sorgenia_wind\fixed
# num_heads = 4
# stack_size = 1
# total_time_steps = 180
# num_encoder_steps = 168
# num_epochs = 100
# early_stopping_patience = 10
# multiprocessing_workers = 5
# column_definition = [('id', <DataTypes.REAL_VALUED: 0>, <InputTypes.ID: 4>), ('time', <DataTypes.DATE: 2>, <InputTypes.TIME: 5>), ('kwh', <DataTypes.REAL_VALUED: 0>, <InputTypes.TARGET: 0>), ('hour', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('day_of_week', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('hours_from_start', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('dew_point_2m_C', <DataTypes.REAL_VALUED: 0>, <InputTypes.OBSERVED_INPUT: 1>), ('temperature_2m_C', <DataTypes.REAL_VAL

Computing test loss




Output map returned a dict with keys (11172, 14)
saving predictions and targets

Normalised Quantile Loss for Test Data: P50=0.4362108910643745, P90=0.23059013885110494


# OPEN PREDICTIONS AND TARGETS FILES

In [21]:
from inference import evaluate

In [22]:
# Run the project's evaluation helper on the folder where compute_predictions
# wrote targets.csv / p50.csv / p90.csv.
# NOTE(review): the meaning of the label and of the constant 700 is not visible
# here — presumably a model name and a threshold/capacity value; confirm in
# inference.evaluate and consider naming the constant.
df_mape = evaluate.evaluate(opt_manager.hyperparam_folder, 'copernicus_forecasts_model', 700)

copernicus_forecasts_model  46.385341166066326


# PLOT MAPE BOXPLOT

In [24]:
# Build the box plot from the evaluation result (annotated as a plotly Figure)
# and render it.
fig: Figure = evaluate.boxplotter(df_mape)
fig.show()