In [1]:
import sys
# Put the parent directory first on the module search path. Presumably this is what lets
# the project packages imported below (data_formatters, expt_settings, libs) resolve when
# the notebook is launched from a sub-folder of the repository — confirm the expected
# working directory, since '..' is relative to wherever the kernel was started.
sys.path.insert(0, '..')

In [2]:
from data_formatters.base import GenericDataFormatter, InputTypes, DataTypes
from data_formatters.sorgenia_wind import SorgeniaFormatter
from expt_settings.configs import ExperimentConfig
from libs.hyperparam_opt import HyperparamOptManager
from libs.tft_model import TemporalFusionTransformer
import libs.utils as utils
import os
import pandas as pd
from pandas import DataFrame, Series, Timestamp, Index
from tqdm import tqdm

In [3]:
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from tensorflow.compat.v1 import Session, ConfigProto
from tensorflow.python.eager.context import PhysicalDevice
from typing import Dict, List, Union, Generator
from numpy import load

In [4]:
# Report which GPU TensorFlow will use by default.
# The device name is queried once and reused: the original asked the runtime twice
# (once for the condition, once for the message). A falsy name means no GPU was found.
gpu_device_name: str = tf.test.gpu_device_name()
if gpu_device_name:
    print('Default GPU Device:{}'.format(gpu_device_name))
else:
    print("Please install GPU version of TF")

Default GPU Device:/device:GPU:0


In [5]:
# Turn on memory growth for the first visible GPU so TensorFlow allocates GPU memory as
# needed instead of reserving it all up front.
# NOTE(review): this cell runs after the cell that calls tf.test.gpu_device_name();
# TensorFlow documents that memory growth must be configured before the GPUs are
# initialized — confirm the setting is actually taking effect in this order.
gpu: List[PhysicalDevice] = tf.config.experimental.list_physical_devices('GPU')
if gpu:
    tf.config.experimental.set_memory_growth(gpu[0], True)
else:
    # The previous cell already tolerates a machine without a GPU; indexing gpu[0]
    # unconditionally would raise IndexError here instead of a readable message.
    print("No GPU visible to TensorFlow; skipping memory growth setup")

In [6]:
# Tensorflow setup
# `utils` in this cell is `libs.utils` (imported as `utils` in the first import cell).
# A later cell executes `from inference import utils`, which rebinds the same name, so
# this cell has to run before that import (or after re-running the top import cell).
# Neither variable below is referenced again in the visible part of the notebook.

# Keras backend session obtained through the TF1 compatibility API.
default_keras_session: Session = tf1.keras.backend.get_session()
# Project helper that builds a ConfigProto for the requested device; the
# "Selecting GPU ID=0" line in the cell output is printed during this call.
tf_config: ConfigProto = utils.get_default_tensorflow_config(tf_device="gpu", gpu_id=0)

Selecting GPU ID=0


In [7]:
# Location of the CSV that is loaded into `raw_data` in the next cell.
# NOTE(review): absolute, machine-specific Windows path — the notebook cannot be re-run on
# another machine without editing it. The `data\sorgenia_wind` segment appears twice in
# the path; confirm that is the real on-disk layout and not a copy/paste slip.
file_path: str = r'C:\Users\Lorenzo\PycharmProjects\TFT\outputs\data\sorgenia_wind\data\sorgenia_wind\data\sorgenia_final.csv'

In [8]:
# Load the CSV at `file_path` and cast its 'time' column to second-resolution datetimes
# in a single expression, so `raw_data` is never visible in a half-converted state.
raw_data: DataFrame = pd.read_csv(file_path).astype({'time': 'datetime64[s]'})

In [9]:
raw_data.head()

Unnamed: 0,plant_name_up,time,kwh,dew_point_2m_C,temperature_2m_C,msl_pressure_hPa,sfc_pressure_hPa,precipitation_1h_mm,wind_speed_mean_10m_1h_ms,wind_speed_mean_100m_1h_ms,...,days_from_start,id,hour,day,day_of_week,month,categorical_id,hours_from_start,categorical_day_of_week,categorical_hour
0,UP_MPNTLCDMRN_1,2019-01-01 00:00:00,3976.3125,2.8,3.4,1022.8,958.1,0.89,2.8,5.3,...,0,UP_MPNTLCDMRN_1,0,1,1,1,UP_MPNTLCDMRN_1,0.0,1,0
1,UP_MPNTLCDMRN_1,2019-01-01 01:00:00,3576.1875,0.1,4.6,1022.4,958.0,0.3,4.0,7.3,...,0,UP_MPNTLCDMRN_1,1,1,1,1,UP_MPNTLCDMRN_1,1.0,1,1
2,UP_MPNTLCDMRN_1,2019-01-01 02:00:00,3169.875,-1.0,4.5,1021.8,957.6,0.07,4.7,8.6,...,0,UP_MPNTLCDMRN_1,2,1,1,1,UP_MPNTLCDMRN_1,2.0,1,2
3,UP_MPNTLCDMRN_1,2019-01-01 03:00:00,4007.625,-1.0,4.8,1021.4,957.2,0.09,4.8,9.0,...,0,UP_MPNTLCDMRN_1,3,1,1,1,UP_MPNTLCDMRN_1,3.0,1,3
4,UP_MPNTLCDMRN_1,2019-01-01 04:00:00,4361.8125,0.6,4.4,1021.2,956.9,0.14,4.7,8.8,...,0,UP_MPNTLCDMRN_1,4,1,1,1,UP_MPNTLCDMRN_1,4.0,1,4


In [10]:
# Experiment configuration for the 'sorgenia_wind' experiment; the second argument is the
# root output folder handed to ExperimentConfig.
# NOTE(review): absolute, machine-specific path — it shares its root with `file_path`
# above, so both have to be edited together when running elsewhere.
config: ExperimentConfig = ExperimentConfig('sorgenia_wind', r'C:\Users\Lorenzo\PycharmProjects\TFT\outputs')

# START WITH MODEL1

Wind Forecasts as known input

In [None]:
# Ask the experiment config for its data formatter; it is used below to split `raw_data`.
# The SorgeniaFormatter annotation is the author's expectation for the 'sorgenia_wind'
# experiment — the actual return type is decided inside config.make_data_formatter().
formatter: SorgeniaFormatter = config.make_data_formatter()

In [None]:
# CSV path as resolved by the experiment config.
# NOTE(review): not referenced again in the visible cells — `raw_data` was loaded from the
# hard-coded `file_path` instead. Confirm whether the two paths are meant to agree.
data_csv_path: str = config.data_csv_path

# SPLIT DATA

In [None]:
# Split the raw frame into three partitions using the formatter's own splitting rule
# (the rule itself lives in formatter.split_data and is not visible here).
# NOTE(review): train / valid / test are not referenced again in the visible cells.
train, valid, test = formatter.split_data(raw_data)

# OPEN PREDICTIONS AND TARGETS FILES

In [None]:
# Folder that holds the p50.csv, p90.csv and targets.csv files read in the next cell.
# NOTE(review): absolute, machine-specific path — edit before running on another machine.
wind_known_path: str = r'C:\Users\Lorenzo\savedmodels_sorgenia_wind_known'

In [None]:
# Read the p50 forecasts, the p90 forecasts and the targets from `wind_known_path`,
# casting each file's 'forecast_time' column to second-resolution datetimes as it loads.
p50_forecast: DataFrame
p90_forecast: DataFrame
targets: DataFrame
p50_forecast, p90_forecast, targets = (
    pd.read_csv(os.path.join(wind_known_path, csv_name)).astype({'forecast_time': 'datetime64[s]'})
    for csv_name in ("p50.csv", "p90.csv", "targets.csv")
)

In [None]:
p50_forecast.head()

In [None]:
targets.head()

In [None]:
p90_forecast.head()

IMPORT FUNCTIONS TO TRANSFORM DF AND CALCULATE MAPE

In [None]:
from inference import mape
# NOTE: this import rebinds the name `utils`, which until this cell referred to
# `libs.utils` (imported as `utils` in the first import cell). From here on `utils.*`
# resolves to `inference.utils`; re-running an earlier cell that expects `libs.utils`
# (e.g. the Tensorflow setup cell) may fail unless the top import cell is re-run first.
from inference import utils

In [None]:
# test targets matching with raw_data
# `utils` is `inference.utils` at this point (rebound by the import cell just above).
# NOTE(review): the meaning of the positional arguments 'time', 12 and 2 is defined by
# utils.test_targets, which is not visible here — consider passing them by keyword.
utils.test_targets(targets, raw_data, 'time', 12, 2)

PIVOT TARGETS AND PREDS

In [None]:
# Run the same project pivot helper over the targets and both forecast frames,
# in the order targets -> p50 -> p90.
targets_piv: DataFrame
p50_piv: DataFrame
p90_piv: DataFrame
targets_piv, p50_piv, p90_piv = (
    utils.pivot(long_frame) for long_frame in (targets, p50_forecast, p90_forecast)
)

In [None]:
targets_piv.head()

In [None]:
p50_piv.head()

In [None]:
p90_piv.head()

In [None]:
p90_piv.shape, p50_piv.shape, targets_piv.shape

CHECK THAT THE FORECAST TIMESTAMPS ARE IDENTICAL ACROSS TARGETS AND PREDICTIONS

In [None]:
# Both pivoted forecast frames must carry exactly the same 'forecast_time' values, in the
# same order, as the pivoted targets; otherwise row-wise comparisons would be misaligned.
reference_times = targets_piv['forecast_time'].values
for forecast_piv in (p50_piv, p90_piv):
    assert np.array_equal(reference_times, forecast_piv['forecast_time'].values)

# ROLLING MAPE

In [None]:
# Rolling MAPE of the p50 forecasts against the targets, computed by the project helper.
# NOTE(review): the third argument is 168 here, while the hand-written p90 computation in
# the next cell uses 336-row windows — confirm the two window lengths are meant to differ.
# `df_mape_50` is not referenced again in the visible cells.
df_mape_50: DataFrame = mape.rolling_mape_multitarget(targets_piv, p50_piv, 168)

In [None]:
# Rolling MAPE of the p90 forecast for a single plant.
#
# For every window of ROLLING_WINDOW consecutive rows the metric is
#     100 * sum(|pred - true|) / sum(true)
# and the window is labelled with the forecast_time of its FIRST row.
#
# Outputs kept for the following cells:
#   df_mape        per-row frame (forecast_time, true, preds, abs(Pred-true))
#   df_mape_final  one row per window (time, mape_<PLANT_ID>)
PLANT_ID: str = 'UP_MPNTLCSMBC_1'
# Rows per window. NOTE(review): 336 = 14 * 24, i.e. two weeks if the pivoted rows are
# hourly — confirm against the pivot helper.
ROLLING_WINDOW: int = 336

df_mape: DataFrame = pd.DataFrame(
    data={
        'forecast_time': p90_piv['forecast_time'],
        'true': targets_piv[PLANT_ID],
        'preds': p90_piv[PLANT_ID],
    })
df_mape['abs(Pred-true)'] = np.abs(df_mape['preds'] - df_mape['true'])

# Number of windows evaluated. This matches the original loop bounds
# (range(0, len - window)); max() keeps it at zero when there are fewer rows than
# one window so the slices below stay empty instead of wrapping around.
# NOTE(review): these bounds leave out the last complete window (the one starting at
# row len - window). That looks like an off-by-one; kept as-is so previously reported
# numbers do not change — confirm whether `+ 1` is wanted.
n_windows: int = max(df_mape.shape[0] - ROLLING_WINDOW, 0)

# Work on the underlying arrays: each window is summed in a single NumPy call instead of
# Python's built-in sum() iterating element by element over a Series slice. Every window
# is summed independently, so NaNs propagate exactly as before and no running-sum drift
# is introduced.
abs_error = df_mape['abs(Pred-true)'].values
true_values = df_mape['true'].values
rolling_mape: List[float] = [
    100 * abs_error[start:start + ROLLING_WINDOW].sum()
    / true_values[start:start + ROLLING_WINDOW].sum()
    for start in range(n_windows)
]

# Take the date at the start of each interval.
window_start_times: List[Union[str, Timestamp]] = df_mape['forecast_time'].iloc[:n_windows].tolist()
assert len(window_start_times) == len(rolling_mape)

df_mape_final: DataFrame = DataFrame(
    data={'time': window_start_times, f'mape_{PLANT_ID}': rolling_mape})

In [None]:
df_mape_final['mape_UP_MPNTLCSMBC_1'].mean()