In [1]:
import sys
# Put the parent directory first on the module search path so the imports in the
# following cells resolve against it (presumably where data_formatters,
# expt_settings and libs live — confirm).
sys.path.insert(0, '..')

In [2]:
from data_formatters.base import GenericDataFormatter, InputTypes, DataTypes
from data_formatters.sorgenia_wind import SorgeniaFormatter
from expt_settings.configs import ExperimentConfig
from libs.hyperparam_opt import HyperparamOptManager
from libs.tft_model import TemporalFusionTransformer
import libs.utils as utils
import os
import pandas as pd
from pandas import DataFrame, Series, Timestamp, Index
from tqdm import tqdm

In [3]:
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from tensorflow.compat.v1 import Session, ConfigProto
from tensorflow.python.eager.context import PhysicalDevice
from typing import Dict, List, Union, Generator
from numpy import load

In [4]:
# Report which GPU (if any) TensorFlow will use by default.
# The device name is queried once and reused rather than calling into
# TensorFlow twice; an empty string means no GPU device is available.
gpu_device_name: str = tf.test.gpu_device_name()
if gpu_device_name:
    print('Default GPU Device:{}'.format(gpu_device_name))
else:
    print("Please install GPU version of TF")

Default GPU Device:/device:GPU:0


In [5]:
# Enable on-demand GPU memory allocation instead of reserving all device memory up front.
# Iterating over the detected devices (rather than indexing gpu[0]) avoids an IndexError
# on a machine with no visible GPU and keeps the setting consistent across all GPUs.
gpu: List[PhysicalDevice] = tf.config.experimental.list_physical_devices('GPU')
for physical_gpu in gpu:
    tf.config.experimental.set_memory_growth(physical_gpu, True)

In [6]:
# TensorFlow setup.
# Fetch the TF1-compat Keras backend session, then build the project's default
# session config for GPU 0 (the recorded output confirms "Selecting GPU ID=0").
# NOTE(review): `utils` here is `libs.utils` from the import cell; a later cell runs
# `from inference import utils`, which rebinds the name — re-running this cell after
# that one will resolve `get_default_tensorflow_config` on the wrong module.
default_keras_session: Session = tf1.keras.backend.get_session()
tf_config: ConfigProto = utils.get_default_tensorflow_config(tf_device="gpu", gpu_id=0)

Selecting GPU ID=0


In [7]:
# Location of the input CSV (sorgenia_final.csv) read in the next cell.
# NOTE(review): hard-coded absolute Windows path — the notebook only runs on this
# machine as written; consider deriving it from a configurable data directory.
file_path: str = r'C:\Users\Lorenzo\PycharmProjects\TFT\outputs\data\sorgenia_wind\data\sorgenia_wind\data\sorgenia_final.csv'

In [8]:
# Load the CSV into a DataFrame and parse its `time` column to datetimes
# in a single chained expression.
raw_data: DataFrame = pd.read_csv(file_path).assign(
    time=lambda frame: frame['time'].astype('datetime64[s]')
)

In [9]:
# Preview the first rows to confirm the load and the parsed `time` column.
raw_data.head()

Unnamed: 0,plant_name_up,time,kwh,dew_point_2m_C,temperature_2m_C,msl_pressure_hPa,sfc_pressure_hPa,precipitation_1h_mm,wind_speed_mean_10m_1h_ms,wind_speed_mean_100m_1h_ms,...,days_from_start,id,hour,day,day_of_week,month,categorical_id,hours_from_start,categorical_day_of_week,categorical_hour
0,UP_MPNTLCDMRN_1,2016-12-31 23:00:00,225.0,-0.902831,4.980909,1028.935552,983.718765,0.0,1.381208,2.254668,...,0,UP_MPNTLCDMRN_1,23,31,5,12,UP_MPNTLCDMRN_1,0.0,5,23
1,UP_MPNTLCDMRN_1,2017-01-01 00:00:00,65.625,-0.783022,4.890773,1028.693079,983.524042,-1.734723e-15,1.278206,2.224647,...,0,UP_MPNTLCDMRN_1,0,1,6,1,UP_MPNTLCDMRN_1,1.0,6,0
2,UP_MPNTLCDMRN_1,2017-01-01 01:00:00,31.875,-0.565645,4.807044,1028.156679,983.034144,-1.734723e-15,1.211802,2.130597,...,0,UP_MPNTLCDMRN_1,1,1,6,1,UP_MPNTLCDMRN_1,2.0,6,1
3,UP_MPNTLCDMRN_1,2017-01-01 02:00:00,0.0,-0.331075,4.911959,1028.162779,983.062126,-1.734723e-15,1.017035,1.846413,...,0,UP_MPNTLCDMRN_1,2,1,6,1,UP_MPNTLCDMRN_1,3.0,6,2
4,UP_MPNTLCDMRN_1,2017-01-01 03:00:00,0.0,-0.66804,4.464566,1027.61692,982.551146,-1.734723e-15,0.715094,1.450024,...,0,UP_MPNTLCDMRN_1,3,1,6,1,UP_MPNTLCDMRN_1,4.0,6,3


In [10]:
# Experiment configuration for the 'sorgenia_wind' experiment; the second argument is
# presumably the root output folder (confirm against ExperimentConfig).
# NOTE(review): hard-coded absolute Windows path — machine-specific.
config = ExperimentConfig('sorgenia_wind', r'C:\Users\Lorenzo\PycharmProjects\TFT\outputs')

# START WITH MODEL 1

Wind Forecasts as known input

In [11]:
# Build the data formatter that the experiment config provides for this dataset.
formatter: SorgeniaFormatter = config.make_data_formatter()

In [12]:
# CSV path as resolved by the experiment config.
# NOTE(review): not used in the cells visible here — the data was read from the
# hard-coded `file_path` instead; confirm which of the two paths is authoritative.
data_csv_path: str = config.data_csv_path

# SPLIT DATA

In [13]:
# Split the raw frame into train / validation / test partitions via the formatter.
# Per the recorded output, the formatter also sets its scalers from the training data here.
train, valid, test = formatter.split_data(raw_data)

Setting scalers with training data...


# OPEN PREDICTIONS AND TARGETS FILES

In [14]:
# Folder holding the saved p50/p90 predictions and targets CSVs loaded in the next cell.
# NOTE(review): hard-coded absolute Windows path — machine-specific.
wind_known_path: str = r'C:\Users\Lorenzo\savedmodels_sorgenia_wind_known'

In [15]:
def _read_forecast_csv(file_name: str) -> DataFrame:
    """Read one CSV from ``wind_known_path`` and parse its ``forecast_time`` column."""
    frame: DataFrame = pd.read_csv(os.path.join(wind_known_path, file_name))
    frame['forecast_time'] = frame['forecast_time'].astype('datetime64[s]')
    return frame


# The three files share the same layout, so they go through the same loader.
p50_forecast: DataFrame = _read_forecast_csv("p50.csv")
p90_forecast: DataFrame = _read_forecast_csv("p90.csv")
targets: DataFrame = _read_forecast_csv("targets.csv")

In [16]:
# Preview the p50 forecasts: one row per (forecast_time, identifier) with columns t+0 … t+11.
p50_forecast.head()

Unnamed: 0,forecast_time,identifier,t+0,t+1,t+2,t+3,t+4,t+5,t+6,t+7,t+8,t+9,t+10,t+11
0,2020-08-13 22:00:00,UP_MPNTLCDMRN_1,94.7528,236.21342,325.1527,307.60492,294.89087,246.62157,163.40056,103.391716,114.31194,313.344,732.8784,652.0716
1,2020-08-13 23:00:00,UP_MPNTLCDMRN_1,1657.0032,1147.2983,676.1232,469.01343,347.4005,235.9268,150.77953,158.33025,440.21902,761.2005,706.5071,1364.0544
2,2020-08-14 00:00:00,UP_MPNTLCDMRN_1,1914.3452,1136.4591,711.5871,487.5738,344.98242,206.8101,194.00963,509.81693,810.82104,658.3359,1373.9912,1643.2124
3,2020-08-14 01:00:00,UP_MPNTLCDMRN_1,1109.7296,834.6086,563.53094,363.6079,183.96867,153.01237,492.92365,823.3561,682.92035,1556.1431,1759.6161,1981.1287
4,2020-08-14 02:00:00,UP_MPNTLCDMRN_1,1327.7163,889.4348,561.01917,274.82147,215.57652,569.987,866.9952,742.00824,1652.6295,1800.097,2014.1538,2023.6296


In [17]:
# Preview the targets; they use the same layout as the forecast frames.
targets.head()

Unnamed: 0,forecast_time,identifier,t+0,t+1,t+2,t+3,t+4,t+5,t+6,t+7,t+8,t+9,t+10,t+11
0,2020-08-13 22:00:00,UP_MPNTLCDMRN_1,1999.5,3033.375,1952.4375,1969.125,1177.875,1275.1875,390.0,3.75,225.0,832.6875,1020.1875,585.0
1,2020-08-13 23:00:00,UP_MPNTLCDMRN_1,3033.375,1952.4375,1969.125,1177.875,1275.1875,390.0,3.75,225.0,832.6875,1020.1875,585.0,1426.125
2,2020-08-14 00:00:00,UP_MPNTLCDMRN_1,1952.4375,1969.125,1177.875,1275.1875,390.0,3.75,225.0,832.6875,1020.1875,585.0,1426.125,2937.0
3,2020-08-14 01:00:00,UP_MPNTLCDMRN_1,1969.125,1177.875,1275.1875,390.0,3.75,225.0,832.6875,1020.1875,585.0,1426.125,2937.0,1623.75
4,2020-08-14 02:00:00,UP_MPNTLCDMRN_1,1177.875,1275.1875,390.0,3.75,225.0,832.6875,1020.1875,585.0,1426.125,2937.0,1623.75,1582.875


In [18]:
# Preview the p90 forecasts; they use the same layout as the p50 frame.
p90_forecast.head()

Unnamed: 0,forecast_time,identifier,t+0,t+1,t+2,t+3,t+4,t+5,t+6,t+7,t+8,t+9,t+10,t+11
0,2020-08-13 22:00:00,UP_MPNTLCDMRN_1,431.28275,796.88684,1015.1233,978.5049,950.5942,844.57117,654.0504,483.7723,522.96497,978.26685,1687.2004,1543.1346
1,2020-08-13 23:00:00,UP_MPNTLCDMRN_1,2631.3281,2331.4636,1723.1796,1367.8485,1112.2424,905.2881,694.56445,741.45026,1328.5981,1854.8528,1650.9906,2457.6096
2,2020-08-14 00:00:00,UP_MPNTLCDMRN_1,2941.7886,2351.3496,1879.6521,1458.7332,1135.9467,849.97565,851.1212,1506.6927,1968.5065,1641.6179,2496.5151,2744.0544
3,2020-08-14 01:00:00,UP_MPNTLCDMRN_1,1965.9198,1830.4994,1536.1088,1147.4752,756.76324,666.90094,1359.7202,1914.5146,1557.3672,2576.7803,2778.3845,2901.9946
4,2020-08-14 02:00:00,UP_MPNTLCDMRN_1,2183.0208,1886.1135,1496.876,1014.66675,854.17346,1534.0648,1966.1691,1631.5808,2666.4868,2825.912,2950.655,2931.485


IMPORT FUNCTIONS TO TRANSFORM DATAFRAMES AND CALCULATE MAPE

In [19]:
# Project helpers used below to check, reshape and score the forecast frames.
# NOTE(review): the second import rebinds the name `utils`, which the import cell at the
# top bound to `libs.utils`. From here on `utils` is `inference.utils`, so re-running the
# TensorFlow setup cell (which calls `utils.get_default_tensorflow_config`) after this
# cell will look that function up on the wrong module. Consider aliasing one of the two
# modules and moving these imports to the top import cell.
from inference import mape
from inference import utils

In [20]:
# Check that the saved targets match the values in raw_data, using the 'time' column.
# NOTE(review): 12 presumably is the number of horizon columns (t+0 … t+11) and the
# meaning of 2 is not visible here — confirm against inference.utils.test_targets and
# replace both with named constants.
# The recorded run took about six minutes for 22946 iterations.
utils.test_targets(targets, raw_data, 'time', 12, 2)

22946it [06:06, 62.66it/s]


PIVOT TARGETS AND PREDICTIONS

In [22]:
# Reshape the targets and both quantile forecasts with the same project helper.
# Unpacking forces the lazy map to run in order: targets, p50, p90.
# Judging by the previews that follow, each result has one row per forecast_time
# and one column per plant identifier.
targets_piv, p50_piv, p90_piv = map(
    utils.pivot, (targets, p50_forecast, p90_forecast)
)

  row['forecast_time'] = row['forecast_time'] + pd.Timedelta(hours=row['level_1'])
  row['forecast_time'] = row['forecast_time'] + pd.Timedelta(hours=row['level_1'])
  row['forecast_time'] = row['forecast_time'] + pd.Timedelta(hours=row['level_1'])


In [23]:
# Preview the pivoted targets: forecast_time plus one column per plant identifier.
targets_piv.head()

Unnamed: 0,forecast_time,UP_MPNTLCDMRN_1,UP_MPNTLCSMBC_1,UP_PEPIZZA_1,UP_PRCLCDMZRD_1,UP_PRCLCDPLRM_1,UP_PRCLCDPRZZ_1,UP_PRCLCMINEO_1
0,2020-08-13 23:00:00,1999.5,958.9475,968.25,230.784158,1723.125,3882.825,225.15
1,2020-08-14 00:00:00,3033.375,1320.9375,979.5,360.259093,3213.675,5392.95,1142.4
2,2020-08-14 01:00:00,1952.4375,1060.8025,250.5,23.228734,1980.9,3550.05,3746.1
3,2020-08-14 02:00:00,1969.125,382.535,910.5,363.637708,1994.325,2307.9,1706.25
4,2020-08-14 03:00:00,1177.875,1341.335,1471.5,2285.976416,1029.9,1194.525,69.15


In [24]:
# Preview the pivoted p50 forecasts; they use the same layout as the pivoted targets.
p50_piv.head()

Unnamed: 0,forecast_time,UP_MPNTLCDMRN_1,UP_MPNTLCSMBC_1,UP_PEPIZZA_1,UP_PRCLCDMZRD_1,UP_PRCLCDPLRM_1,UP_PRCLCDPRZZ_1,UP_PRCLCMINEO_1
0,2020-08-13 23:00:00,94.7528,700.89124,832.75604,498.8829,1183.8936,805.3833,10.358035
1,2020-08-14 00:00:00,236.21342,471.63696,870.3061,555.8575,851.9419,565.1338,48.32239
2,2020-08-14 01:00:00,1147.2983,442.05334,855.97797,566.2686,1155.9641,1212.171,843.1451
3,2020-08-14 02:00:00,307.60492,395.35608,849.84467,607.4703,570.86786,201.98827,438.2124
4,2020-08-14 03:00:00,294.89087,391.54486,856.4567,971.8403,431.29358,142.96434,257.7317


In [25]:
# Preview the pivoted p90 forecasts; they use the same layout as the pivoted targets.
p90_piv.head()

Unnamed: 0,forecast_time,UP_MPNTLCDMRN_1,UP_MPNTLCSMBC_1,UP_PEPIZZA_1,UP_PRCLCDMZRD_1,UP_PRCLCDPLRM_1,UP_PRCLCDPRZZ_1,UP_PRCLCMINEO_1
0,2020-08-13 23:00:00,431.28275,1194.4808,1457.235,1067.707,2000.239,2013.6917,456.73096
1,2020-08-14 00:00:00,796.88684,860.63837,1493.2479,1153.3906,1852.436,1941.7083,649.0972
2,2020-08-14 01:00:00,2331.4636,766.4402,1438.0261,1197.176,2283.362,3251.8892,2228.1448
3,2020-08-14 02:00:00,978.5049,676.89343,1419.4735,1512.347,1564.5599,1150.8121,1595.9038
4,2020-08-14 03:00:00,950.5942,684.63495,1489.5775,2500.8906,1302.2809,900.7057,1218.658


In [27]:
# Show the (rows, columns) shape of each pivoted frame, in the order p90, p50, targets.
tuple(frame.shape for frame in (p90_piv, p50_piv, targets_piv))

((3311, 8), (3311, 8), (3311, 8))

CHECK THAT THE FORECAST TIMESTAMPS ARE IDENTICAL ACROSS TARGETS AND PREDICTIONS

In [31]:
# The pivoted targets must line up row-for-row with each quantile forecast:
# compare the forecast_time columns element-wise and say which frame is off on failure.
assert np.array_equal(
    targets_piv['forecast_time'].values, p50_piv['forecast_time'].values
), "forecast_time values of the p50 predictions do not match the targets"
assert np.array_equal(
    targets_piv['forecast_time'].values, p90_piv['forecast_time'].values
), "forecast_time values of the p90 predictions do not match the targets"