# Test Environment

In [1]:
import ipywidgets as widgets
from IPython.display import display
import numpy as np
import pandas as pd
#import Models.models as models
#import Models.LSTM.models_LSTM as models
import myLibrary as mL
#from Experiment_Class import Experiment
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pickle
import os

NDBC = mL.NDBC_lib
ERA5 = mL.ERA5_lib
Models = mL.Models
DP = mL.DataProcessor
Experiment = mL.Experiment

# Get Data

In [2]:
data_directory = os.path.join(os.getcwd(), f'data/datasets/type_A')

def build_UI():

    # Select Model-------------------------------------------------------------------------------
    global datafile_widget
    datafile_list = os.listdir(data_directory)
    datafile_widget = widgets.Select(
        options=datafile_list,
        value=datafile_list[0],
        # rows=10,
        description='Datafile:',
        disabled=False
    )
    display(datafile_widget)

    #STATIONARY_SHIFT -----------------------------------------------------------------------
    global stationaryShift_widget
    stationaryShift_widget = widgets.BoundedIntText(
        value=1,
        min=0,
        max=10,
        step=1,
        description='',
        disabled=False,
    )
    print("Stationary Shilft: ")
    display(stationaryShift_widget)

    # Test Hours-------------------------------------------------------------------------------
    global test_hours_widget
    test_hours_widget = widgets.IntSlider(
        value=24,
        min=0,
        max=1000,
        step=1,
        description='Test Hours:',
        disabled=False,
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format='d'
    )

    # display the range slider widget
    display(test_hours_widget)
    #print("(1 Week = 168h)")

    # Select Model-------------------------------------------------------------------------------
    global models_widget
    models_list = list(Models.model_dictionary.keys())
    models_widget = widgets.Select(
        options=models_list,
        value=models_list[0],
        rows=10,
        description='Model:',
        disabled=False
    )
    display(models_widget)

    #ALPHA:-------------------------------------------------------------------------------
    # create a FloatSlider widget for a value between 0 and 1
    print("Alpha (only for PINN):")
    global alpha_slider
    alpha_slider = widgets.FloatSlider(
        value=0.5,
        min=0,
        max=1,
        step=0.01,
        description='',
        readout_format='.2f',
        orientation='horizontal',
        layout={'width': '500px'}
    )

    # display the FloatSlider widget
    display(alpha_slider)


build_UI()

Select(description='Datafile:', options=('.DS_Store', 'GOM_2_A.pickle', 'dataset_GOM_1_A_A.pickle', 'GOM_1_A.p…

Stationary Shilft: 


BoundedIntText(value=1, max=10)

IntSlider(value=24, continuous_update=False, description='Test Hours:', max=1000)

Select(description='Model:', options=('LSTM',), rows=10, value='LSTM')

Alpha (only for PINN):


FloatSlider(value=0.5, layout=Layout(width='500px'), max=1.0, step=0.01)

In [3]:
DATAFILE = datafile_widget.value
STATIONARY_SHIFT = stationaryShift_widget.value
N_TEST_HOURS = test_hours_widget.value
MODEL_NAME = models_widget.value
ALPHA = alpha_slider.value

## Optional: use hardcoded variables instead

In [4]:
# DATAFILE = "dataset_1_A.pickle"
# STATIONARY_SHIFT = 1
# N_TEST_HOURS = 24
# MODEL_NAME = "pinn_1"
# ALPHA = 0.5



In [5]:
print(f"Datafile: {DATAFILE}")
print(f"Alpha: {ALPHA}")
print(f"Stationary Shift: {STATIONARY_SHIFT}")
print(f"Test-Hours: {N_TEST_HOURS}")
print(f"Model: {MODEL_NAME}")

Datafile: dataset_GOM_1_A_A.pickle
Alpha: 0.2
Stationary Shift: 1
Test-Hours: 24
Model: LSTM


In [6]:
with open(f'data/datasets/type_A/{DATAFILE}', 'rb') as f:
    # load the object from the file using pickle.load()
    dataset = pickle.load(f)

print("stations",dataset["stations"])
print("years",dataset["years"])
print("nan_threshold",dataset["nan_threshold"])
print("features",dataset["features"])
print("add_era5",dataset["add_era5"])

data = dataset["data"]
data

stations ['42001', '42002', '42003', '42007', '42012', '42019', '42020', '42035', '42036', '42038', '42039', '42040', '42041', '42055']
years ['2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022']
nan_threshold 0.66
features ['WDIR', 'WSPD', 'WVHT', 'APD', 'MWD', 'PRES', 'ATMP', 'WTMP', 'DEWP']
add_era5 True


Unnamed: 0,WDIR_42001,WSPD_42001,PRES_42001,ATMP_42001,WTMP_42001,DEWP_42001,WDIR_42002,WSPD_42002,PRES_42002,ATMP_42002,...,WDIR_42039_ERA5,WSPD_42039_ERA5,ATMP_42039_ERA5,WSPD_42035_ERA5,WSPD_42001_ERA5,DEWP_42020_ERA5,ATMP_42019_ERA5,WTMP_42039_ERA5,WSPD_42002_ERA5,PRES_42039_ERA5
2002-01-01 00:00:00,66.0,9.3,1017.1,22.3,25.5,16.8,39.0,10.5,1016.1,21.7,...,246.007357,5.756333,13.882608,8.031200,9.867456,10.834305,11.708612,21.781113,9.820263,1019.426223
2002-01-01 01:00:00,66.0,9.3,1017.1,22.3,25.5,16.8,39.0,10.5,1016.1,21.7,...,247.678051,5.579721,14.020573,8.216895,9.782997,10.975658,11.926516,21.781113,10.465795,1019.792677
2002-01-01 02:00:00,67.0,9.4,1017.2,21.9,25.5,16.6,36.0,10.9,1016.1,21.7,...,250.591891,5.582730,14.070538,8.454808,9.517146,11.111871,12.166319,21.781113,11.760698,1019.725358
2002-01-01 03:00:00,69.0,9.1,1017.2,22.4,25.5,16.9,32.0,12.7,1015.9,20.8,...,253.468273,5.633966,14.058979,8.471692,8.911373,11.239089,12.361607,21.781113,11.910608,1019.833394
2002-01-01 04:00:00,70.0,9.0,1017.1,22.5,25.5,16.3,33.0,12.7,1015.8,21.0,...,251.493918,5.638108,13.986641,8.698506,8.481407,11.332468,12.482585,21.781113,11.716782,1019.804620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-31 19:00:00,195.0,0.5,1015.0,25.6,24.7,24.5,22.0,0.6,1015.9,25.1,...,28.180851,8.602918,22.730194,2.533178,0.601025,19.323104,21.847184,24.392457,1.062895,1016.581840
2022-12-31 20:00:00,210.0,1.0,1015.0,25.3,24.5,24.3,88.0,1.2,1015.4,24.7,...,22.969408,7.734290,22.564177,2.669909,0.979714,19.245361,22.052563,24.392457,1.330008,1016.363260
2022-12-31 21:00:00,231.0,0.6,1014.7,26.4,24.7,24.4,87.0,1.7,1014.9,24.6,...,14.655430,6.485218,22.377552,3.147255,2.067327,19.338348,22.260412,24.392457,1.770915,1016.186435
2022-12-31 22:00:00,18.0,0.7,1014.9,25.4,24.7,24.1,90.0,2.5,1014.9,24.5,...,20.371840,3.294807,24.068445,3.469271,1.554299,18.567779,22.101541,24.885927,2.483432,1017.067575


# Data Processing

## 1. Make data stationary

In [7]:
STATIONARY = True #Set Flag for report
data_stationary = DP.data_to_stationary(data, n = STATIONARY_SHIFT)
data_stationary.head()

  data_stationary[col] = data[col] - data[col].shift(n)  # y = value(i) - value(i-n)
  data_stationary[col] = data[col] - data[col].shift(n)  # y = value(i) - value(i-n)
  data_stationary[col] = data[col] - data[col].shift(n)  # y = value(i) - value(i-n)
  data_stationary[col] = data[col] - data[col].shift(n)  # y = value(i) - value(i-n)


Unnamed: 0,WDIR_42001,WSPD_42001,PRES_42001,ATMP_42001,WTMP_42001,DEWP_42001,WDIR_42002,WSPD_42002,PRES_42002,ATMP_42002,...,WDIR_42039_ERA5,WSPD_42039_ERA5,ATMP_42039_ERA5,WSPD_42035_ERA5,WSPD_42001_ERA5,DEWP_42020_ERA5,ATMP_42019_ERA5,WTMP_42039_ERA5,WSPD_42002_ERA5,PRES_42039_ERA5
2002-01-01 01:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.670694,-0.176612,0.137965,0.185695,-0.084459,0.141353,0.217904,0.0,0.645532,0.366454
2002-01-01 02:00:00,1.0,0.1,0.1,-0.4,0.0,-0.2,-3.0,0.4,0.0,0.0,...,2.91384,0.003009,0.049966,0.237913,-0.265851,0.136213,0.239802,0.0,1.294903,-0.067319
2002-01-01 03:00:00,2.0,-0.3,0.0,0.5,0.0,0.3,-4.0,1.8,-0.2,-0.9,...,2.876383,0.051235,-0.011559,0.016884,-0.605774,0.127218,0.195288,0.0,0.14991,0.108036
2002-01-01 04:00:00,1.0,-0.1,-0.1,0.1,0.0,-0.6,1.0,0.0,-0.1,0.2,...,-1.974355,0.004142,-0.072338,0.226814,-0.429966,0.093379,0.120978,0.0,-0.193825,-0.028773
2002-01-01 05:00:00,-1.0,0.6,-0.6,0.0,-0.1,0.8,9.0,-0.9,-0.2,-0.2,...,-3.817351,-0.199347,-0.181964,0.199733,-0.44634,0.097662,0.066412,0.0,-0.422577,-0.087949


## 2. Transform to supervised problem

In [8]:
data_supervised = DP.data_to_supervised(data_stationary)
data_supervised.head()

Unnamed: 0,WDIR_42001(t-1),WSPD_42001(t-1),PRES_42001(t-1),ATMP_42001(t-1),WTMP_42001(t-1),DEWP_42001(t-1),WDIR_42002(t-1),WSPD_42002(t-1),PRES_42002(t-1),ATMP_42002(t-1),...,WDIR_42039_ERA5(t),WSPD_42039_ERA5(t),ATMP_42039_ERA5(t),WSPD_42035_ERA5(t),WSPD_42001_ERA5(t),DEWP_42020_ERA5(t),ATMP_42019_ERA5(t),WTMP_42039_ERA5(t),WSPD_42002_ERA5(t),PRES_42039_ERA5(t)
2002-01-01 02:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.91384,0.003009,0.049966,0.237913,-0.265851,0.136213,0.239802,0.0,1.294903,-0.067319
2002-01-01 03:00:00,1.0,0.1,0.1,-0.4,0.0,-0.2,-3.0,0.4,0.0,0.0,...,2.876383,0.051235,-0.011559,0.016884,-0.605774,0.127218,0.195288,0.0,0.14991,0.108036
2002-01-01 04:00:00,2.0,-0.3,0.0,0.5,0.0,0.3,-4.0,1.8,-0.2,-0.9,...,-1.974355,0.004142,-0.072338,0.226814,-0.429966,0.093379,0.120978,0.0,-0.193825,-0.028773
2002-01-01 05:00:00,1.0,-0.1,-0.1,0.1,0.0,-0.6,1.0,0.0,-0.1,0.2,...,-3.817351,-0.199347,-0.181964,0.199733,-0.44634,0.097662,0.066412,0.0,-0.422577,-0.087949
2002-01-01 06:00:00,-1.0,0.6,-0.6,0.0,-0.1,0.8,9.0,-0.9,-0.2,-0.2,...,1.650662,0.179143,-0.247964,0.187775,0.416342,0.074103,0.066053,0.0,-0.400801,0.382198


## train test split

In [9]:
train_X, train_y, test_X, test_y = DP.train_test_split(data_supervised, N_TEST_HOURS)
print("Shapes: ", train_X.shape, train_y.shape, test_X.shape, test_y.shape)

Shapes:  (184054, 1, 104) (184054, 104) (24, 1, 104) (24, 104)


## Normalize the data

In [10]:
# Scale the data
#NORMALIZED = True   #set flag for report
train_X_scaled, train_y_scaled, test_X_scaled, test_y_scaled, SCALER = DP.scale_data(train_X,
                                                                                     train_y,
                                                                                     test_X,
                                                                                     test_y)

---
# Select Model

In [11]:
#model = models.lstm_0(train_X_scaled,train_y_scaled)
model = Models.get_model(MODEL_NAME, train_X, train_y, ALPHA)
model.summary()

Epoch 1/100


2023-06-19 22:09:04.088545: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

----
# One-Shot Forecasting

In [12]:
_ = model.predict(train_X_scaled, batch_size=1)



In [13]:
output_cols = data.columns.tolist()
output_cols

['WDIR_42001',
 'WSPD_42001',
 'PRES_42001',
 'ATMP_42001',
 'WTMP_42001',
 'DEWP_42001',
 'WDIR_42002',
 'WSPD_42002',
 'PRES_42002',
 'ATMP_42002',
 'WTMP_42002',
 'DEWP_42002',
 'WDIR_42003',
 'WSPD_42003',
 'PRES_42003',
 'ATMP_42003',
 'WTMP_42003',
 'DEWP_42003',
 'WDIR_42019',
 'WSPD_42019',
 'PRES_42019',
 'ATMP_42019',
 'WTMP_42019',
 'DEWP_42019',
 'WDIR_42020',
 'WSPD_42020',
 'PRES_42020',
 'ATMP_42020',
 'WTMP_42020',
 'DEWP_42020',
 'WDIR_42035',
 'WSPD_42035',
 'PRES_42035',
 'ATMP_42035',
 'WTMP_42035',
 'DEWP_42035',
 'WDIR_42036',
 'WSPD_42036',
 'PRES_42036',
 'ATMP_42036',
 'DEWP_42036',
 'WDIR_42039',
 'WSPD_42039',
 'PRES_42039',
 'ATMP_42039',
 'WTMP_42039',
 'WDIR_42040',
 'WSPD_42040',
 'PRES_42040',
 'ATMP_42040',
 'WTMP_42040',
 'DEWP_42040',
 'WTMP_42001_ERA5',
 'ATMP_42020_ERA5',
 'ATMP_42001_ERA5',
 'DEWP_42001_ERA5',
 'PRES_42019_ERA5',
 'ATMP_42040_ERA5',
 'PRES_42001_ERA5',
 'WDIR_42035_ERA5',
 'PRES_42003_ERA5',
 'DEWP_42003_ERA5',
 'WTMP_42035_ERA5',


In [14]:
yhat = model.predict(test_X_scaled)
yhat_unscaled = DP.invert_scaling(yhat, SCALER)
yhat_unscaled_df = pd.DataFrame(yhat_unscaled, columns=[name + "_pred" for name in output_cols])
yhat_unscaled_df.set_index(data.tail(len(yhat)).index, inplace=True)
yhat_unscaled_df



Unnamed: 0,WDIR_42001_pred,WSPD_42001_pred,PRES_42001_pred,ATMP_42001_pred,WTMP_42001_pred,DEWP_42001_pred,WDIR_42002_pred,WSPD_42002_pred,PRES_42002_pred,ATMP_42002_pred,...,WDIR_42039_ERA5_pred,WSPD_42039_ERA5_pred,ATMP_42039_ERA5_pred,WSPD_42035_ERA5_pred,WSPD_42001_ERA5_pred,DEWP_42020_ERA5_pred,ATMP_42019_ERA5_pred,WTMP_42039_ERA5_pred,WSPD_42002_ERA5_pred,PRES_42039_ERA5_pred
2022-12-31 00:00:00,181.004929,-1.613021,-4.770545,-0.699644,-0.900683,-2.236889,-19.737869,-5.553482,-0.897391,-0.419738,...,-613.882141,-1.330827,-2.026993,0.355348,-0.048313,-1.429555,-2.238705,0.020209,0.231062,-0.94986
2022-12-31 01:00:00,153.792877,-1.614667,-4.748064,-0.700806,-0.900419,-2.239975,-21.147339,-5.550149,-0.886973,-0.420351,...,-393.186249,-1.359675,-2.031423,0.356995,-0.055516,-1.439693,-2.24264,0.020079,0.216955,-0.943733
2022-12-31 02:00:00,147.790466,-1.605179,-4.838012,-0.70305,-0.901353,-2.239955,-21.473269,-5.569685,-0.945232,-0.417294,...,-428.90036,-1.328351,-2.031884,0.354822,-0.060653,-1.401854,-2.220924,0.019491,0.224087,-0.955737
2022-12-31 03:00:00,251.893433,-1.607363,-4.705099,-0.699231,-0.899855,-2.239411,-10.116276,-5.539085,-0.852375,-0.422122,...,-559.526001,-1.368373,-2.031934,0.357185,-0.049558,-1.456493,-2.25524,0.020513,0.202442,-0.936655
2022-12-31 04:00:00,129.493835,-1.612553,-4.713374,-0.700811,-0.900203,-2.240116,-27.197329,-5.544037,-0.868325,-0.423558,...,-423.940002,-1.343874,-2.032681,0.355885,-0.047158,-1.455288,-2.254955,0.020055,0.224556,-0.934579
2022-12-31 05:00:00,74.741692,-1.605976,-4.897756,-0.703968,-0.902001,-2.238764,-28.829403,-5.586896,-0.984891,-0.415325,...,-472.175781,-1.290115,-2.030331,0.352869,-0.05869,-1.377318,-2.207604,0.01908,0.242003,-0.964053
2022-12-31 06:00:00,120.373848,-1.614301,-4.934953,-0.703543,-0.902274,-2.238032,-29.581989,-5.591302,-1.011824,-0.415633,...,-701.027954,-1.286852,-2.023575,0.365335,-0.066122,-1.368586,-2.202179,0.018847,0.232062,-0.974344
2022-12-31 07:00:00,178.339676,-1.612391,-4.907498,-0.703678,-0.901578,-2.240569,-21.512299,-5.57672,-0.991919,-0.417067,...,-583.861938,-1.320129,-2.025574,0.351233,-0.067885,-1.382164,-2.206639,0.019025,0.220815,-0.96892
2022-12-31 08:00:00,150.286926,-1.616956,-4.884286,-0.701741,-0.901529,-2.238487,-24.749487,-5.57802,-0.977968,-0.417343,...,-741.88147,-1.311304,-2.022185,0.36414,-0.06291,-1.392151,-2.215679,0.019157,0.222885,-0.96772
2022-12-31 09:00:00,121.625717,-1.60647,-4.992885,-0.702674,-0.90244,-2.238186,-26.948149,-5.608409,-1.048625,-0.414849,...,-957.10199,-1.270491,-2.018897,0.366292,-0.067558,-1.348791,-2.189194,0.018459,0.223692,-0.98479


# Correct wind direction (modulo 360)

In [15]:
# Get the list of columns starting with "WDIR"
wdir_columns = [col for col in yhat_unscaled_df.columns if col.startswith("WDIR")]

# Modify the values in the selected columns
yhat_unscaled_df[wdir_columns] = yhat_unscaled_df[wdir_columns] % 360

yhat_unscaled_df

Unnamed: 0,WDIR_42001_pred,WSPD_42001_pred,PRES_42001_pred,ATMP_42001_pred,WTMP_42001_pred,DEWP_42001_pred,WDIR_42002_pred,WSPD_42002_pred,PRES_42002_pred,ATMP_42002_pred,...,WDIR_42039_ERA5_pred,WSPD_42039_ERA5_pred,ATMP_42039_ERA5_pred,WSPD_42035_ERA5_pred,WSPD_42001_ERA5_pred,DEWP_42020_ERA5_pred,ATMP_42019_ERA5_pred,WTMP_42039_ERA5_pred,WSPD_42002_ERA5_pred,PRES_42039_ERA5_pred
2022-12-31 00:00:00,181.004929,-1.613021,-4.770545,-0.699644,-0.900683,-2.236889,340.262146,-5.553482,-0.897391,-0.419738,...,106.117859,-1.330827,-2.026993,0.355348,-0.048313,-1.429555,-2.238705,0.020209,0.231062,-0.94986
2022-12-31 01:00:00,153.792877,-1.614667,-4.748064,-0.700806,-0.900419,-2.239975,338.852661,-5.550149,-0.886973,-0.420351,...,326.813751,-1.359675,-2.031423,0.356995,-0.055516,-1.439693,-2.24264,0.020079,0.216955,-0.943733
2022-12-31 02:00:00,147.790466,-1.605179,-4.838012,-0.70305,-0.901353,-2.239955,338.526733,-5.569685,-0.945232,-0.417294,...,291.09964,-1.328351,-2.031884,0.354822,-0.060653,-1.401854,-2.220924,0.019491,0.224087,-0.955737
2022-12-31 03:00:00,251.893433,-1.607363,-4.705099,-0.699231,-0.899855,-2.239411,349.883728,-5.539085,-0.852375,-0.422122,...,160.473999,-1.368373,-2.031934,0.357185,-0.049558,-1.456493,-2.25524,0.020513,0.202442,-0.936655
2022-12-31 04:00:00,129.493835,-1.612553,-4.713374,-0.700811,-0.900203,-2.240116,332.802673,-5.544037,-0.868325,-0.423558,...,296.059998,-1.343874,-2.032681,0.355885,-0.047158,-1.455288,-2.254955,0.020055,0.224556,-0.934579
2022-12-31 05:00:00,74.741692,-1.605976,-4.897756,-0.703968,-0.902001,-2.238764,331.170593,-5.586896,-0.984891,-0.415325,...,247.824219,-1.290115,-2.030331,0.352869,-0.05869,-1.377318,-2.207604,0.01908,0.242003,-0.964053
2022-12-31 06:00:00,120.373848,-1.614301,-4.934953,-0.703543,-0.902274,-2.238032,330.417999,-5.591302,-1.011824,-0.415633,...,18.972046,-1.286852,-2.023575,0.365335,-0.066122,-1.368586,-2.202179,0.018847,0.232062,-0.974344
2022-12-31 07:00:00,178.339676,-1.612391,-4.907498,-0.703678,-0.901578,-2.240569,338.487701,-5.57672,-0.991919,-0.417067,...,136.138062,-1.320129,-2.025574,0.351233,-0.067885,-1.382164,-2.206639,0.019025,0.220815,-0.96892
2022-12-31 08:00:00,150.286926,-1.616956,-4.884286,-0.701741,-0.901529,-2.238487,335.250519,-5.57802,-0.977968,-0.417343,...,338.11853,-1.311304,-2.022185,0.36414,-0.06291,-1.392151,-2.215679,0.019157,0.222885,-0.96772
2022-12-31 09:00:00,121.625717,-1.60647,-4.992885,-0.702674,-0.90244,-2.238186,333.051849,-5.608409,-1.048625,-0.414849,...,122.89801,-1.270491,-2.018897,0.366292,-0.067558,-1.348791,-2.189194,0.018459,0.223692,-0.98479


In [16]:
evaluation_1 = data.tail(len(yhat)+1).copy()  #+1 since i need that value for de-differencing
evaluation_1

Unnamed: 0,WDIR_42001,WSPD_42001,PRES_42001,ATMP_42001,WTMP_42001,DEWP_42001,WDIR_42002,WSPD_42002,PRES_42002,ATMP_42002,...,WDIR_42039_ERA5,WSPD_42039_ERA5,ATMP_42039_ERA5,WSPD_42035_ERA5,WSPD_42001_ERA5,DEWP_42020_ERA5,ATMP_42019_ERA5,WTMP_42039_ERA5,WSPD_42002_ERA5,PRES_42039_ERA5
2022-12-30 23:00:00,185.0,5.9,1012.9,25.3,24.2,24.4,322.0,5.5,1011.7,24.7,...,113.871622,10.54264,23.935086,2.632245,5.301299,18.452689,19.872416,24.392457,3.651173,1015.146367
2022-12-31 00:00:00,189.0,5.0,1013.5,25.2,24.3,24.5,333.0,5.6,1012.3,24.6,...,108.235067,10.13632,24.123266,3.286975,5.057694,18.248043,20.009884,24.392457,1.420595,1015.755665
2022-12-31 01:00:00,203.0,5.6,1013.8,24.7,24.3,23.5,359.0,4.6,1012.9,24.6,...,97.574364,9.799587,24.028398,4.023334,5.587926,17.794162,20.081088,24.392457,1.13469,1016.328747
2022-12-31 02:00:00,169.0,2.3,1014.5,24.8,24.3,24.4,18.0,4.2,1013.5,24.7,...,84.710673,10.336202,22.973194,3.117344,5.840792,17.447368,20.151057,24.392457,1.613154,1016.790195
2022-12-31 03:00:00,53.0,6.0,1014.8,22.7,24.4,22.1,51.0,4.0,1014.1,24.5,...,70.760477,11.014376,23.074671,1.771335,6.463279,17.274733,20.047338,24.392457,1.853178,1017.056071
2022-12-31 04:00:00,55.0,8.5,1015.2,22.3,24.4,21.9,69.0,3.1,1014.4,24.3,...,61.68271,9.975791,23.811448,1.62455,6.500897,17.075041,19.827142,24.392457,2.384701,1017.194548
2022-12-31 05:00:00,72.0,7.6,1014.9,22.6,24.3,22.1,89.0,3.7,1014.4,24.3,...,65.365573,9.259451,24.030731,1.979889,6.541557,16.895165,19.694613,24.392457,3.354966,1016.693048
2022-12-31 06:00:00,77.0,7.2,1014.4,23.0,24.4,23.0,118.0,2.6,1014.5,24.4,...,77.495898,8.763196,24.01168,2.840376,6.465415,16.681753,19.66786,24.392457,4.305897,1016.111445
2022-12-31 07:00:00,97.0,7.8,1014.1,23.1,24.4,23.0,151.0,2.8,1014.6,24.5,...,81.234776,9.032894,24.101493,3.88953,6.487022,16.582288,19.685558,24.392457,4.524105,1015.494477
2022-12-31 08:00:00,148.0,3.3,1013.9,24.4,24.4,24.3,195.0,1.2,1014.8,24.6,...,77.326712,9.596466,24.324664,3.860175,6.690521,16.566664,19.631641,24.392457,4.23395,1015.054333


In [17]:
for col in evaluation_1.columns:
    evaluation_1[f"{col}_pred"]= evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_unscaled_df[f"{col}_pred"]

evaluation_1 = evaluation_1.iloc[STATIONARY_SHIFT:]  # remove first n entries since there is no delta value for them
evaluation_1

  evaluation_1[f"{col}_pred"]= evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_unscaled_df[f"{col}_pred"]
  evaluation_1[f"{col}_pred"]= evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_unscaled_df[f"{col}_pred"]
  evaluation_1[f"{col}_pred"]= evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_unscaled_df[f"{col}_pred"]
  evaluation_1[f"{col}_pred"]= evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_unscaled_df[f"{col}_pred"]
  evaluation_1[f"{col}_pred"]= evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_unscaled_df[f"{col}_pred"]


Unnamed: 0,WDIR_42001,WSPD_42001,PRES_42001,ATMP_42001,WTMP_42001,DEWP_42001,WDIR_42002,WSPD_42002,PRES_42002,ATMP_42002,...,WDIR_42039_ERA5_pred,WSPD_42039_ERA5_pred,ATMP_42039_ERA5_pred,WSPD_42035_ERA5_pred,WSPD_42001_ERA5_pred,DEWP_42020_ERA5_pred,ATMP_42019_ERA5_pred,WTMP_42039_ERA5_pred,WSPD_42002_ERA5_pred,PRES_42039_ERA5_pred
2022-12-31 00:00:00,189.0,5.0,1013.5,25.2,24.3,24.5,333.0,5.6,1012.3,24.6,...,219.989481,9.211813,21.908093,2.987593,5.252986,17.023135,17.633711,24.412666,3.882235,1014.196507
2022-12-31 01:00:00,203.0,5.6,1013.8,24.7,24.3,23.5,359.0,4.6,1012.9,24.6,...,435.048818,8.776645,22.091843,3.643971,5.002178,16.80835,17.767244,24.412537,1.637549,1014.811932
2022-12-31 02:00:00,169.0,2.3,1014.5,24.8,24.3,24.4,18.0,4.2,1013.5,24.7,...,388.674003,8.471236,21.996514,4.378156,5.527273,16.392308,17.860164,24.411949,1.358777,1015.37301
2022-12-31 03:00:00,53.0,6.0,1014.8,22.7,24.4,22.1,51.0,4.0,1014.1,24.5,...,245.184672,8.967829,20.94126,3.474529,5.791234,15.990875,17.895816,24.41297,1.815596,1015.85354
2022-12-31 04:00:00,55.0,8.5,1015.2,22.3,24.4,21.9,69.0,3.1,1014.4,24.3,...,366.820474,9.670502,21.04199,2.12722,6.416122,15.819445,17.792384,24.412512,2.077735,1016.121492
2022-12-31 05:00:00,72.0,7.6,1014.9,22.6,24.3,22.1,89.0,3.7,1014.4,24.3,...,309.506929,8.685676,21.781116,1.977419,6.442206,15.697723,17.619538,24.411538,2.626704,1016.230495
2022-12-31 06:00:00,77.0,7.2,1014.4,23.0,24.4,23.0,118.0,2.6,1014.5,24.4,...,84.337619,7.972599,22.007156,2.345224,6.475435,15.526579,17.492434,24.411305,3.587028,1015.718704
2022-12-31 07:00:00,97.0,7.8,1014.1,23.1,24.4,23.0,151.0,2.8,1014.6,24.5,...,213.633959,7.443067,21.986106,3.191609,6.39753,15.299589,17.461221,24.411482,4.526712,1015.142525
2022-12-31 08:00:00,148.0,3.3,1013.9,24.4,24.4,24.3,195.0,1.2,1014.8,24.6,...,419.353307,7.72159,22.079308,4.253671,6.424112,15.190138,17.469879,24.411615,4.74699,1014.526757
2022-12-31 09:00:00,195.0,5.4,1014.0,25.0,24.4,24.6,207.0,2.3,1014.4,24.5,...,200.224722,8.325974,22.305767,4.226467,6.622963,15.217872,17.442447,24.410916,4.457642,1014.069543


In [18]:
wtmp_true = [col for col in evaluation_1.columns if col.startswith("WTMP")][0]

mae = mean_absolute_error(evaluation_1[wtmp_true], evaluation_1[f"{wtmp_true}_pred"])
mse = mean_squared_error(evaluation_1[wtmp_true], evaluation_1[f"{wtmp_true}_pred"])
print('MAE: ', mae)
print('MSE: ', mse)

MAE:  0.9131892790397008
MSE:  0.844276077415207


In [19]:
#evaluation_1.plot(kind='line')

# Recurrent forecast (EXCLUDED FOR NOW!)

In [20]:
# model.reset_states()
# _ = model.predict(train_X_scaled, batch_size=1)

In [21]:
# # make a one-step forecast
# # This function helps with reshaping.
# def single_forecast(model, x):
#     x = x.reshape(1, 1, len(x[0]))
#     yhat = model.predict(x, verbose=0)
#     return yhat

In [22]:
# # Prepare data structure
# prediction_2 = pd.DataFrame(test_y_scaled, columns=output_cols)
# for col in output_cols:
#     prediction_2[f"{col}_pred"] = 0
#
# prediction_2

In [23]:
# #Forecast a whole week
# prev_obs = test_X_scaled[0]
#
# for i, row in prediction_2.iterrows():
#     yhat = single_forecast(model, prev_obs)
#     prev_obs = yhat
#
#     #Mapping of array index and df column name
#     for j, element in enumerate(output_cols):
#         prediction_2.at[i, f"{element}_pred"] =yhat[0,j]
#
# # prediction_2.plot(kind='line')
# prediction_2

In [24]:
# #Reverse differenciate
# first_row = data.iloc[-len(yhat)-1]
#
# yhat = prediction_2[[name + "_pred" for name in output_cols]].values
# yhat_unscaled = DP.invert_scaling(yhat, SCALER)
# yhat_true_value = DP.stationary_to_data(yhat_unscaled, first_row)
#
# yhat_true_value_df = pd.DataFrame(yhat_true_value, columns=[name + "_pred" for name in output_cols])
# yhat_true_value_df.set_index(data.tail(len(yhat)).index, inplace=True)
#
# true_value = data.tail(len(yhat)+1).copy()
#
# evaluation_2 = pd.concat([true_value, yhat_true_value_df], axis=1)
# evaluation_2 = evaluation_2.iloc[STATIONARY_SHIFT:]
# evaluation_2

In [25]:
# wtmp_true = [col for col in evaluation_2.columns if col.startswith("WTMP")][0]
#
# mae_2 = mean_absolute_error(evaluation_2[wtmp_true], evaluation_2[f"{wtmp_true}_pred"])
# mse_2 = mean_squared_error(evaluation_2[wtmp_true], evaluation_2[f"{wtmp_true}_pred"])
# print('MAE: ', mae_2)
# print('MSE: ', mse_2)

In [26]:
#evaluation_2.plot(kind='line')

# SAVE

In [27]:
# create a text input widget for username
filename_widget = widgets.Text(
    value='',
    placeholder='Enter filename',
    description='Filename:',
    disabled=False
)
# add '.csv' to the description
extension_label = widgets.Label('.pickle')

# display the widget
display(widgets.HBox([filename_widget, extension_label]))

print("Please also check if the reports description needs to be changed!")

HBox(children=(Text(value='', description='Filename:', placeholder='Enter filename'), Label(value='.pickle')))

Please also check if the reports description needs to be changed!


In [41]:
report_description="Test #12 GOM, dataset A, LSTM based PINN, Alpha = 0.2"

In [42]:
filename = filename_widget.value
if filename == "":
    print("Enter a valid filename!")

else:
    #Save Data About executed Test:

    # Convert model summary to string
    stringlist = []
    model.summary(print_fn=lambda x: stringlist.append(x))
    model_summary = "\n".join(stringlist)

    report = Experiment(
        name=filename,
        description=report_description,

        stations = dataset["stations"],
        years = dataset["years"],
        nan_threshold=dataset["nan_threshold"],
        features=dataset["features"],
        era5=dataset["add_era5"],

        stationary_shift=STATIONARY_SHIFT,

        n_test_hours=N_TEST_HOURS,

        #stationary=STATIONARY,
        scaler=SCALER,

        model_name = MODEL_NAME,
        model_summary=model_summary,

        one_shot_forecast = evaluation_1,
        recursive_forecast = None   # evaluation_2
    )


    # open a file for writing in binary mode
    filepath = f'data/reports/{report.name}.pickle'
    with open(filepath, 'wb') as f:
        # write the object to the file using pickle.dump()
        pickle.dump(report, f)
        print("File successfully saved:")
        print(filepath)

File successfully saved:
data/reports/report_01.pickle
