# Test Environment

In [2]:
import ipywidgets as widgets
from IPython.display import display
import numpy as np
import pandas as pd
#import Models.models as models
#import Models.LSTM.models_LSTM as models
import myLibrary as mL
#from Experiment_Class import Experiment
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pickle
import os

NDBC = mL.NDBC_lib
ERA5 = mL.ERA5_lib
Models = mL.Models
DP = mL.DataProcessor
Experiment = mL.Experiment

%load_ext jupyternotify

<IPython.core.display.Javascript object>

# Get Data

In [3]:
data_directory = os.path.join(os.getcwd(), f'data/datasets/type_A')

def build_UI():

    # Select Model-------------------------------------------------------------------------------
    global datafile_widget
    datafile_list = os.listdir(data_directory)
    datafile_widget = widgets.Select(
        options=datafile_list,
        value=datafile_list[0],
        # rows=10,
        description='Datafile:',
        disabled=False
    )
    display(datafile_widget)

    #STATIONARY_SHIFT -----------------------------------------------------------------------
    global stationaryShift_widget
    stationaryShift_widget = widgets.BoundedIntText(
        value=1,
        min=0,
        max=10,
        step=1,
        description='',
        disabled=False,
    )
    print("Stationary Shilft: ")
    display(stationaryShift_widget)

    # Test Hours-------------------------------------------------------------------------------
    global test_hours_widget
    test_hours_widget = widgets.IntSlider(
        value=24,
        min=0,
        max=1000,
        step=1,
        description='Test Hours:',
        disabled=False,
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format='d'
    )

    # display the range slider widget
    display(test_hours_widget)
    #print("(1 Week = 168h)")

    # Select Model-------------------------------------------------------------------------------
    global models_widget
    models_list = list(Models.model_dictionary.keys())
    models_widget = widgets.Select(
        options=models_list,
        value=models_list[0],
        rows=10,
        description='Model:',
        disabled=False
    )
    display(models_widget)

    #ALPHA:-------------------------------------------------------------------------------
    # create a FloatSlider widget for a value between 0 and 1
    print("Alpha (only for PINN):")
    global alpha_slider
    alpha_slider = widgets.FloatSlider(
        value=0.5,
        min=0,
        max=1,
        step=0.01,
        description='',
        readout_format='.2f',
        orientation='horizontal',
        layout={'width': '500px'}
    )

    # display the FloatSlider widget
    display(alpha_slider)

build_UI()

Select(description='Datafile:', options=('.DS_Store', 'GOM_2_A.pickle', 'dataset_GOM_1_A_A.pickle', 'GOM_1_A.p…

Stationary Shilft: 


BoundedIntText(value=1, max=10)

IntSlider(value=24, continuous_update=False, description='Test Hours:', max=1000)

Select(description='Model:', options=('LSTM', 'GRU', 'CNN', 'TCN'), rows=10, value='LSTM')

Alpha (only for PINN):


FloatSlider(value=0.5, layout=Layout(width='500px'), max=1.0, step=0.01)

In [4]:
# DATAFILE = datafile_widget.value
# STATIONARY_SHIFT = stationaryShift_widget.value
# N_TEST_HOURS = test_hours_widget.value
# MODEL_NAME = models_widget.value
# ALPHA = alpha_slider.value

## Optional: use hardcoded variables instead

In [7]:
DATAFILE = "dataset_GOM_1_A_A.pickle"
STATIONARY_SHIFT = 1
N_TEST_HOURS = 24
MODEL_NAME = "LSTM"
ALPHA = 0

In [8]:
print(f"Datafile: {DATAFILE}")
print(f"Alpha: {ALPHA}")
print(f"Stationary Shift: {STATIONARY_SHIFT}")
print(f"Test-Hours: {N_TEST_HOURS}")
print(f"Model: {MODEL_NAME}")

Datafile: dataset_GOM_1_A_A.pickle
Alpha: 0
Stationary Shift: 1
Test-Hours: 24
Model: LSTM


In [9]:
with open(f'data/datasets/type_A/{DATAFILE}', 'rb') as f:
    # load the object from the file using pickle.load()
    dataset = pickle.load(f)

print("stations",dataset["stations"])
print("years",dataset["years"])
print("nan_threshold",dataset["nan_threshold"])
print("features",dataset["features"])
print("add_era5",dataset["add_era5"])

data = dataset["data"]
data

stations ['42001', '42002', '42003', '42007', '42012', '42019', '42020', '42035', '42036', '42038', '42039', '42040', '42041', '42055']
years ['2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022']
nan_threshold 0.66
features ['WDIR', 'WSPD', 'WVHT', 'APD', 'MWD', 'PRES', 'ATMP', 'WTMP', 'DEWP']
add_era5 True


Unnamed: 0,WDIR_42001,WSPD_42001,PRES_42001,ATMP_42001,WTMP_42001,DEWP_42001,WDIR_42002,WSPD_42002,PRES_42002,ATMP_42002,...,WDIR_42039_ERA5,WSPD_42039_ERA5,ATMP_42039_ERA5,WSPD_42035_ERA5,WSPD_42001_ERA5,DEWP_42020_ERA5,ATMP_42019_ERA5,WTMP_42039_ERA5,WSPD_42002_ERA5,PRES_42039_ERA5
2002-01-01 00:00:00,66.0,9.3,1017.1,22.3,25.5,16.8,39.0,10.5,1016.1,21.7,...,246.007357,5.756333,13.882608,8.031200,9.867456,10.834305,11.708612,21.781113,9.820263,1019.426223
2002-01-01 01:00:00,66.0,9.3,1017.1,22.3,25.5,16.8,39.0,10.5,1016.1,21.7,...,247.678051,5.579721,14.020573,8.216895,9.782997,10.975658,11.926516,21.781113,10.465795,1019.792677
2002-01-01 02:00:00,67.0,9.4,1017.2,21.9,25.5,16.6,36.0,10.9,1016.1,21.7,...,250.591891,5.582730,14.070538,8.454808,9.517146,11.111871,12.166319,21.781113,11.760698,1019.725358
2002-01-01 03:00:00,69.0,9.1,1017.2,22.4,25.5,16.9,32.0,12.7,1015.9,20.8,...,253.468273,5.633966,14.058979,8.471692,8.911373,11.239089,12.361607,21.781113,11.910608,1019.833394
2002-01-01 04:00:00,70.0,9.0,1017.1,22.5,25.5,16.3,33.0,12.7,1015.8,21.0,...,251.493918,5.638108,13.986641,8.698506,8.481407,11.332468,12.482585,21.781113,11.716782,1019.804620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-31 19:00:00,195.0,0.5,1015.0,25.6,24.7,24.5,22.0,0.6,1015.9,25.1,...,28.180851,8.602918,22.730194,2.533178,0.601025,19.323104,21.847184,24.392457,1.062895,1016.581840
2022-12-31 20:00:00,210.0,1.0,1015.0,25.3,24.5,24.3,88.0,1.2,1015.4,24.7,...,22.969408,7.734290,22.564177,2.669909,0.979714,19.245361,22.052563,24.392457,1.330008,1016.363260
2022-12-31 21:00:00,231.0,0.6,1014.7,26.4,24.7,24.4,87.0,1.7,1014.9,24.6,...,14.655430,6.485218,22.377552,3.147255,2.067327,19.338348,22.260412,24.392457,1.770915,1016.186435
2022-12-31 22:00:00,18.0,0.7,1014.9,25.4,24.7,24.1,90.0,2.5,1014.9,24.5,...,20.371840,3.294807,24.068445,3.469271,1.554299,18.567779,22.101541,24.885927,2.483432,1017.067575


# Data Processing

## 1. Make data stationary

In [10]:
STATIONARY = True #Set Flag for report
data_stationary = DP.data_to_stationary(data, n = STATIONARY_SHIFT)
data_stationary.head()

  data_stationary[col] = data[col] - data[col].shift(n)  # y = value(i) - value(i-n)
  data_stationary[col] = data[col] - data[col].shift(n)  # y = value(i) - value(i-n)
  data_stationary[col] = data[col] - data[col].shift(n)  # y = value(i) - value(i-n)
  data_stationary[col] = data[col] - data[col].shift(n)  # y = value(i) - value(i-n)


Unnamed: 0,WDIR_42001,WSPD_42001,PRES_42001,ATMP_42001,WTMP_42001,DEWP_42001,WDIR_42002,WSPD_42002,PRES_42002,ATMP_42002,...,WDIR_42039_ERA5,WSPD_42039_ERA5,ATMP_42039_ERA5,WSPD_42035_ERA5,WSPD_42001_ERA5,DEWP_42020_ERA5,ATMP_42019_ERA5,WTMP_42039_ERA5,WSPD_42002_ERA5,PRES_42039_ERA5
2002-01-01 01:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.670694,-0.176612,0.137965,0.185695,-0.084459,0.141353,0.217904,0.0,0.645532,0.366454
2002-01-01 02:00:00,1.0,0.1,0.1,-0.4,0.0,-0.2,-3.0,0.4,0.0,0.0,...,2.91384,0.003009,0.049966,0.237913,-0.265851,0.136213,0.239802,0.0,1.294903,-0.067319
2002-01-01 03:00:00,2.0,-0.3,0.0,0.5,0.0,0.3,-4.0,1.8,-0.2,-0.9,...,2.876383,0.051235,-0.011559,0.016884,-0.605774,0.127218,0.195288,0.0,0.14991,0.108036
2002-01-01 04:00:00,1.0,-0.1,-0.1,0.1,0.0,-0.6,1.0,0.0,-0.1,0.2,...,-1.974355,0.004142,-0.072338,0.226814,-0.429966,0.093379,0.120978,0.0,-0.193825,-0.028773
2002-01-01 05:00:00,-1.0,0.6,-0.6,0.0,-0.1,0.8,9.0,-0.9,-0.2,-0.2,...,-3.817351,-0.199347,-0.181964,0.199733,-0.44634,0.097662,0.066412,0.0,-0.422577,-0.087949


## 2. Transform to supervised problem

In [11]:
data_supervised = DP.data_to_supervised(data_stationary, n_in=3)
data_supervised.head()

Unnamed: 0,WDIR_42001(t-3),WSPD_42001(t-3),PRES_42001(t-3),ATMP_42001(t-3),WTMP_42001(t-3),DEWP_42001(t-3),WDIR_42002(t-3),WSPD_42002(t-3),PRES_42002(t-3),ATMP_42002(t-3),...,WDIR_42039_ERA5(t),WSPD_42039_ERA5(t),ATMP_42039_ERA5(t),WSPD_42035_ERA5(t),WSPD_42001_ERA5(t),DEWP_42020_ERA5(t),ATMP_42019_ERA5(t),WTMP_42039_ERA5(t),WSPD_42002_ERA5(t),PRES_42039_ERA5(t)
2002-01-01 04:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-1.974355,0.004142,-0.072338,0.226814,-0.429966,0.093379,0.120978,0.0,-0.193825,-0.028773
2002-01-01 05:00:00,1.0,0.1,0.1,-0.4,0.0,-0.2,-3.0,0.4,0.0,0.0,...,-3.817351,-0.199347,-0.181964,0.199733,-0.44634,0.097662,0.066412,0.0,-0.422577,-0.087949
2002-01-01 06:00:00,2.0,-0.3,0.0,0.5,0.0,0.3,-4.0,1.8,-0.2,-0.9,...,1.650662,0.179143,-0.247964,0.187775,0.416342,0.074103,0.066053,0.0,-0.400801,0.382198
2002-01-01 07:00:00,1.0,-0.1,-0.1,0.1,0.0,-0.6,1.0,0.0,-0.1,0.2,...,0.094929,0.381179,-0.362064,0.180239,0.504338,0.097662,0.114516,0.0,-0.92308,-0.081434
2002-01-01 08:00:00,-1.0,0.6,-0.6,0.0,-0.1,0.8,9.0,-0.9,-0.2,-0.2,...,-0.23538,0.35871,-0.351624,0.27488,0.330289,0.122506,0.10985,0.0,-1.613417,-0.093921


## train test split

In [12]:
train_X, train_y, test_X, test_y = DP.train_test_split(data_supervised, N_TEST_HOURS)
print("Shapes: ", train_X.shape, train_y.shape, test_X.shape, test_y.shape)

Shapes:  (184052, 3, 104) (184052, 104) (24, 3, 104) (24, 104)


## Normalize the data

In [13]:
# Scale the data
#NORMALIZED = True   #set flag for report

#train_X_scaled, train_y_scaled, test_X_scaled, test_y_scaled, SCALER = DP.scale_data(train_X,
#                                                                                     train_y,
#                                                                                     test_X,
#                                                                                     test_y)

train_X_scaled, train_y_scaled, test_X_scaled, test_y_scaled = train_X, train_y, test_X, test_y #just to exclude scaling for now!




---
# Select Model

In [14]:
print(train_X_scaled.shape)
print(train_y_scaled.shape)

(184052, 3, 104)
(184052, 104)


In [15]:
model = Models.get_model(MODEL_NAME, train_X_scaled, train_y_scaled, ALPHA)


Epoch 1/100


2023-06-27 08:04:20.732709: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

----
# One-Shot Forecasting

In [16]:
_ = model.predict(train_X_scaled, batch_size=1)



In [17]:
output_cols = data.columns.tolist()
output_cols

['WDIR_42001',
 'WSPD_42001',
 'PRES_42001',
 'ATMP_42001',
 'WTMP_42001',
 'DEWP_42001',
 'WDIR_42002',
 'WSPD_42002',
 'PRES_42002',
 'ATMP_42002',
 'WTMP_42002',
 'DEWP_42002',
 'WDIR_42003',
 'WSPD_42003',
 'PRES_42003',
 'ATMP_42003',
 'WTMP_42003',
 'DEWP_42003',
 'WDIR_42019',
 'WSPD_42019',
 'PRES_42019',
 'ATMP_42019',
 'WTMP_42019',
 'DEWP_42019',
 'WDIR_42020',
 'WSPD_42020',
 'PRES_42020',
 'ATMP_42020',
 'WTMP_42020',
 'DEWP_42020',
 'WDIR_42035',
 'WSPD_42035',
 'PRES_42035',
 'ATMP_42035',
 'WTMP_42035',
 'DEWP_42035',
 'WDIR_42036',
 'WSPD_42036',
 'PRES_42036',
 'ATMP_42036',
 'DEWP_42036',
 'WDIR_42039',
 'WSPD_42039',
 'PRES_42039',
 'ATMP_42039',
 'WTMP_42039',
 'WDIR_42040',
 'WSPD_42040',
 'PRES_42040',
 'ATMP_42040',
 'WTMP_42040',
 'DEWP_42040',
 'WTMP_42001_ERA5',
 'ATMP_42020_ERA5',
 'ATMP_42001_ERA5',
 'DEWP_42001_ERA5',
 'PRES_42019_ERA5',
 'ATMP_42040_ERA5',
 'PRES_42001_ERA5',
 'WDIR_42035_ERA5',
 'PRES_42003_ERA5',
 'DEWP_42003_ERA5',
 'WTMP_42035_ERA5',


In [18]:
yhat = model.predict(test_X_scaled)
# yhat_unscaled = DP.invert_scaling(yhat, SCALER)
yhat_unscaled = yhat # since test_X_scaled is actually not scaled - lol :D
yhat_unscaled_df = pd.DataFrame(yhat_unscaled, columns=[name + "_pred" for name in output_cols])
yhat_unscaled_df.set_index(data.tail(len(yhat)).index, inplace=True)
yhat_unscaled_df



Unnamed: 0,WDIR_42001_pred,WSPD_42001_pred,PRES_42001_pred,ATMP_42001_pred,WTMP_42001_pred,DEWP_42001_pred,WDIR_42002_pred,WSPD_42002_pred,PRES_42002_pred,ATMP_42002_pred,...,WDIR_42039_ERA5_pred,WSPD_42039_ERA5_pred,ATMP_42039_ERA5_pred,WSPD_42035_ERA5_pred,WSPD_42001_ERA5_pred,DEWP_42020_ERA5_pred,ATMP_42019_ERA5_pred,WTMP_42039_ERA5_pred,WSPD_42002_ERA5_pred,PRES_42039_ERA5_pred
2022-12-31 00:00:00,-0.067665,-0.007836,0.167321,-0.084559,0.104929,-0.031779,0.025267,0.062751,-0.138179,-0.056266,...,16.821949,0.036146,-0.031013,-0.002554,0.008022,-0.023686,-0.008213,-0.002591,-0.002721,0.0408
2022-12-31 01:00:00,-0.432389,0.02764,-0.09848,-0.529826,-0.077289,0.074303,0.637668,0.383176,-0.217658,0.714852,...,19.422321,-0.001101,-0.026467,-0.06593,-0.007728,-0.019003,-0.0484,-0.003061,-0.055443,0.068241
2022-12-31 02:00:00,-0.022016,0.178444,-0.137889,0.302488,-0.022078,0.44641,0.008618,0.435822,0.24358,0.379887,...,10.407141,-0.057829,-0.073988,0.010505,-0.093764,-0.12951,-0.184593,-0.006569,-0.152169,0.126858
2022-12-31 03:00:00,0.056639,0.003211,-0.001394,-0.040182,0.076263,-0.080128,0.039829,-0.136879,0.082483,-0.009333,...,1.354479,-0.00399,-0.051895,-0.048426,-6.9e-05,-0.06814,-0.06352,-0.003771,-0.007381,0.038534
2022-12-31 04:00:00,-0.108752,-0.172973,0.03455,-0.037077,0.100366,-0.076487,0.059895,-0.037153,-0.091535,-0.065216,...,-2.272961,0.016285,-0.020803,-0.029035,-0.003778,0.023807,0.021491,-0.002263,0.004143,0.006051
2022-12-31 05:00:00,-0.271184,0.056969,-0.172108,-0.485183,0.038879,-0.041742,0.755505,0.299332,-0.137994,0.820281,...,-9.409861,-0.001048,0.012667,-0.026081,-0.032857,-0.061659,-0.114619,-0.005504,-0.084066,0.033389
2022-12-31 06:00:00,-0.254871,0.169037,-0.181575,-0.612638,0.024905,-0.022933,0.918392,0.449026,-0.028297,1.141362,...,-4.570562,0.018595,-0.017439,-0.007963,-0.076637,-0.158216,-0.217387,-0.005497,-0.144545,0.091574
2022-12-31 07:00:00,-0.170443,-0.157249,-0.147704,-0.306118,-0.066344,0.014866,0.516033,0.110499,-0.064598,0.578646,...,0.104834,-0.063852,0.020294,-0.066571,-0.001979,-0.078352,-0.091087,-0.000168,0.012301,0.050614
2022-12-31 08:00:00,-0.076196,-0.154466,0.093333,-0.025134,0.094833,-0.023543,0.102461,0.003409,-0.123125,-0.037198,...,-2.806997,-0.008006,0.005213,0.023257,0.002638,-0.021384,-0.020847,-0.002138,0.000918,0.002629
2022-12-31 09:00:00,-0.118099,-0.191943,0.084528,-0.034235,0.094281,-0.040871,0.082747,-0.007529,-0.150594,-0.048964,...,-1.435867,-0.004851,0.00212,-0.015166,-0.001514,0.020367,0.012299,-0.002537,-0.004685,-0.01239


In [None]:
# # Get the list of columns starting with "WDIR"
# wdir_columns = [col for col in yhat_unscaled_df.columns if col.startswith("WDIR")]
#
# # Modify the values in the selected columns
# yhat_unscaled_df[wdir_columns] = yhat_unscaled_df[wdir_columns] % 360
#
# yhat_unscaled_df

In [20]:
evaluation_1 = data.tail(len(yhat)+1).copy()  #+1 since i need that value for de-differencing
evaluation_1

Unnamed: 0,WDIR_42001,WSPD_42001,PRES_42001,ATMP_42001,WTMP_42001,DEWP_42001,WDIR_42002,WSPD_42002,PRES_42002,ATMP_42002,...,WDIR_42039_ERA5,WSPD_42039_ERA5,ATMP_42039_ERA5,WSPD_42035_ERA5,WSPD_42001_ERA5,DEWP_42020_ERA5,ATMP_42019_ERA5,WTMP_42039_ERA5,WSPD_42002_ERA5,PRES_42039_ERA5
2022-12-30 23:00:00,185.0,5.9,1012.9,25.3,24.2,24.4,322.0,5.5,1011.7,24.7,...,113.871622,10.54264,23.935086,2.632245,5.301299,18.452689,19.872416,24.392457,3.651173,1015.146367
2022-12-31 00:00:00,189.0,5.0,1013.5,25.2,24.3,24.5,333.0,5.6,1012.3,24.6,...,108.235067,10.13632,24.123266,3.286975,5.057694,18.248043,20.009884,24.392457,1.420595,1015.755665
2022-12-31 01:00:00,203.0,5.6,1013.8,24.7,24.3,23.5,359.0,4.6,1012.9,24.6,...,97.574364,9.799587,24.028398,4.023334,5.587926,17.794162,20.081088,24.392457,1.13469,1016.328747
2022-12-31 02:00:00,169.0,2.3,1014.5,24.8,24.3,24.4,18.0,4.2,1013.5,24.7,...,84.710673,10.336202,22.973194,3.117344,5.840792,17.447368,20.151057,24.392457,1.613154,1016.790195
2022-12-31 03:00:00,53.0,6.0,1014.8,22.7,24.4,22.1,51.0,4.0,1014.1,24.5,...,70.760477,11.014376,23.074671,1.771335,6.463279,17.274733,20.047338,24.392457,1.853178,1017.056071
2022-12-31 04:00:00,55.0,8.5,1015.2,22.3,24.4,21.9,69.0,3.1,1014.4,24.3,...,61.68271,9.975791,23.811448,1.62455,6.500897,17.075041,19.827142,24.392457,2.384701,1017.194548
2022-12-31 05:00:00,72.0,7.6,1014.9,22.6,24.3,22.1,89.0,3.7,1014.4,24.3,...,65.365573,9.259451,24.030731,1.979889,6.541557,16.895165,19.694613,24.392457,3.354966,1016.693048
2022-12-31 06:00:00,77.0,7.2,1014.4,23.0,24.4,23.0,118.0,2.6,1014.5,24.4,...,77.495898,8.763196,24.01168,2.840376,6.465415,16.681753,19.66786,24.392457,4.305897,1016.111445
2022-12-31 07:00:00,97.0,7.8,1014.1,23.1,24.4,23.0,151.0,2.8,1014.6,24.5,...,81.234776,9.032894,24.101493,3.88953,6.487022,16.582288,19.685558,24.392457,4.524105,1015.494477
2022-12-31 08:00:00,148.0,3.3,1013.9,24.4,24.4,24.3,195.0,1.2,1014.8,24.6,...,77.326712,9.596466,24.324664,3.860175,6.690521,16.566664,19.631641,24.392457,4.23395,1015.054333


In [21]:
for col in evaluation_1.columns:
    evaluation_1[f"{col}_pred"]= evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_unscaled_df[f"{col}_pred"]

evaluation_1 = evaluation_1.iloc[STATIONARY_SHIFT:]  # remove first n entries since there is no delta value for them
evaluation_1

  evaluation_1[f"{col}_pred"]= evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_unscaled_df[f"{col}_pred"]
  evaluation_1[f"{col}_pred"]= evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_unscaled_df[f"{col}_pred"]
  evaluation_1[f"{col}_pred"]= evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_unscaled_df[f"{col}_pred"]
  evaluation_1[f"{col}_pred"]= evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_unscaled_df[f"{col}_pred"]
  evaluation_1[f"{col}_pred"]= evaluation_1[col].shift(STATIONARY_SHIFT) + yhat_unscaled_df[f"{col}_pred"]


Unnamed: 0,WDIR_42001,WSPD_42001,PRES_42001,ATMP_42001,WTMP_42001,DEWP_42001,WDIR_42002,WSPD_42002,PRES_42002,ATMP_42002,...,WDIR_42039_ERA5_pred,WSPD_42039_ERA5_pred,ATMP_42039_ERA5_pred,WSPD_42035_ERA5_pred,WSPD_42001_ERA5_pred,DEWP_42020_ERA5_pred,ATMP_42019_ERA5_pred,WTMP_42039_ERA5_pred,WSPD_42002_ERA5_pred,PRES_42039_ERA5_pred
2022-12-31 00:00:00,189.0,5.0,1013.5,25.2,24.3,24.5,333.0,5.6,1012.3,24.6,...,130.693571,10.578786,23.904074,2.629691,5.309321,18.429003,19.864203,24.389866,3.648452,1015.187167
2022-12-31 01:00:00,203.0,5.6,1013.8,24.7,24.3,23.5,359.0,4.6,1012.9,24.6,...,127.657388,10.135219,24.096799,3.221046,5.049966,18.22904,19.961484,24.389396,1.365152,1015.823906
2022-12-31 02:00:00,169.0,2.3,1014.5,24.8,24.3,24.4,18.0,4.2,1013.5,24.7,...,107.981504,9.741758,23.954411,4.033839,5.494162,17.664652,19.896495,24.385888,0.982521,1016.455605
2022-12-31 03:00:00,53.0,6.0,1014.8,22.7,24.4,22.1,51.0,4.0,1014.1,24.5,...,86.065152,10.332212,22.921299,3.068918,5.840723,17.379228,20.087536,24.388686,1.605773,1016.828729
2022-12-31 04:00:00,55.0,8.5,1015.2,22.3,24.4,21.9,69.0,3.1,1014.4,24.3,...,428.487527,11.030661,23.053869,1.7423,6.459502,17.29854,20.068829,24.390194,1.857321,1017.062122
2022-12-31 05:00:00,72.0,7.6,1014.9,22.6,24.3,22.1,89.0,3.7,1014.4,24.3,...,412.272859,9.974742,23.824115,1.598469,6.468039,17.013382,19.712524,24.386953,2.300635,1017.227937
2022-12-31 06:00:00,77.0,7.2,1014.4,23.0,24.4,23.0,118.0,2.6,1014.5,24.4,...,420.795017,9.278046,24.013292,1.971927,6.46492,16.736949,19.477226,24.38696,3.210421,1016.784622
2022-12-31 07:00:00,97.0,7.8,1014.1,23.1,24.4,23.0,151.0,2.8,1014.6,24.5,...,77.600732,8.699344,24.031973,2.773805,6.463436,16.603402,19.576773,24.392289,4.318198,1016.162059
2022-12-31 08:00:00,148.0,3.3,1013.9,24.4,24.4,24.3,195.0,1.2,1014.8,24.6,...,438.42777,9.024888,24.106706,3.912788,6.48966,16.560904,19.664711,24.390319,4.525023,1015.497106
2022-12-31 09:00:00,195.0,5.4,1014.0,25.0,24.4,24.6,207.0,2.3,1014.4,24.5,...,435.89086,9.591615,24.326784,3.845009,6.689007,16.58703,19.64394,24.38992,4.229266,1015.041943


# Correct wind direction (modulo 360)

In [None]:
# Get the list of columns starting with "WDIR"
wdir_columns = [col for col in evaluation_1.columns if col.startswith("WDIR")]

# Modify the values in the selected columns
evaluation_1[wdir_columns] = evaluation_1[wdir_columns] % 360

evaluation_1

# Recurrent forecast (EXCLUDED FOR NOW!)

In [24]:
# model.reset_states()
# _ = model.predict(train_X_scaled, batch_size=1)

In [25]:
# # make a one-step forecast
# # This function helps with reshaping.
# def single_forecast(model, x):
#     x = x.reshape(1, 1, len(x[0]))
#     yhat = model.predict(x, verbose=0)
#     return yhat

In [26]:
# # Prepare data structure
# prediction_2 = pd.DataFrame(test_y_scaled, columns=output_cols)
# for col in output_cols:
#     prediction_2[f"{col}_pred"] = 0
#
# prediction_2

In [27]:
# #Forecast a whole week
# prev_obs = test_X_scaled[0]
#
# for i, row in prediction_2.iterrows():
#     yhat = single_forecast(model, prev_obs)
#     prev_obs = yhat
#
#     #Mapping of array index and df column name
#     for j, element in enumerate(output_cols):
#         prediction_2.at[i, f"{element}_pred"] =yhat[0,j]
#
# # prediction_2.plot(kind='line')
# prediction_2

In [28]:
# #Reverse differenciate
# first_row = data.iloc[-len(yhat)-1]
#
# yhat = prediction_2[[name + "_pred" for name in output_cols]].values
# yhat_unscaled = DP.invert_scaling(yhat, SCALER)
# yhat_true_value = DP.stationary_to_data(yhat_unscaled, first_row)
#
# yhat_true_value_df = pd.DataFrame(yhat_true_value, columns=[name + "_pred" for name in output_cols])
# yhat_true_value_df.set_index(data.tail(len(yhat)).index, inplace=True)
#
# true_value = data.tail(len(yhat)+1).copy()
#
# evaluation_2 = pd.concat([true_value, yhat_true_value_df], axis=1)
# evaluation_2 = evaluation_2.iloc[STATIONARY_SHIFT:]
# evaluation_2

In [29]:
# wtmp_true = [col for col in evaluation_2.columns if col.startswith("WTMP")][0]
#
# mae_2 = mean_absolute_error(evaluation_2[wtmp_true], evaluation_2[f"{wtmp_true}_pred"])
# mse_2 = mean_squared_error(evaluation_2[wtmp_true], evaluation_2[f"{wtmp_true}_pred"])
# print('MAE: ', mae_2)
# print('MSE: ', mse_2)

In [30]:
#evaluation_2.plot(kind='line')

# SAVE

In [31]:
%%notify -m "Finished!!"
print("reached checkpoint")

reached checkpoint


<IPython.core.display.Javascript object>

In [33]:
# create a text input widget for username
filename_widget = widgets.Text(
    value='',
    placeholder='Enter filename',
    description='Filename:',
    disabled=False
)
# add '.csv' to the description
extension_label = widgets.Label('.pickle')

# display the widget
display(widgets.HBox([filename_widget, extension_label]))

print("Please also check if the reports description needs to be changed!")

HBox(children=(Text(value='', description='Filename:', placeholder='Enter filename'), Label(value='.pickle')))

Please also check if the reports description needs to be changed!


In [34]:
report_description="Test #01_b GOM, dataset A, TCN based PINN, Alpha = 0"

In [35]:
filename = filename_widget.value
if filename == "":
    print("Enter a valid filename!")

else:
    #Save Data About executed Test:

    # Convert model summary to string
    stringlist = []
    model.summary(print_fn=lambda x: stringlist.append(x))
    model_summary = "\n".join(stringlist)

    report = Experiment(
        name=filename,
        description=report_description,

        stations = dataset["stations"],
        years = dataset["years"],
        nan_threshold=dataset["nan_threshold"],
        features=dataset["features"],
        era5=dataset["add_era5"],

        stationary_shift=STATIONARY_SHIFT,

        n_test_hours=N_TEST_HOURS,

        #stationary=STATIONARY,
        scaler= None, # SCALER,

        model_name = MODEL_NAME,
        model_summary=model_summary,

        one_shot_forecast = evaluation_1,
        recursive_forecast = None   # evaluation_2
    )


    # open a file for writing in binary mode
    filepath = f'data/reports/{report.name}.pickle'
    with open(filepath, 'wb') as f:
        # write the object to the file using pickle.dump()
        pickle.dump(report, f)
        print("File successfully saved:")
        print(filepath)

File successfully saved:
data/reports/report_01_b.pickle
