In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# %% load packages
import locale
import sys
import os
import pandas as pd
import numpy as np
import polars as pl
import matplotlib.pyplot as plt
import optuna
import requests
import torch
import random
from sqlalchemy import create_engine,inspect
from pathlib import Path
import urllib.parse
import pyarrow
from calendar import day_abbr
import calendar
from typing import Tuple, Union, Dict, List
from concurrent.futures import ThreadPoolExecutor, as_completed
from pygam import LinearGAM, s
from datetime import datetime


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from srs.utils.tutor_utils import prepare_dataset_tensor, forecasting_study,\
  plot_daily_profile,plot_hour_comparison, build_multiwindow_experts, tune_ewa_eta, \
  ewa_aggregate_forecasts, compute_error_table, tune_expert_window, \
  run_expert_window_test, build_regression_matrix, SimpleMLP, train_mlp, \
  prepare_train_test_tensors, build_mlp_rolling_forecasts, tune_mlp_hyperparameters, \
  DST_trafo
  
from srs.utils.our_utils import run_forecast_step
from srs.collect_data.setup import setup_seed, get_device
from srs.collect_data.entsoe_data import create_entsoe_engine, get_tables, get_spec, \
  get_market_divisions,get_map_codes,get_map_codes_starting_with, get_resolution_codes, \
    prepare_generation, prepare_load,prepare_price, prepare_unavailability, \
    prepare_filling_rate_hydro, prepare_physical_flow, prepare_installed_capacity
from srs.collect_data.datastream_data import create_datastream_engine, get_tables, \
  prepare_datastream
from srs.collect_data.dwd_mosmix_data import fetch_region_weather, prepare_weather
from srs.collect_data.merge_data import merge_datasets, build_training_dataset



In [4]:
# 1. Transform merged dataset using DST_trafo and prepare training data.

repo_root = Path.cwd().parents[1]
data_csv_path = repo_root / "data" / "data_no1.csv"

# Number of days held back at the end of the sample as the test period.
N = 2 * 365

# Raw frame; in the cells shown here only its column names are used later.
data_no1 = pd.read_csv(data_csv_path)
data_t_no1, train_t_no1, train_dates, price_t_no1 = prepare_dataset_tensor(
    data_csv_path,
    tz="CET",
    seed=42,
    test_days=N,
    dtype=torch.float64,
)

# Short aliases used by the forecasting cells below.
data_array = data_t_no1
price_S = price_t_no1
dates_S = train_dates

D = 730                   # passed to run_forecast_step; presumably the rolling-window length in days — TODO confirm
S = 24                    # second dimension of the data tensors (hours per day)
WD = [1, 6, 7]            # presumably the weekdays that get dummy regressors — TODO confirm
PRICE_S_LAGS = [1, 2, 7]  # presumably price lags in days — TODO confirm
da_lag = [0]

# Everything except the last N (test) days is available for validation.
dat_eval = data_array[:-N, :, :]
days_eval = pd.to_datetime(dates_S)[:-N]

# Validation period length
length_eval = 2 * 365

# The first observation in the evaluation period.
# It is counted inside the non-test data (dat_eval): counting from the end of
# the full data_array would make the validation window identical to the test
# window and would not match the dates in days_eval[-N:] used for plotting.
# NOTE(review): assumes run_forecast_step forecasts day `begin_eval + n` of the
# arrays it receives — confirm against its implementation.
begin_eval = dat_eval.shape[0] - length_eval
N_s = length_eval

# Order of the model slots along the last axis of the forecast tensor.
model_names = [
    "true",
    "expert_ext",
    "linar_gam",
    "light_gbm"
]
n_models = len(model_names)

In [None]:
# Rolling forecast study: one independent task per evaluation day, executed in
# a pool of worker processes.
from concurrent.futures import ProcessPoolExecutor, as_completed

# 3D tensor to hold forecasts, indexed (evaluation day, hour, model slot).
# Entries stay NaN until a forecast is written into them.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
forecasts = torch.full((N_s, S, n_models), float('nan'), dtype=torch.float64, device=device)

# All columns except the first; passed both as regressor names and data columns.
feature_columns = data_no1.columns[1:]

# Evaluation steps whose task (or result handling) raised; their rows stay NaN.
failed_steps = []

init_time = datetime.now()
# 2. Create process pool and submit one task per evaluation step
with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
    futures = [
        executor.submit(
            run_forecast_step,
            n,
            price_S,
            data_array,
            begin_eval,
            D,
            dates_S,
            WD,
            PRICE_S_LAGS,
            da_lag,
            feature_columns,  # reg_names
            feature_columns   # data_columns
        )
        for n in range(N_s)
    ]
    # Remember which step each future belongs to so failures can be reported
    # by step index (a failed future cannot return its own `n`).
    step_of_future = dict(zip(futures, range(N_s)))

    for future in as_completed(futures):
        step = step_of_future[future]
        try:
            n, gam = future.result()
            # NOTE(review): slot 1 is model_names[1] == "expert_ext", while the
            # value stored here is named `gam` — confirm the intended slot.
            # Slot 0 ("true") is not filled anywhere in this cell.
            # Index assignment copies the values, so no explicit clone is needed.
            forecasts[n, :, 1] = gam.detach().to(dtype=forecasts.dtype, device=forecasts.device)
        except Exception as e:
            # Best effort: keep going, but record which step failed and why.
            failed_steps.append(step)
            print(f"Forecast step {step} failed: {e!r}")

if failed_steps:
    print(f"{len(failed_steps)} of {N_s} forecast steps failed; their rows remain NaN.")

# End timing
end_time = datetime.now()
duration_minutes = (end_time - init_time).total_seconds() / 60
print(f"\nParallel training duration (process pool): {duration_minutes:.2f} minutes")

In [6]:
# Sanity check: shapes of the working aliases (regressors, prices, dates).
for checked_obj in (data_array, price_S, dates_S):
    print(checked_obj.shape)

torch.Size([2193, 24, 10])
torch.Size([2193, 24])
(2193,)


In [7]:
# Sanity check: shapes of everything returned by prepare_dataset_tensor.
for prepared_obj in (data_t_no1, price_t_no1, train_dates, train_t_no1):
    print(prepared_obj.shape)

torch.Size([2193, 24, 10])
torch.Size([2193, 24])
(2193,)
torch.Size([2193, 24, 10])


In [None]:
# -------------------------------------------------------------------------------------------------------------------------
# Estimate results on the validation dataset: RMSE of every model slot
# measured against the "true" slot (index 0) of the forecast tensor.
# NOTE(review): slot 0 is not filled in any cell visible here; while it is NaN
# every RMSE below is NaN as well.
true_values = forecasts[:, :, 0]

# Trailing singleton axis so the true values line up with the model axis
true_expanded = true_values[..., None]

# True values presented once per model slot (a view, no data is copied)
FFT = true_expanded.expand(-1, -1, forecasts.shape[2])
squared_errors = (FFT - forecasts).pow(2)

# Average squared error over all days and hours (dim=0 and dim=1)
mse_per_model = squared_errors.mean(dim=(0, 1))

# Square root gives the RMSE per model
rmse_per_model = mse_per_model.sqrt()
print(rmse_per_model)

# %%#####################################################################
####################### Comparison Plots ################################
#########################################################################

# Chart for one specific hour across all evaluation days.
hour = 14

# Actual and forecasted prices for that hour (slot 0 = true, slot 1 = forecast)
true_values = forecasts[:, hour, 0].cpu().numpy()
forecast_values = forecasts[:, hour, 1].cpu().numpy()

# Dates used on the x-axis: the last N days of days_eval.
# NOTE(review): confirm these are the days the forecasts were produced for, and
# that slot 1 really holds the "Expert" forecast named in the legend.
dates_x = days_eval[-N:]

# Line plot comparison
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(dates_x, true_values, label="True")
ax.plot(dates_x, forecast_values, label="Forecast (Expert)", alpha=0.7, linewidth=2)
ax.set_title(f"Forecast vs True Values at hour {hour} Across Test Data")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

#%%
def plot_model_forecasts(forecast_block):
    """Plot all model slots of a forecast block as hour-by-hour series.

    Parameters
    ----------
    forecast_block : torch.Tensor
        Tensor indexed (day, hour, model slot) with at least four model slots.
        Each slot is flattened over days and hours and drawn as one line.
    """
    # (slot index, legend label, line style) — labels follow the original plots.
    # NOTE(review): confirm the labels match what is actually stored per slot.
    series_specs = [
        (0, "True", {"linewidth": 2}),
        (1, "Expert Forecast", {"linestyle": "--"}),
        (2, "GAM", {"linestyle": ":"}),
        (3, "light gbm", {"linestyle": "--"}),
    ]
    plt.figure(figsize=(10, 4))
    for slot, label, line_style in series_specs:
        plt.plot(forecast_block[:, :, slot].flatten().cpu().numpy(), label=label, **line_style)
    plt.xlabel("Hours")
    plt.ylabel("Price")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


# Chart for the last days: a negative `obs` selects the last |obs| days.
obs = -15
plot_model_forecasts(forecasts[obs:])

# %%
# Chart for the whole validation period
plot_model_forecasts(forecasts)

#%%#######################################################
############## Test Data ###############
###########################################################

# Use all data including the test data
dat_test = data_array
days_test = pd.to_datetime(dates_S)

#%%#######################################################
############## Define test data ###############
##########################################################

# Test period length in days; N_s is rebound to it for the test run
length_test = 2 * 365
N_s = length_test

# The first observation in the test period
begin_test = len(days_test) - length_test

# 3D tensor to hold the test forecasts (day, hour, model slot), NaN until filled
forecasts_test = torch.full(
    size=(N_s, S, n_models),
    fill_value=float('nan'),
    dtype=torch.float64,
    device=device,
)

NameError: name 'n' is not defined