In [16]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
# %% load packages
import locale
import sys
import os
import pandas as pd
import numpy as np
import polars as pl
import matplotlib.pyplot as plt
import optuna
import requests
import torch
import random
from sqlalchemy import create_engine,inspect
from pathlib import Path
import urllib.parse
import pyarrow
from calendar import day_abbr
import calendar
from typing import Tuple, Union, Dict, List
from concurrent.futures import ThreadPoolExecutor, as_completed
from pygam import LinearGAM, s
from datetime import datetime


In [18]:
from srs.utils.tutor_utils import prepare_dataset_tensor, forecasting_study,\
  plot_daily_profile,plot_hour_comparison, build_multiwindow_experts, tune_ewa_eta, \
  ewa_aggregate_forecasts, compute_error_table, tune_expert_window, \
  run_expert_window_test, build_regression_matrix, SimpleMLP, train_mlp, \
  prepare_train_test_tensors, build_mlp_rolling_forecasts, tune_mlp_hyperparameters, \
  DST_trafo
  
from srs.utils.our_utils import run_forecast_step
from srs.collect_data.setup import setup_seed, get_device
from srs.collect_data.entsoe_data import create_entsoe_engine, get_tables, get_spec, \
  get_market_divisions,get_map_codes,get_map_codes_starting_with, get_resolution_codes, \
    prepare_generation, prepare_load,prepare_price, prepare_unavailability, \
    prepare_filling_rate_hydro, prepare_physical_flow, prepare_installed_capacity
# NOTE(review): `get_tables` is also imported from srs.collect_data.entsoe_data
# a few lines earlier in this cell. This later import rebinds the name, so after
# this cell runs `get_tables` refers to the datastream_data version only.
# If both are needed, alias one of them (e.g. `get_tables as get_datastream_tables`).
from srs.collect_data.datastream_data import create_datastream_engine, get_tables, \
  prepare_datastream
from srs.collect_data.dwd_mosmix_data import fetch_region_weather, prepare_weather
from srs.collect_data.merge_data import merge_datasets, build_training_dataset



In [19]:
# 1. Transform merged dataset using DST_trafo and prepare training data.
#
# This cell loads the merged CSV, builds the tensors used by the forecasting
# steps, defines the rolling-forecast configuration and allocates the
# NaN-filled tensor that later cells fill with per-model forecasts.

# NOTE: the repository root is resolved relative to the kernel's working
# directory (two levels up), so the notebook must be started from its own folder.
repo_root = Path.cwd().parents[1]

# Single definition of the input file so the raw frame and the tensors are
# guaranteed to come from the same CSV.
data_path = repo_root / "data" / "data_no1.csv"

# Validation (evaluation) period length in days. Defined once and reused for
# `test_days` below so the two values cannot drift apart.
length_eval = 2 * 365

# Raw frame: later cells use its column names (`data_no1.columns[1:]`).
data_no1 = pd.read_csv(data_path)

data_t_no1, train_t_no1, train_dates, price_t_no1 = prepare_dataset_tensor(
    data_path,
    tz="CET",
    seed=42,
    test_days=length_eval,
    dtype=torch.float64,
)

# Aliases under the names the forecasting step is called with.
data_array = data_t_no1
price_S = price_t_no1
dates_S = train_dates

# Configuration passed to run_forecast_step.
D = 730                   # presumably the rolling training-window length in days - TODO confirm
S = 24                    # size of the second axis of the forecast tensor (values per day)
WD = [1, 6, 7]            # presumably weekday-dummy indices - TODO confirm against run_forecast_step
PRICE_S_LAGS = [1, 2, 7]  # presumably price lags in days - TODO confirm
da_lag = [0]

# Fail early if the dataset is too short for the evaluation window: a
# non-positive `begin_eval` would otherwise silently index from the wrong end.
n_days_total = data_array.shape[0]
if not 0 < length_eval < n_days_total:
    raise ValueError(
        f"length_eval={length_eval} must be positive and smaller than the "
        f"number of days in the dataset ({n_days_total})"
    )

# Index of the first observation in the evaluation period.
begin_eval = n_days_total - length_eval

# Number of forecast steps (one per evaluation day).
N_s = length_eval

# Labels for the last axis of `forecasts`; the position in this list is the
# slot index of the corresponding model.
model_names = [
    "true",
    "expert_ext",
    "linar_gam",
    "light_gbm"
]
n_models = len(model_names)

# 3D tensor to hold forecasts, shape (N_s, S, n_models), initialised with NaN
# so that steps that were never filled remain distinguishable.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
forecasts = torch.full((N_s, S, n_models), float('nan'), dtype=torch.float64, device=device)

In [21]:
# 2. Run all forecast steps on a thread pool and collect the GAM forecasts.
#
# Each step n in range(N_s) is submitted as one call to run_forecast_step;
# results are written into `forecasts` as they complete (in arbitrary order).
init_time = datetime.now()

# Loop-invariant: the regressor / data column names are the same for every
# step, so compute them once instead of twice per submitted task.
feature_names = data_no1.columns[1:]

# Write the GAM forecast into the slot that `model_names` labels as the GAM
# model. The previous hard-coded index 1 is the slot labelled "expert_ext",
# which would mislabel the GAM forecasts in any evaluation keyed by model_names.
gam_slot = model_names.index("linar_gam")

# Steps whose worker (or result handling) raised, for the summary below.
failed_steps = []

with ThreadPoolExecutor() as executor:
    futures = [
        executor.submit(
            run_forecast_step,
            n,
            price_S,
            data_array,
            begin_eval,
            D,
            dates_S,
            WD,
            PRICE_S_LAGS,
            da_lag,
            feature_names,  # reg_names
            feature_names,  # data_columns
        )
        for n in range(N_s)
    ]
    # Futures are submitted in step order, so the list position is the step
    # index; this lets a failure be attributed to its step.
    step_of = {future: step for step, future in enumerate(futures)}

    try:
        for future in as_completed(futures):
            step = step_of[future]
            try:
                n, gam = future.result()
                forecasts[n, :, gam_slot] = torch.tensor(
                    gam, dtype=forecasts.dtype, device=forecasts.device
                )
            except Exception as e:
                # Best effort: keep collecting the other steps, but record
                # which step failed so it can be re-run or inspected.
                failed_steps.append(step)
                print(f"Thread crashed (n={step}): {type(e).__name__}: {e}")
    except KeyboardInterrupt:
        # Leaving the `with` block waits for every submitted future. Cancel
        # the ones that have not started so an interrupt stops the run after
        # the currently executing steps instead of after all N_s steps.
        for pending in futures:
            pending.cancel()
        raise

if failed_steps:
    print(f"{len(failed_steps)} of {N_s} forecast steps failed: {sorted(failed_steps)}")

# End timing
end_time = datetime.now()
duration_minutes = (end_time - init_time).total_seconds() / 60
print(f"\nParallel training duration (threaded): {duration_minutes:.2f} minutes")

START NS: 0  index: 1463  data shape: torch.Size([2193, 24, 10])
START NS: 1  index: 1464  data shape: torch.Size([2193, 24, 10])
START NS: 2  index: 1465  data shape: torch.Size([2193, 24, 10])
START NS: 3  index: 1466  data shape: torch.Size([2193, 24, 10])
START NS: 4  index: 1467  data shape: torch.Size([2193, 24, 10])
START NS: 5  index: 1468  data shape: torch.Size([2193, 24, 10])
START NS: 6  index: 1469  data shape: torch.Size([2193, 24, 10])
START NS: 7  index: 1470  data shape: torch.Size([2193, 24, 10])
START NS: 8  index: 1471  data shape: torch.Size([2193, 24, 10])
START NS: 9  index: 1472  data shape: torch.Size([2193, 24, 10])
START NS: 10  index: 1473  data shape: torch.Size([2193, 24, 10])
START NS: 11  index: 1474  data shape: torch.Size([2193, 24, 10])
END NS: 10
START NS: 12  index: 1475  data shape: torch.Size([2193, 24, 10])
Thread crashed: not enough values to unpack (expected 5, got 3)
END NS: 3
START NS: 13  index: 1476  data shape: torch.Size([2193, 24, 10])
T

KeyboardInterrupt: 

END NS: 42
START NS: 54  index: 1517  data shape: torch.Size([2193, 24, 10])
END NS: 43
START NS: 55  index: 1518  data shape: torch.Size([2193, 24, 10])
END NS: 44START NS: 56  index: 1519  data shape: torch.Size([2193, 24, 10])
END NS: 45
START NS: 57  index: 1520  data shape: torch.Size([2193, 24, 10])
END NS: 46
START NS: 58  index: 1521  data shape: torch.Size([2193, 24, 10])
END NS: 48
START NS: 59  index: 1522  data shape: torch.Size([2193, 24, 10])
END NS: 47
START NS: 60  index: 1523  data shape: torch.Size([2193, 24, 10])
END NS: 49
START NS: 61  index: 1524  data shape: torch.Size([2193, 24, 10])
END NS: 51
START NS: 62  index: 1525  data shape: torch.Size([2193, 24, 10])
END NS: 50
START NS: 63  index: 1526  data shape: torch.Size([2193, 24, 10])
END NS: 52
START NS: 64  index: 1527  data shape: torch.Size([2193, 24, 10])
END NS: 53
START NS: 65  index: 1528  data shape: torch.Size([2193, 24, 10])
END NS: 54
START NS: 66  index: 1529  data shape: torch.Size([2193, 24, 10])


In [9]:
# Sanity check: shapes of the inputs handed to the forecast step, one per line
# (feature tensor, price tensor, date vector).
print(*(item.shape for item in (data_array, price_S, dates_S)), sep="\n")

torch.Size([2193, 24, 10])
torch.Size([2193, 24])
(2193,)


In [20]:
# Sanity check: shapes of everything returned by prepare_dataset_tensor,
# one per line, in the order data / price / dates / train.
print("\n".join(
    str(item.shape)
    for item in (data_t_no1, price_t_no1, train_dates, train_t_no1)
))

torch.Size([2193, 24, 10])
torch.Size([2193, 24])
(2193,)
torch.Size([2193, 24, 10])
