In [2]:

# ===== Fragment 0: Imports, globals, helpers =====
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

# Global matplotlib defaults shared by every figure (no explicit colours are set here).
mpl.rcParams.update({
    # typography
    "font.family": "serif",
    "font.serif": ["Times New Roman", "Times", "DejaVu Serif"],
    "font.size": 16,
    "axes.titlesize": 16,
    "axes.labelsize": 14,
    "xtick.labelsize": 16,
    "ytick.labelsize": 16,
    "legend.fontsize": 16,
    # resolution and export
    "figure.dpi": 350,
    "savefig.dpi": 350,
    "savefig.bbox": "tight",
    # axes decoration
    "axes.grid": True,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "grid.alpha": 0.25,
    "lines.linewidth": 1.6,
})

# Seed NumPy's global RNG so that stochastic cells are repeatable.
np.random.seed(42)

# Let pandas print up to 200 rows/columns before truncating the output.
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_rows", 200)

# ---------- helpers ----------
def add_value_labels(ax, fmt="{:.3g}", vpad=0.01):
    """Write the height of every bar next to the free end of that bar.

    Parameters
    ----------
    ax : matplotlib Axes (anything exposing ``get_ylim()``, ``patches`` and ``text()``)
        Axes holding a vertical bar chart.
    fmt : str
        Format string applied to each bar height.
    vpad : float
        Gap between the bar end and its label, as a fraction of the current
        y-axis range.

    Notes
    -----
    Bars with a non-finite height are skipped. Labels of non-negative bars sit
    above the bar; labels of negative bars sit below it, so they do not overlap
    the bar itself. The bar's own bottom (``get_y()``) is taken into account,
    which keeps the labels at the bar end for stacked bars as well.
    """
    ymin, ymax = ax.get_ylim()
    # Fall back to a unit range when the axis is degenerate (ymax <= ymin).
    vrange = ymax - ymin if ymax > ymin else 1.0
    offset = vpad * vrange
    for rect in ax.patches:
        h = rect.get_height()
        if not np.isfinite(h):
            continue
        x = rect.get_x() + rect.get_width() / 2.0
        bar_end = rect.get_y() + h
        if h < 0:
            ax.text(x, bar_end - offset, fmt.format(h), ha="center", va="top")
        else:
            ax.text(x, bar_end + offset, fmt.format(h), ha="center", va="bottom")

def show_hist(series: pd.Series, title: str, xlabel: str, bins: int = 60,
              figsize=(9, 6), dpi=350):
    """Display (never save) a histogram of the numeric values in ``series``.

    Values that cannot be converted to numbers are dropped; when nothing
    remains the function returns without creating a figure.
    """
    values = pd.to_numeric(series, errors="coerce").dropna()
    if values.empty:
        return
    figure = plt.figure(figsize=figsize, dpi=dpi)
    axes = figure.gca()
    axes.hist(values, bins=bins)
    axes.set_title(title)
    axes.set_xlabel(xlabel)
    axes.set_ylabel("Count")
    figure.tight_layout()
    plt.show()

def heatmap_with_ann(mat: np.ndarray, xlabels, ylabels, title: str,
                     figsize=(10, 8), dpi=350, fmt="{:.2f}"):
    """Show a matrix as a heat map with every finite value printed in its cell.

    Parameters
    ----------
    mat : 2-D array-like
        Values to draw; converted with ``np.asarray`` so a DataFrame or a
        nested list works as well as an ndarray.
    xlabels, ylabels : sequences
        Tick labels for the columns and the rows.
    title : str
        Axes title.
    figsize, dpi : figure size and resolution passed to ``plt.figure``.
    fmt : str
        Format string applied to each annotated value.

    Non-finite cells (NaN, +/-inf) are left without a label and are ignored
    when the colour range used for the text contrast is computed.
    """
    data = np.asarray(mat)
    plt.figure(figsize=figsize, dpi=dpi)
    ax = plt.gca()
    im = ax.imshow(data, aspect="auto")
    plt.colorbar(im)
    ax.set_xticks(range(len(xlabels)))
    ax.set_xticklabels(list(xlabels), rotation=45, ha="right")
    ax.set_yticks(range(len(ylabels)))
    ax.set_yticklabels(list(ylabels))
    ax.set_title(title)
    ax.grid(False)

    # Annotations: choose black or white text from the luminance of the cell colour.
    finite = np.isfinite(data)
    if finite.any():
        norm = plt.Normalize(vmin=data[finite].min(), vmax=data[finite].max())
        cmap = im.get_cmap()
        # np.nonzero walks the mask row by row, i.e. in the same order as a nested i/j loop.
        for i, j in zip(*np.nonzero(finite)):
            v = data[i, j]
            red, green, blue = cmap(norm(v))[:3]
            luminance = 0.299 * red + 0.587 * green + 0.114 * blue
            ax.text(j, i, fmt.format(v),
                    ha="center", va="center",
                    color=("black" if luminance > 0.5 else "white"))
    plt.tight_layout()
    plt.show()


In [3]:
# ===== Fragment 1: Load dataset =====
# Попробуем несколько вероятных путей — первый, который прочитается, будет использован
CANDIDATES = [
    "original_dataset_china/renewables_combined_FULL.csv",
    "original_dataset_china/renewables_combined_CLEAN.csv",
]
DATA_PATH = None
_df = None          # reset so a frame left over from an earlier run is never reused
_load_errors = {}   # path -> exception, reported if every candidate fails
for p in CANDIDATES:
    try:
        _df = pd.read_csv(p, low_memory=False)
    except Exception as exc:
        # Best effort: remember why this candidate failed and try the next one.
        _load_errors[p] = exc
        continue
    DATA_PATH = p
    break

if DATA_PATH is None:
    # Fail here with the collected reasons instead of a NameError further down.
    # To use another file, add its path to CANDIDATES.
    _details = "; ".join(f"{path}: {err!r}" for path, err in _load_errors.items())
    raise FileNotFoundError(f"None of the candidate dataset files could be read ({_details})")

df = _df.copy()
print("Loaded:", DATA_PATH)
print("Shape:", df.shape)
print("Columns:", list(df.columns)[:20], "...")

# Time axis: parse `timestamp` as naive datetimes (no forced UTC conversion);
# values that cannot be parsed become NaT.
if "timestamp" not in df.columns:
    raise KeyError("Column 'timestamp' is required to derive the calendar features")
df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
df["year"]  = df["timestamp"].dt.year
df["month"] = df["timestamp"].dt.month
df["day"]   = df["timestamp"].dt.day
df["hour"]  = df["timestamp"].dt.hour
df["doy"]   = df["timestamp"].dt.dayofyear
df["week"]  = df["timestamp"].dt.isocalendar().week.astype('Int64')

# Derive the season only when the dataset does not already provide it.
if "season_derived" not in df.columns:
    def _season(m: int) -> str:
        """Map a month number (1-12) to its season name."""
        if m in (12, 1, 2):  return "winter"
        if m in (3, 4, 5):   return "spring"
        if m in (6, 7, 8):   return "summer"
        return "autumn"
    # na_action="ignore" keeps rows with a missing month as NaN; without it the
    # NaN would be passed to _season and fall through to "autumn".
    df["season_derived"] = df["month"].map(_season, na_action="ignore")

print("Time range:", df["timestamp"].min(), "→", df["timestamp"].max())


Loaded: original_dataset_china/renewables_combined_FULL.csv
Shape: (8753, 18)
Columns: ['datetime_solar', 'hour_index_solar', 'temperature_solar', 'humidity_solar', 'surface_irradiance_solar', 'toa_irradiance_solar', 'kWh_solar_power_solar', 'sheet_solar', 'timestamp', 'specific_humidity_solar', 'relative_humidity_solar', 'datetime_wind', 'hour_index_wind', 'air_density_wind', 'wind_speed_wind', 'kWh_wind_power_wind', 'sheet_wind', 'season_derived'] ...
Time range: 2019-01-01 08:00:00 → 2019-12-31 23:00:00


In [4]:
# ===== Fragment 2: Identify targets & features =====
# Snapshot of the column names; find_power_target and the feature filters below read it.
cols = list(df.columns)

def find_power_target(suffix: str, columns=None):
    """Pick the power-output (target) column whose name ends with ``suffix``.

    Parameters
    ----------
    suffix : str
        Required ending of the column name, e.g. ``"_solar"``.
    columns : iterable of str, optional
        Column names to search. When omitted, the notebook-level ``cols`` list
        is used, which is what the function did before this argument existed.

    Returns
    -------
    str or None
        The first matching column that starts with ``kWt_`` (case-insensitive);
        failing that, the first matching column containing ``power``; ``None``
        when neither exists.
    """
    names = cols if columns is None else list(columns)
    matching = [c for c in names if c.endswith(suffix)]
    # 1) columns with the kWt_ prefix take priority
    for name in matching:
        if name.lower().startswith("kwt_"):
            return name
    # 2) otherwise any column that mentions "power"
    for name in matching:
        if "power" in name.lower():
            return name
    return None

solar_target = find_power_target("_solar")
wind_target = find_power_target("_wind")

# Known predictor names; only those actually present in the frame are kept below.
solar_feature_keys = [
    "irradiance_solar",
    "surface_irradiance_solar",
    "toa_irradiance_solar",
    "temperature_solar",
    "module_temperature_solar",
    "humidity_solar",
    "relative_humidity_solar",
    "specific_humidity_solar",
]
wind_feature_keys = [
    "wind_speed_wind",
    "wind_direction_wind",
    "air_density_wind",
]

# Keep the frame's own column order (iterate over cols, test membership in the key lists).
solar_feats = [name for name in cols if name in solar_feature_keys]
wind_feats = [name for name in cols if name in wind_feature_keys]

print("Solar target:", solar_target)
print("Wind  target:", wind_target)
print("Solar features:", solar_feats)
print("Wind  features:", wind_feats)


Solar target: kWh_solar_power_solar
Wind  target: kWh_wind_power_wind
Solar features: ['temperature_solar', 'humidity_solar', 'surface_irradiance_solar', 'toa_irradiance_solar', 'specific_humidity_solar', 'relative_humidity_solar']
Wind  features: ['air_density_wind', 'wind_speed_wind']


In [5]:
# ===== Fragment 3: Basic overview =====
print("Rows, Cols:", df.shape)
print("Time coverage:", df["timestamp"].min(), "→", df["timestamp"].max())
print("Season distribution:", df["season_derived"].value_counts(dropna=False).to_dict())

# Rows whose timestamp repeats an earlier one
if "timestamp" in df.columns:
    dup_count = int(df["timestamp"].duplicated().sum())
    print("Duplicate timestamp rows:", dup_count)

# Missing values per column, worst first (top 20 printed)
missing = df.isna().sum().sort_values(ascending=False).to_frame("missing_count")
missing["missing_pct"] = (missing["missing_count"] / df.shape[0] * 100).round(3)
print(missing.head(20))

# Descriptive statistics of the numeric columns (first 20 rows printed)
desc = df.select_dtypes(include=[np.number]).describe().transpose()
print(desc.head(20))

# First five rows for a visual check
print(df.head(5))


Rows, Cols: (8753, 24)
Time coverage: 2019-01-01 08:00:00 → 2019-12-31 23:00:00
Season distribution: {'spring': 2209, 'summer': 2208, 'autumn': 2184, 'winter': 2152}
Duplicate timestamp rows: 1
                         missing_count  missing_pct
datetime_solar                       0          0.0
hour_index_solar                     0          0.0
doy                                  0          0.0
hour                                 0          0.0
day                                  0          0.0
month                                0          0.0
year                                 0          0.0
season_derived                       0          0.0
sheet_wind                           0          0.0
kWh_wind_power_wind                  0          0.0
wind_speed_wind                      0          0.0
air_density_wind                     0          0.0
hour_index_wind                      0          0.0
datetime_wind                        0          0.0
relative_humidity_solar   

In [6]:
# --- helpers required by dual plots ---
# Make sure the `hour` and `month` columns exist; each is derived from
# `timestamp` only when it is missing.
if "timestamp" in df.columns and "hour" not in df.columns:
    df["hour"] = pd.to_datetime(df["timestamp"], errors="coerce").dt.hour
if "timestamp" in df.columns and "month" not in df.columns:
    df["month"] = pd.to_datetime(df["timestamp"], errors="coerce").dt.month

def _hourly_means(series: pd.Series):
    """Average ``series`` per hour of day.

    The values are coerced to numbers and paired with the notebook-level
    ``df["hour"]`` column; rows where either side is missing are dropped.
    Returns a Series indexed by hour, or ``None`` when no row survives.
    """
    paired = pd.DataFrame({
        "val": pd.to_numeric(series, errors="coerce"),
        "hour": df["hour"],
    }).dropna()
    return None if paired.empty else paired.groupby("hour")["val"].mean()

def _monthly_means(series: pd.Series):
    """Average ``series`` per calendar month.

    The values are coerced to numbers and paired with the notebook-level
    ``df["month"]`` column; rows where either side is missing are dropped.
    Returns a Series indexed 1..12 (months without data hold NaN), or
    ``None`` when no row survives.
    """
    paired = pd.DataFrame({
        "val": pd.to_numeric(series, errors="coerce"),
        "month": df["month"],
    }).dropna()
    if paired.empty:
        return None
    per_month = paired.groupby("month")["val"].mean()
    # Reindex to 1..12 so that the axis always covers the full year.
    return per_month.reindex(range(1, 13))


In [7]:
# ===== Fragment 11: Physical sanity checks =====
def report_count(name, cond):
    """Print ``name`` followed by the number of true entries in ``cond``."""
    print(f"{name}: {int(cond.sum())}")

# Solar quantities at night; "night" means hour in {0..5, 20..23}.
if "hour" in df.columns:
    night = df["hour"].isin([*range(0, 6), *range(20, 24)])

    if "surface_irradiance_solar" in df.columns:
        irr = pd.to_numeric(df["surface_irradiance_solar"], errors="coerce")
        report_count("Night irradiance > 0", night & (irr > 0))

    if solar_target:
        sp = pd.to_numeric(df[solar_target], errors="coerce")
        report_count("Night solar power > 0", night & (sp > 0))

# Negative values where none should occur
if "wind_speed_wind" in df.columns:
    ws = pd.to_numeric(df["wind_speed_wind"], errors="coerce")
    report_count("Negative wind speed", ws.lt(0))

if solar_target:
    sp = pd.to_numeric(df[solar_target], errors="coerce")
    report_count("Negative solar power", sp.lt(0))

if wind_target:
    wp = pd.to_numeric(df[wind_target], errors="coerce")
    report_count("Negative wind power", wp.lt(0))


Night irradiance > 0: 579
Night solar power > 0: 354
Negative wind speed: 0
Negative solar power: 0
Negative wind power: 0


In [8]:
# ===== Fragment 12: Modeling prep (skeleton) =====
# Цель — сформировать X/y и базовые train/val/test с временным разбиением.
# Выполняйте этот блок после EDA — он НЕ обучает модели, только готовит данные.

# Candidate predictors: solar and wind measurements plus the calendar
# columns that exist in the frame.
feature_candidates = list(solar_feats) + list(wind_feats)
feature_candidates.extend(name for name in ["month", "hour", "doy"] if name in df.columns)

# Drop rows without a timestamp, then keep the first row of every timestamp.
m = df.loc[df["timestamp"].notna()].copy()
m = m.drop_duplicates(subset=["timestamp"])

# Формируем X/y по отдельности для солнца и ветра
def make_Xy(data: pd.DataFrame, target_col: str, features: list):
    """Return ``(X, y)`` restricted to the rows that are complete.

    ``y`` is ``data[target_col]`` coerced to numbers; ``X`` is a copy of
    ``data[features]``. A row is kept only when the target and every feature
    are non-missing, so both results share the same index.
    """
    target = pd.to_numeric(data[target_col], errors="coerce")
    predictors = data[features].copy()
    # Simple complete-case filter (could be replaced by a finer strategy).
    complete = target.notna() & predictors.notna().all(axis=1)
    return predictors[complete], target[complete]

# Start from "nothing built" so the later cells can test for None.
X_solar = y_solar = None
X_wind = y_wind = None

if solar_target:
    X_solar, y_solar = make_Xy(
        m, solar_target, [name for name in feature_candidates if name in m.columns]
    )
    print("Solar X/y:", X_solar.shape, y_solar.shape)

if wind_target:
    X_wind, y_wind = make_Xy(
        m, wind_target, [name for name in feature_candidates if name in m.columns]
    )
    print("Wind  X/y:", X_wind.shape, y_wind.shape)

# Пример временного split (последние 20% на тест)
def time_split(df_like: pd.DataFrame, frac_test=0.2):
    """Split row positions chronologically: leading part train, trailing part test.

    Returns two integer arrays of positions; the first holds
    ``floor(n * (1 - frac_test))`` entries, the second the remainder.
    """
    total = len(df_like)
    n_train = int(np.floor(total * (1 - frac_test)))
    head, tail = np.split(np.arange(total), [n_train])
    return head, tail

# Report the sizes of the chronological 80/20 split for each target.
if X_solar is not None:
    idx_tr, idx_te = time_split(X_solar, 0.2)
    print("Solar split:", len(idx_tr), "train /", len(idx_te), "test")

if X_wind is not None:
    idx_tr, idx_te = time_split(X_wind, 0.2)
    print("Wind split:", len(idx_tr), "train /", len(idx_te), "test")


Solar X/y: (8752, 11) (8752,)
Wind  X/y: (8752, 11) (8752,)
Solar split: 7001 train / 1751 test
Wind split: 7001 train / 1751 test


In [9]:
# ===== Fragment 13: Tables for the paper =====
def seasonal_stats(series: pd.Series, name: str):
    """Print count/mean/std/min/max of ``series`` for every season.

    Seasons come from the notebook-level ``df["season_derived"]`` column.
    Nothing is printed when no row has both a numeric value and a season.
    """
    table = pd.DataFrame({
        "val": pd.to_numeric(series, errors="coerce"),
        "season": df["season_derived"],
    }).dropna()
    if not table.empty:
        summary = table.groupby("season")["val"].agg(["count", "mean", "std", "min", "max"])
        print(f"\nSeasonal stats — {name}")
        print(summary)

def monthly_stats(series: pd.Series, name: str):
    """Print count/mean/std/min/max of ``series`` for every calendar month.

    Months come from the notebook-level ``df["month"]`` column. Nothing is
    printed when no row has both a numeric value and a month.
    """
    table = pd.DataFrame({
        "val": pd.to_numeric(series, errors="coerce"),
        "month": df["month"],
    }).dropna()
    if not table.empty:
        summary = table.groupby("month")["val"].agg(["count", "mean", "std", "min", "max"])
        print(f"\nMonthly stats — {name}")
        print(summary)

# Seasonal table first, then monthly table, for each target that was found.
for _target, _label in ((solar_target, "Solar Power (kW)"), (wind_target, "Wind Power (kW)")):
    if _target:
        seasonal_stats(df[_target], _label)
        monthly_stats(df[_target], _label)
del _target, _label

# Дополнительно — сводная таблица корреляций с таргетами (top-10)
def table_top_corr(target, k=10, method="pearson"):
    """Print the ``k`` numeric columns with the largest signed correlation to ``target``.

    Correlations are computed over the numeric columns of the notebook-level
    ``df`` with the given ``method`` and sorted in descending order, so strongly
    negative correlations appear last. Nothing happens when ``target`` is None
    or is not a column of ``df``.
    """
    if target is None or target not in df.columns:
        return
    numeric = df.select_dtypes(include=[np.number])
    corr_to_target = numeric.corr(method=method)[target].drop(target)
    ranked = corr_to_target.sort_values(ascending=False)
    print(ranked.head(k).to_frame(f"corr_with_{target}_{method}"))

# Pearson then Spearman, for the solar target first and the wind target second.
for _target in (solar_target, wind_target):
    for _method in ("pearson", "spearman"):
        table_top_corr(_target, 10, _method)
del _target, _method



Seasonal stats — Solar Power (kW)
        count         mean          std  min       max
season                                                
autumn   2184   942.717727  1377.896308  0.0  4238.193
spring   2209  1161.535002  1462.703420  0.0  4500.000
summer   2208   966.957184  1200.696901  0.0  4036.684
winter   2152   721.235507  1237.513954  0.0  4500.000

Monthly stats — Solar Power (kW)
       count         mean          std  min       max
month                                                
1        736   685.842629  1177.053979  0.0  4005.425
2        672  1061.194115  1543.353973  0.0  4500.000
3        744  1050.507254  1467.837257  0.0  4500.000
4        720  1318.606440  1544.096468  0.0  4500.000
5        745  1120.613136  1362.291924  0.0  4300.442
6        720  1144.471549  1320.815891  0.0  4036.684
7        744  1000.190301  1185.015244  0.0  3742.922
8        744   761.935972  1056.473145  0.0  3886.848
9        720  1196.589754  1485.146683  0.0  4197.958
10     

In [10]:
# --- авто-детект имён таргетов (поддержка kW/kWt и разных суффиксов) ---
def _find_target(cols, suffix):
    cands = [c for c in cols if c.lower().endswith(suffix) and ("kw" in c.lower()) and ("power" in c.lower())]
    if cands: return cands[0]
    # запасной вариант: любое "power_*suffix"
    cands = [c for c in cols if c.lower().endswith(suffix) and ("power" in c.lower())]
    return cands[0] if cands else None

cols = list(df.columns)
target_solar = _find_target(cols, "_solar")  # e.g. 'kW_solar_power_solar' or 'kWt_solar_power_solar'
target_wind = _find_target(cols, "_wind")    # e.g. 'kW_wind_power_wind' or 'kWt_wind_power_wind'

assert target_solar is not None, "Не найден столбец таргета для солнца"
assert target_wind is not None, "Не найден столбец таргета для ветра"

# --- feature candidates (both targets share the same pool) ---
base_feature_keys = [
    # solar
    "surface_irradiance_solar",
    "toa_irradiance_solar",
    "irradiance_solar",
    "temperature_solar",
    "module_temperature_solar",
    "humidity_solar",
    "relative_humidity_solar",
    "specific_humidity_solar",
    # wind
    "wind_speed_wind",
    "wind_direction_wind",
    "air_density_wind",
    # calendar (when present)
    "hour",
    "month",
    "doy",
]
features_all = [key for key in base_feature_keys if key in df.columns]

# --- strict filter: keep only rows that have ALL features and both targets ---
m = df.dropna(subset=[*features_all, target_solar, target_wind]).copy()

# shared X, separate y per target
X = m.loc[:, features_all].copy()
y_solar = m[target_solar].astype(float)
y_wind = m[target_wind].astype(float)

print("Targets:", target_solar, "|", target_wind)
print("Shared features:", features_all)
print("Shapes:", X.shape, y_solar.shape, y_wind.shape)

# --- простой временной split (последние 20% на тест) ---
def time_split(n, frac_test=0.2):
    """Chronological train/test split expressed as row positions.

    Parameters
    ----------
    n : int or sized object
        Number of rows, or any object supporting ``len()`` (DataFrame, array,
        list). Accepting both keeps calls written for the earlier
        ``time_split(df_like, ...)`` definition in this notebook working after
        this definition replaces it.
    frac_test : float
        Share of the trailing rows assigned to the test part, within [0, 1].

    Returns
    -------
    (idx_tr, idx_te) : two integer arrays; the first ``int(n * (1 - frac_test))``
    positions go to train, the rest to test.

    Raises
    ------
    ValueError
        If ``frac_test`` lies outside [0, 1].
    """
    if not 0.0 <= frac_test <= 1.0:
        raise ValueError(f"frac_test must be within [0, 1], got {frac_test!r}")
    if not isinstance(n, (int, np.integer)):
        n = len(n)
    cut = int(n * (1 - frac_test))
    idx_tr = np.arange(cut)
    idx_te = np.arange(cut, n)
    return idx_tr, idx_te

# Positional 80/20 split applied to the shared features and to both targets.
idx_tr, idx_te = time_split(len(X), 0.2)
Xtr = X.iloc[idx_tr]
Xte = X.iloc[idx_te]
ys_tr = y_solar.iloc[idx_tr]
ys_te = y_solar.iloc[idx_te]
yw_tr = y_wind.iloc[idx_tr]
yw_te = y_wind.iloc[idx_te]


Targets: kWh_solar_power_solar | kWh_wind_power_wind
Shared features: ['surface_irradiance_solar', 'toa_irradiance_solar', 'temperature_solar', 'humidity_solar', 'relative_humidity_solar', 'specific_humidity_solar', 'wind_speed_wind', 'air_density_wind', 'hour', 'month', 'doy']
Shapes: (8753, 11) (8753,) (8753,)


In [11]:
# ===== 0. Common setup (metrics, splits, CV & report) =====
import numpy as np, pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler

# ---- безопасные метрики ----
def _mape(y_true, y_pred, eps=1e-8):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    m = np.abs((y_true - y_pred) / np.clip(np.abs(y_true), eps, None)).mean()
    return m * 100.0

def compute_metrics(y_true, y_pred):
    """Return a dict with MSE, RMSE, MAE, MAPE%, R2 and EVS for one prediction set."""
    mse = mean_squared_error(y_true, y_pred)
    return {
        "MSE": mse,
        "RMSE": np.sqrt(mse),
        "MAE": mean_absolute_error(y_true, y_pred),
        "MAPE%": _mape(y_true, y_pred),
        "R2": r2_score(y_true, y_pred),
        "EVS": explained_variance_score(y_true, y_pred),
    }

# ---- финальный отчёт по фолдам ----
def summarize_cv(cv_metrics_list):
    """Aggregate per-fold metrics into mean, std and a 90% t-interval.

    Parameters
    ----------
    cv_metrics_list : list of dict
        One ``{metric_name: value}`` dict per fold; the metric names are taken
        from the first dict.

    Returns
    -------
    pandas.DataFrame
        One row per metric with the columns ``mean``, ``std`` (sample standard
        deviation, ``ddof=1``), ``CI90_low`` and ``CI90_high``. With a single
        fold the std and the interval half-width are 0. An empty input yields
        an empty frame that still has these four columns.
    """
    from math import sqrt
    from scipy.stats import t

    columns = ["mean", "std", "CI90_low", "CI90_high"]
    n = len(cv_metrics_list)
    if n == 0:
        return pd.DataFrame(columns=columns, dtype=float)

    # The t-quantile depends only on the number of folds, not on the metric,
    # so it is computed once outside the loop.
    alpha = 0.10
    tcrit = t.ppf(1 - alpha / 2, df=n - 1) if n > 1 else 0.0

    agg = {}
    for k in cv_metrics_list[0]:
        vals = np.array([d[k] for d in cv_metrics_list], float)
        mean = vals.mean()
        std = vals.std(ddof=1) if n > 1 else 0.0
        margin = tcrit * std / sqrt(n) if n > 1 else 0.0
        agg[k] = {"mean": mean, "std": std, "CI90_low": mean - margin, "CI90_high": mean + margin}
    return pd.DataFrame(agg).T

# ---- 80/20 сплит (по времени) ----
def train_test_time_split(y, frac_test=0.2, X_exog=None):
    """Chronological split of ``y`` and, optionally, of an exogenous matrix.

    The first ``floor(n * (1 - frac_test))`` rows form the train part and the
    rest the test part. Returns ``(y_train, y_test, X_train, X_test)``; the
    last two are ``None`` when ``X_exog`` is not given. A DataFrame ``X_exog``
    is sliced by position with ``.iloc``, anything else with plain slicing.
    """
    n = len(y)
    cut = int(np.floor(n * (1 - frac_test)))
    head, tail = slice(0, cut), slice(cut, n)
    y_train, y_test = y.iloc[head], y.iloc[tail]

    if X_exog is None:
        return (y_train, y_test, None, None)
    if isinstance(X_exog, pd.DataFrame):
        return (y_train, y_test, X_exog.iloc[head], X_exog.iloc[tail])
    return (y_train, y_test, X_exog[head], X_exog[tail])

# ---- 5-fold TimeSeriesSplit ----
def folds_time_series(n_splits=5):
    """Return a ``TimeSeriesSplit`` configured with ``n_splits`` folds."""
    splitter = TimeSeriesSplit(n_splits=n_splits)
    return splitter

# Shared accumulator for the model comparison: hold-out and CV report rows
# for both targets are appended here.
report_rows = list()


# **DP-STH: Dual-Purpose Sequence–Temporal Hybrid (LSTM+GRU+causal TCN+causal Transformer) с неопределённостно-взвешенной регрессией и фокальной экстремум-детекцией**

(кратко: **DP-STH**; альтернативы: **Causal-MultiPath ST-Hybrid**, **UW-MTNet**)

## Краткое научное описание

**DP-STH** — мультизадачная каузальная гибридная сеть для **совместного прогнозирования** солнечной и ветровой генерации (2 регрессионные головы) и **детекции экстремумов** этих рядов (2 бинарные головы). Архитектура объединяет четыре **строго каузальные** последовательностные ветви — **LSTM**, **GRU**, dilated **causal TCN** и лёгкий **causal Transformer** — плюс табличную ветвь (MLP+BatchNorm). Такое многоканальное представление устойчиво к различным режимам динамики (медленная/быстрая память, многомасштабные колебания, дальние зависимости) и к шуму.

### Входы и препроцессинг (anti-leakage)

* Признаки: train-only **winsorize** (1–99% по train), затем монотонная **sign-log1p** трансформация (без «фита»), далее **MinMaxScaler** обучается **только на train** и применяется к val/test.
* Последовательности формируются окнами длины *L* с **левым нулевым padding** (без доступа к будущему).
* Плоский экзогенный вход (**flat\_in**) берётся как **последний шаг окна** (i-1), т.е. 100% каузально.
* Метки экстремумов формируются по **верхнему квантилю** (*q* = 0.95), вычисленному **только на train**; по тем же порогам бинаризуется test.
* Цели регрессии используются в устойчивом **sign-log1p**-пространстве. Для устойчивой отчётности AUC применяется **safe AUC** (возвращает NaN при одноклассовых фолдах).

### Архитектура (multi-path fusion)

1. **LSTM-ветвь:** `LSTM(64, return_sequences=True) → LSTM(32)`.
2. **GRU-ветвь:** `GRU(64, return_sequences=True) → GRU(32)`.
3. **Causal TCN:** residual-блоки `Conv1D(k=3, padding="causal", dilations=1,2,4,8)` с `LayerNorm` и `Dropout`; агрегация `GlobalAveragePooling1D`.
4. **Лёгкий causal Transformer:** `Conv1D(32)` → `MultiHeadAttention(num_heads=4, use_causal_mask=True)` + residual/`LayerNorm` → `Dense(64) → Dropout → Dense(32)` + residual → `GlobalAveragePooling1D`.
5. **Tabular MLP:** `Dense(32, relu) → BatchNormalization`.

Конкатенация путей → `Dense(128, relu) → Dropout(0.2)` → служебный слой **`LossScaleLayer`** с двумя обучаемыми параметрами `log_var_solar_reg`, `log_var_wind_reg`.

### Выходы и функция потерь

Четыре головы:
`solar_reg`, `wind_reg` (линейные регрессии) и `solar_ext`, `wind_ext` (сигмоидные классификаторы).
Регрессии обучаются по **uncertainty-weighted MSE** (гомоскедастическая неопределённость); экстремумы — по **фокальной BCE** (γ = 2), которая по определению неотрицательна.

Совокупная потеря:

$$
\mathcal{L}=
e^{-\ell_s}\mathrm{MSE}_s+\ell_s+\lambda\ell_s^2+
e^{-\ell_w}\mathrm{MSE}_w+\ell_w+\lambda\ell_w^2+
\mathrm{FBCE}(y^{ext}_s,\hat y^{ext}_s)+
\mathrm{FBCE}(y^{ext}_w,\hat y^{ext}_w),
$$

где $\ell_s=\log\sigma_s^2$, $\ell_w=\log\sigma_w^2$, $\lambda$ — коэффициент слабой L2-регуляризации на $\ell$ (в коде `l2=1e-4`). Для численной стабильности значения $\ell$ ограничиваются (clip) отрезком $[-3,3]$. Оптимизатор — **Adam(0.001)**.

**Интерпретация:** UW-термы автоматически балансируют вклад регрессионных задач без ручного тюнинга весов; фокальная BCE усиливает сигнал редких хвостовых событий и не становится отрицательной.

### Оценивание и метрики

* **Hold-out 80/20** и **TimeSeriesSplit(5)** без перемешивания. На каждом фолде препроцессинг выполняется **с нуля на train**.
* Метрики (на sign-log1p-шкале): **RMSE**, **MAE**, **MAPE** (устойчивый к нулям), **R²**, **EVS**; для экстремумов — **ROC-AUC** (через safe-вариант).
* В CV дополнительно приводятся средние и **90% t-интервалы** по фолдам.

### Новизна и позиционирование

* **Строгая каузальность** во всех ветвях: causal-пэддинг/маска внимания, каузальный flat\_in, train-only пороги.
* **Многоканальная гибридизация** (LSTM+GRU+causal TCN+causal Transformer) покрывает комплементарные типы зависимостей и частот.
* **Неопределённостно-взвешенная регрессия** устраняет ручной подбор весов между задачами и адаптируется к меняющейся дисперсии ошибок.
* **Фокальная экстремум-детекция** улучшает качество по редким событиям без деградации по массе наблюдений.
* **Reproducibility-friendly:** фиксированные сиды (глобально и по фолдам), стабильные процедуры препроцессинга.

### Практические настройки по умолчанию

*L* = 24, dilations = (1, 2, 4, 8); Dropout: 0.05 (TCN), 0.1 (Transformer), 0.2 (fusion); batch size = 64; epochs = 20–25.


In [None]:
# ===== ONE-FRAGMENT (drop-in replacement) =====
# Новая гибридная модель DP-STH (LSTM+GRU+causal TCN+causal Transformer)
# + uncertainty-weighted MSE ТОЛЬКО для регрессии
# + НЕОТРИЦАТЕЛЬНАЯ фокальная бинарная кроссэнтропия для экстремум-голов
# Формат вывода и функций полностью совпадает с вашим исходным.
# Исправлено: анти-утечки (train-only winsorize/scale, train-only пороги экстремумов),
# безопасный flat_in (последний шаг окна), фикс воспроизводимости и safe AUC.

import numpy as np, pandas as pd
from scipy.stats import mstats, t
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score, roc_auc_score

import tensorflow as tf
from tensorflow.keras.layers import (Input, Conv1D, LSTM, GRU, Dense, BatchNormalization,
                                     Concatenate, Dropout, GlobalAveragePooling1D,
                                     LayerNormalization, MultiHeadAttention, Add)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K

# --- reproducibility ---
# Fix the global seed (42, the same value used for NumPy in the first cell)
# before any layer is created.
tf.keras.utils.set_random_seed(42)

# -------------------------
# 1) Preprocess & helpers (как у вас — для совместимости форматов)
#    ВНИМАНИЕ: X из preprocess далее ПЕРЕОБРАБАТЫВАЕТСЯ train-only в run_holdout/run_cv,
#    чтобы исключить утечки. y возвращаются в sign-log1p-пространстве и используются как есть.
# -------------------------
def preprocess(df, features_all, y_solar, y_wind):
    """Transform features and both targets on the FULL data set.

    Features: each column is winsorized at 1% in both tails, then mapped with
    ``log1p`` (all values non-negative) or the signed ``sign(v) * log1p(|v|)``,
    and finally min-max scaled. All statistics are computed on every row
    passed in, so the returned ``X_scaled`` is not train-only.

    Targets: the same log1p / signed-log1p rule is applied to ``y_solar`` and
    ``y_wind``, which must expose ``.values`` (e.g. pandas Series).

    Returns ``(X_scaled, y_solar_transformed, y_wind_transformed)`` as arrays.
    """
    def log_tr(y):
        # Plain log1p when no value is negative, signed log1p otherwise.
        if (y < 0).sum() == 0:
            return np.log1p(y)
        return np.sign(y) * np.log1p(np.abs(y))

    feats = df[features_all].copy()
    for name in features_all:
        feats[name] = mstats.winsorize(feats[name], limits=[0.01, 0.01])
        raw = feats[name].values
        if raw.min() >= 0:
            transformed = np.log1p(raw)
        else:
            transformed = np.sign(raw) * np.log1p(np.abs(raw))
        feats[name] = transformed

    solar_t = log_tr(y_solar)
    wind_t = log_tr(y_wind)
    X_scaled = MinMaxScaler().fit_transform(feats.values)
    return X_scaled, solar_t.values, wind_t.values

def make_seq(X, L):
    """Build, for every row ``i``, the window of the ``L`` rows preceding it.

    Window ``i`` holds rows ``i-L .. i-1`` of ``X`` (row ``i`` itself is
    excluded); positions before the start of the data are filled with zeros on
    the left.

    Parameters
    ----------
    X : array-like with rows along the first axis
    L : int, window length

    Returns
    -------
    numpy.ndarray of shape ``(len(X), L) + X.shape[1:]`` with the dtype of
    ``X``. An empty ``X`` gives an empty result instead of an error.
    """
    X = np.asarray(X)
    n_rows = X.shape[0]
    # Prepend L zero rows so that every window becomes a plain slice of `padded`.
    left_pad = np.zeros((L,) + X.shape[1:], dtype=X.dtype)
    padded = np.concatenate([left_pad, X], axis=0)
    # Row i of `window_idx` selects padded[i : i + L], i.e. X[i - L : i].
    window_idx = np.arange(n_rows)[:, None] + np.arange(L)[None, :]
    return padded[window_idx]

def create_extreme_labels(y, q=0.95):
    """Flag the values of ``y`` that lie strictly above its ``q``-quantile.

    Returns an integer array (1 = above the threshold, 0 = otherwise). The
    threshold is computed from ``y`` itself.
    """
    threshold = np.percentile(y, q*100)
    is_extreme = y > threshold
    return is_extreme.astype(int)

def safe_mape(y_true, y_pred, eps=1e-8):
    """MAPE in percent over the entries where ``|y_true| > eps``.

    Entries with a (near-)zero true value are excluded; when every entry is
    excluded the result is NaN.
    """
    usable = np.abs(y_true) > eps
    if not usable.any():
        return np.nan
    truth = y_true[usable]
    relative_error = np.abs((truth - y_pred[usable]) / truth)
    return np.mean(relative_error)*100

def safe_auc(y_true_bin, y_score):
    """ROC-AUC that returns NaN when the labels contain a single class.

    The number of positives is the sum of ``y_true_bin`` and the number of
    negatives is the remaining length; if either count is zero the AUC is
    undefined and NaN is returned instead of calling ``roc_auc_score``.
    """
    positives = int(np.sum(y_true_bin))
    negatives = int(len(y_true_bin) - positives)
    if 0 in (positives, negatives):
        return np.nan
    return roc_auc_score(y_true_bin, y_score)

# вспомогательная монотонная трансформация признаков (без фита)
def _sign_log1p_df(df_):
    A = df_.astype(float).copy()
    return np.sign(A) * np.log1p(np.abs(A))

# -------------------------
# 2) Потери: UW для регрессии (может быть отрицательной суммой — это НОРМА),
#    но экстремумы — только НЕОТРИЦАТЕЛЬНАЯ фокальная BCE.
# -------------------------
class LossScaleLayer(tf.keras.layers.Layer):
    """Identity layer that owns the trainable log-variance scalars of the regression losses.

    The layer passes its input through unchanged. It exists so that
    ``log_var_solar_reg`` and ``log_var_wind_reg`` are weights of a layer that
    is part of the model graph.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # `name` and `shape` are passed by keyword: the first positional
        # parameter of add_weight differs between Keras versions, so a
        # positional string is not portable. shape=() creates a scalar.
        self.log_var_solar_reg = self.add_weight(
            name="log_var_solar_reg", shape=(), initializer="zeros", trainable=True
        )
        self.log_var_wind_reg = self.add_weight(
            name="log_var_wind_reg", shape=(), initializer="zeros", trainable=True
        )

    def call(self, z):  # important: keeps the layer inside the graph
        return z

def _u_mse(log_var, l2=1e-4, clip_min=-3.0, clip_max=3.0):
    """Build an uncertainty-weighted MSE loss bound to the variable ``log_var``.

    The returned function computes ``exp(-lv) * MSE + lv + l2 * lv**2`` where
    ``lv`` is ``log_var`` clipped to ``[clip_min, clip_max]``. The value can be
    negative because of the ``+ lv`` term.
    """
    def loss(y_true, y_pred):
        log_sigma2 = tf.clip_by_value(log_var, clip_min, clip_max)  # stabilisation
        batch_mse = tf.reduce_mean(tf.square(y_true - y_pred))
        weighted_mse = tf.exp(-log_sigma2) * batch_mse
        penalty = l2*tf.square(log_sigma2)  # weak regularisation of the log-variance
        return weighted_mse + log_sigma2 + penalty
    return loss

# Focal BCE for the extreme-event heads (always >= 0).
# The reduction is given as the plain string "sum_over_batch_size" so the
# constructor does not depend on the tf.keras.losses.Reduction enum.
try:
    FocalBCE = tf.keras.losses.BinaryFocalCrossentropy
except AttributeError:
    # This TensorFlow build has no BinaryFocalCrossentropy: fall back to plain
    # BCE, but say so, because the results then no longer use the focal loss.
    import warnings
    warnings.warn(
        "tf.keras.losses.BinaryFocalCrossentropy is unavailable; "
        "using plain BinaryCrossentropy for the extreme-event heads",
        RuntimeWarning,
    )
    focal_bce = tf.keras.losses.BinaryCrossentropy()
else:
    focal_bce = FocalBCE(gamma=2.0, reduction="sum_over_batch_size")

# -------------------------
# 3) Блоки энкодера: causal TCN + causal self-attn
# -------------------------
def causal_dilated_tcn(x, filters=48, k=3, dilations=(1,2,4,8), dropout=0.05):
    """Stack of residual causal dilated-convolution blocks over a sequence tensor.

    Parameters
    ----------
    x : Keras tensor fed to a Conv1D layer  # presumably (batch, time, channels) - confirm with caller
    filters : number of channels used by every convolution in the stack
    k : kernel size of the dilated convolutions
    dilations : dilation rates, one residual block per entry
    dropout : dropout rate applied after each dilated convolution

    Returns the output of the last residual addition; no pooling is applied here.
    """
    # 1x1 convolution brings the input to `filters` channels so the residual sums match.
    h = Conv1D(filters, 1, padding="same", activation=None)(x)
    for d in dilations:
        # Block: LayerNorm -> causal dilated conv (ReLU) -> dropout -> 1x1 conv -> residual add.
        y = LayerNormalization()(h)
        y = Conv1D(filters, k, padding="causal", dilation_rate=d, activation="relu")(y)
        y = Dropout(dropout)(y)
        y = Conv1D(filters, 1, padding="same", activation=None)(y)
        h = Add()([h, y])
    return h

def light_transformer_branch(x, d_model=32, nhead=4, ff_dim=64, dropout=0.1):
    """One pre-norm self-attention block with a causal mask, then average pooling.

    Parameters
    ----------
    x : Keras sequence tensor  # presumably (batch, time, d_model) - confirm with caller
    d_model : width of the feed-forward output; it is added to ``out1``, so the
        channel size of ``x`` is expected to equal ``d_model``
    nhead : number of attention heads; ``key_dim`` is ``max(1, d_model // nhead)``
    ff_dim : hidden width of the feed-forward part
    dropout : dropout rate used after attention and inside the feed-forward part

    Returns the block output averaged with ``GlobalAveragePooling1D``.
    """
    # Self-attention sub-block: normalise, attend with the causal mask, residual add.
    xn = LayerNormalization()(x)
    attn = MultiHeadAttention(num_heads=nhead, key_dim=max(1, d_model//nhead))(xn, xn, use_causal_mask=True)
    attn = Dropout(dropout)(attn)
    out1 = Add()([x, attn])
    # Feed-forward sub-block: normalise, Dense(ff_dim) -> dropout -> Dense(d_model), residual add.
    xn = LayerNormalization()(out1)
    ff = Dense(ff_dim, activation="relu")(xn)
    ff = Dropout(dropout)(ff)
    ff = Dense(d_model)(ff)
    out2 = Add()([out1, ff])
    out2 = GlobalAveragePooling1D()(out2)
    return out2

# -------------------------
# 4) Новая модель: DP-STH + UW MSE (регрессия) + Focal BCE (экстремумы)
#    Имя и сигнатура build_light_hybrid сохранены как у вас.
# -------------------------
def build_light_hybrid(input_shapes):
    """Build and compile the four-output hybrid model.

    Parameters
    ----------
    input_shapes : dict
        ``input_shapes['seq']`` is the shape of the sequence input and
        ``input_shapes['flat']`` the shape of the flat input (both without the
        batch dimension, as passed to ``Input``).

    Returns
    -------
    A compiled Keras ``Model`` with inputs ``[seq_in, flat_in]`` and outputs
    ``solar_reg``, ``wind_reg`` (linear) and ``solar_ext``, ``wind_ext``
    (sigmoid). The regression heads use the uncertainty-weighted MSE tied to
    the weights of the ``loss_scales`` layer; the extreme-event heads use
    ``focal_bce``. The optimizer is ``Adam(0.001)``.
    """
    seq_in = Input(shape=input_shapes['seq'], name='seq_in')
    flat_in= Input(shape=input_shapes['flat'],name='flat_in')

    # Branch 1: LSTM
    p1 = LSTM(64, return_sequences=True)(seq_in)
    p1 = LSTM(32, return_sequences=False)(p1)

    # Branch 2: GRU
    p2 = GRU(64, return_sequences=True)(seq_in)
    p2 = GRU(32, return_sequences=False)(p2)

    # Branch 3: causal TCN
    tcn = causal_dilated_tcn(seq_in, filters=48, k=3, dilations=(1,2,4,8), dropout=0.05)
    p3 = GlobalAveragePooling1D()(tcn)

    # Branch 4: causal Transformer (light)
    # The 1x1 convolution projects the input to 32 channels, matching d_model=32 below.
    proj = Conv1D(32, 1, padding="same", activation="relu")(seq_in)
    p4 = light_transformer_branch(proj, d_model=32, nhead=4, ff_dim=64, dropout=0.1)

    # Branch 5: flat exogenous
    p5 = Dense(32, activation='relu')(flat_in)
    p5 = BatchNormalization()(p5)

    # Fusion of the five branch outputs
    merged = Concatenate()([p1,p2,p3,p4,p5])
    merged = Dense(128, activation='relu')(merged)
    merged = Dropout(0.2)(merged)

    # Identity layer; it only holds the trainable log-variance weights read below.
    merged = LossScaleLayer(name="loss_scales")(merged)

    # Outputs
    solar_reg = Dense(1, name='solar_reg')(merged)
    wind_reg  = Dense(1, name='wind_reg')(merged)
    solar_ext = Dense(1, activation='sigmoid', name='solar_ext')(merged)
    wind_ext  = Dense(1, activation='sigmoid', name='wind_ext')(merged)

    model = Model(inputs=[seq_in,flat_in],
                  outputs=[solar_reg,wind_reg,solar_ext,wind_ext],
                  name="DP_STH_UWReg_FocalExt")

    # Bind each regression loss to its own log-variance weight of the loss_scales layer.
    lw = model.get_layer("loss_scales")
    losses = {
        'solar_reg': _u_mse(lw.log_var_solar_reg),
        'wind_reg' : _u_mse(lw.log_var_wind_reg),
        'solar_ext': focal_bce,
        'wind_ext' : focal_bce,
    }
    model.compile(optimizer=Adam(0.001), loss=losses)
    return model

# -------------------------
#5) Training/Evaluation (fixed: train-only preprocessing + train-only thresholds)
# -------------------------
def run_holdout(df, features_all, y_solar, y_wind, n_steps=24, epochs=25, batch=64, seed=42):
    """Fit the hybrid model on the first 80% of rows and score it on the rest.

    Feature preprocessing (quantile clipping, min-max scaling) and the
    95th-percentile "extreme" thresholds are all derived from the training
    part only and then applied to the test part.

    Parameters
    ----------
    df : pandas.DataFrame
        Source table; it is split positionally, without shuffling.
    features_all : list
        Feature column names selected from ``df``.
    y_solar, y_wind
        Forwarded unchanged to ``preprocess`` to obtain the two target arrays.
    n_steps : int
        Requested window length; capped at the number of training rows.
    epochs, batch : int
        Passed to ``model.fit`` as ``epochs`` and ``batch_size``.
    seed : int or None
        When not ``None`` the Keras session is cleared and the RNGs are seeded
        before anything is built (the same approach ``run_cv`` uses per fold),
        so repeated runs are comparable. ``None`` skips both steps.

    Returns
    -------
    dict
        ``{'Solar': {...}, 'Wind': {...}}``, each holding RMSE, MAE, MAPE,
        R2, EVS and AUC computed on the test part.
    """
    if seed is not None:
        K.clear_session()
        tf.keras.utils.set_random_seed(seed)

    # Only the targets are taken from preprocess(); its first return value
    # (the transformed X) is discarded because X is rebuilt train-only below.
    _, ys_all, yw_all = preprocess(df, features_all, y_solar, y_wind)

    cut = int(len(df) * 0.8)

    # ---- TRAIN-ONLY preprocessing of X ----
    Xtr_raw = df.iloc[:cut][features_all].copy()
    Xte_raw = df.iloc[cut:][features_all].copy()

    # Winsorize every column at the 1%/99% quantiles of the training rows.
    q_low, q_high = Xtr_raw.quantile(0.01), Xtr_raw.quantile(0.99)
    Xtr_raw = Xtr_raw.clip(lower=q_low, upper=q_high, axis=1)
    Xte_raw = Xte_raw.clip(lower=q_low, upper=q_high, axis=1)

    # sign-log1p helper is applied to both parts; nothing is fitted here.
    Xtr_t = _sign_log1p_df(Xtr_raw).values
    Xte_t = _sign_log1p_df(Xte_raw).values

    # Min-max scaling fitted on the training rows only.
    scaler = MinMaxScaler().fit(Xtr_t)
    Xtr = scaler.transform(Xtr_t)
    Xte = scaler.transform(Xte_t)

    # Targets (already transformed by preprocess).
    ys_tr, ys_te = ys_all[:cut], ys_all[cut:]
    yw_tr, yw_te = yw_all[:cut], yw_all[cut:]

    # Windowed sequences.
    # NOTE(review): targets are passed at full split length, so make_seq is
    # presumably expected to return one window per input row - confirm.
    L = min(n_steps, len(Xtr))
    Xtr_seq, Xte_seq = make_seq(Xtr, L), make_seq(Xte, L)

    # Flat exogenous input: the last time step of each window.
    Xtr_flat = Xtr_seq[:, -1, :]
    Xte_flat = Xte_seq[:, -1, :]

    # "Extreme" labels: strictly above the 95th percentile of the TRAIN targets.
    thr_s = np.percentile(ys_tr, 95)
    thr_w = np.percentile(yw_tr, 95)
    ys_tr_ext = (ys_tr > thr_s).astype(int)
    ys_te_ext = (ys_te > thr_s).astype(int)
    yw_tr_ext = (yw_tr > thr_w).astype(int)
    yw_te_ext = (yw_te > thr_w).astype(int)

    model = build_light_hybrid({'seq': (L, Xtr.shape[1]), 'flat': (Xtr.shape[1],)})

    # The test part is passed as validation_data for monitoring only; no
    # callbacks are registered in this call.
    model.fit([Xtr_seq, Xtr_flat],
              {'solar_reg': ys_tr, 'wind_reg': yw_tr,
               'solar_ext': ys_tr_ext, 'wind_ext': yw_tr_ext},
              validation_data=([Xte_seq, Xte_flat],
                               {'solar_reg': ys_te, 'wind_reg': yw_te,
                                'solar_ext': ys_te_ext, 'wind_ext': yw_te_ext}),
              epochs=epochs, batch_size=batch, verbose=1)

    pr = model.predict([Xte_seq, Xte_flat], verbose=0)
    s_pred, w_pred = pr[0].ravel(), pr[1].ravel()
    s_ext, w_ext = pr[2].ravel(), pr[3].ravel()

    def _scores(y_true, y_hat, flag_true, flag_prob):
        # One metric row: regression scores plus AUC of the extreme-event head.
        return {"RMSE": np.sqrt(mean_squared_error(y_true, y_hat)),
                "MAE": mean_absolute_error(y_true, y_hat),
                "MAPE": safe_mape(y_true, y_hat),
                "R2": r2_score(y_true, y_hat),
                "EVS": explained_variance_score(y_true, y_hat),
                "AUC": safe_auc(flag_true, flag_prob)}

    metrics = {
        'Solar': _scores(ys_te, s_pred, ys_te_ext, s_ext),
        'Wind': _scores(yw_te, w_pred, yw_te_ext, w_ext),
    }
    print("\n=== Holdout 80/20 ===")
    print(pd.DataFrame(metrics).T)
    return metrics

def run_cv(df, features_all, y_solar, y_wind, n_steps=24, epochs=25, batch=64, n_splits=5):
    """Time-series cross-validation of the hybrid model.

    For every ``TimeSeriesSplit`` fold the feature preprocessing (quantile
    clipping, min-max scaling) and the 95th-percentile "extreme" thresholds
    are derived from that fold's training rows only. A fresh model is built
    and trained per fold after clearing the Keras session and seeding the
    RNGs with ``42 + fold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source table; folds are positional index sets produced by the splitter.
    features_all : list
        Feature column names selected from ``df``.
    y_solar, y_wind
        Forwarded unchanged to ``preprocess`` to obtain the two target arrays.
    n_steps : int
        Requested window length; capped at the fold's number of training rows.
    epochs, batch : int
        Passed to ``fit`` as ``epochs`` and ``batch_size``.
    n_splits : int
        Number of ``TimeSeriesSplit`` folds (default 5, the previous fixed value).

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_s, df_w, df_auc)``: per-fold solar metrics, per-fold wind
        metrics (MSE, RMSE, MAE, MAPE, R2, EVS) and per-fold AUCs
        (``AUC_s``, ``AUC_w``).
    """
    # Only the targets are taken from preprocess(); X is rebuilt per fold.
    _, ys_all, yw_all = preprocess(df, features_all, y_solar, y_wind)

    def _reg_scores(y_true, y_hat):
        # MSE is computed once and reused for RMSE.
        mse = mean_squared_error(y_true, y_hat)
        return {"MSE": mse,
                "RMSE": np.sqrt(mse),
                "MAE": mean_absolute_error(y_true, y_hat),
                "MAPE": safe_mape(y_true, y_hat),
                "R2": r2_score(y_true, y_hat),
                "EVS": explained_variance_score(y_true, y_hat)}

    scores_s, scores_w, aucs = [], [], []
    tscv = TimeSeriesSplit(n_splits=n_splits)

    for f, (tr, te) in enumerate(tscv.split(df), 1):
        print(f"\nFOLD {f}")
        K.clear_session()
        tf.keras.utils.set_random_seed(42 + f)

        # ---- TRAIN-ONLY preprocessing of X in the current fold ----
        Xtr_raw = df.iloc[tr][features_all].copy()
        Xte_raw = df.iloc[te][features_all].copy()

        # Winsorize every column at the fold's training 1%/99% quantiles.
        q_low, q_high = Xtr_raw.quantile(0.01), Xtr_raw.quantile(0.99)
        Xtr_raw = Xtr_raw.clip(lower=q_low, upper=q_high, axis=1)
        Xte_raw = Xte_raw.clip(lower=q_low, upper=q_high, axis=1)

        Xtr_t = _sign_log1p_df(Xtr_raw).values
        Xte_t = _sign_log1p_df(Xte_raw).values

        # Min-max scaling fitted on the fold's training rows only.
        scaler = MinMaxScaler().fit(Xtr_t)
        Xtr = scaler.transform(Xtr_t)
        Xte = scaler.transform(Xte_t)

        ys_tr, ys_te = ys_all[tr], ys_all[te]
        yw_tr, yw_te = yw_all[tr], yw_all[te]

        # NOTE(review): targets are passed at full fold length, so make_seq is
        # presumably expected to return one window per input row - confirm.
        L = min(n_steps, len(Xtr))
        Xtr_seq, Xte_seq = make_seq(Xtr, L), make_seq(Xte, L)

        # Flat exogenous input: the last time step of each window.
        Xtr_flat = Xtr_seq[:, -1, :]
        Xte_flat = Xte_seq[:, -1, :]

        # "Extreme" labels: strictly above the 95th percentile of the TRAIN targets.
        thr_s = np.percentile(ys_tr, 95)
        thr_w = np.percentile(yw_tr, 95)
        ys_tr_ext = (ys_tr > thr_s).astype(int)
        ys_te_ext = (ys_te > thr_s).astype(int)
        yw_tr_ext = (yw_tr > thr_w).astype(int)
        yw_te_ext = (yw_te > thr_w).astype(int)

        m = build_light_hybrid({'seq': (L, Xtr.shape[1]), 'flat': (Xtr.shape[1],)})
        m.fit([Xtr_seq, Xtr_flat],
              {'solar_reg': ys_tr, 'wind_reg': yw_tr,
               'solar_ext': ys_tr_ext, 'wind_ext': yw_tr_ext},
              epochs=epochs, batch_size=batch, verbose=1)

        # Quiet prediction, matching run_holdout.
        pr = m.predict([Xte_seq, Xte_flat], verbose=0)
        s_pred, w_pred = pr[0].ravel(), pr[1].ravel()
        s_ext, w_ext = pr[2].ravel(), pr[3].ravel()

        ms = _reg_scores(ys_te, s_pred)
        mw = _reg_scores(yw_te, w_pred)

        scores_s.append(ms)
        scores_w.append(mw)
        aucs.append({"AUC_s": safe_auc(ys_te_ext, s_ext),
                     "AUC_w": safe_auc(yw_te_ext, w_ext)})

        print(f"Solar fold {f}: {ms}")
        print(f"Wind  fold {f}: {mw}")

    df_s = pd.DataFrame(scores_s)
    df_w = pd.DataFrame(scores_w)
    df_auc = pd.DataFrame(aucs)

    def summarize(dfm):
        """Per-metric mean, sample std and two-sided 90% Student-t CI of the mean."""
        n = len(dfm)
        mean = dfm.mean()
        sd = dfm.std(ddof=1)
        alpha = 0.10  # two-sided 90% interval -> 0.95 quantile of t
        tcrit = t.ppf(1 - alpha / 2, df=n - 1)
        half_width = tcrit * sd / np.sqrt(n)
        low = mean - half_width
        high = mean + half_width
        return pd.concat([mean.rename('mean'), sd.rename('std'),
                          low.rename('CI90_low'), high.rename('CI90_high')], axis=1)

    print("\n=== CV Summary Solar ===")
    print(summarize(df_s))
    print("\n=== CV Summary Wind ===")
    print(summarize(df_w))
    print("\n=== CV Summary AUCs ===")
    print(df_auc.mean())
    return df_s, df_w, df_auc

# =====  run  =====
# Single chronological holdout; `hold` receives the metric dict it returns.
hold = run_holdout(
    df, features_all, y_solar, y_wind,
    n_steps=24, epochs=20, batch=64,
)
# Time-series cross-validation; returns the per-fold solar, wind and AUC frames.
cv_s, cv_w, cv_auc = run_cv(
    df, features_all, y_solar, y_wind,
    n_steps=24, epochs=20, batch=64,
)


Epoch 1/20


2025-09-02 11:47:51.490846: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/AssignAddVariableOp.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

=== Holdout 80/20 ===
           RMSE       MAE       MAPE        R2       EVS       AUC
Solar  1.076866  0.655597  23.426150  0.898312  0.920069  0.986183
Wind   0.652177  0.466370  20.239491  0.822705  0.872466  0.996852

FOLD 1
Epoch 1/20


2025-09-02 11:51:23.710941: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/AssignAddVariableOp.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Solar fold 1: {'MSE': 1.0749156838510963, 'RMSE': 1.0367814060114582, 'MAE': 0.8576896500076532, 'MAPE': 15.544632292533928, 'R2': 0.9229148317068199, 'EVS': 0.946375659678357}
Wind  fold 1: {'MSE': 0.8189755435807873, 'RMSE': 0.9049726755989859, 'MAE': 0.5543364320226374, 'MAPE': 147.85254026907165, 'R2': 0.728668688844047, 'EVS': 0.7804156596393359}

FOLD 2
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Solar fold 2: {'MSE': 1.5743186541436953, 'RMSE': 1.2547185557501312, 'MAE': 0.8165996652660762, 'MAPE': 18.374142248016554, 'R2': 0.8718939821055657, 'EVS': 0.8944677356492108}
Wind  fol

2025-09-02 11:53:15.666237: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/AssignAddVariableOp.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Solar fold 3: {'MSE': 0.6830073854844424, 'RMSE': 0.8264426087057966, 'MAE': 0.5400405599633331, 'MAPE': 16.995449946153506, 'R2': 0.9432356539024233, 'EVS': 0.9460304580091136}
Wind  fold 3: {'MSE': 0.5287764847592, 'RMSE': 0.7271701896799676, 'MAE': 0.48078984615592907, 'MAPE': 263.94492494488037, 'R2': 0.8521755839104596, 'EVS': 0.8522515505056617}

FOLD 4
Epoch 1/20


2025-09-02 11:54:57.526972: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/AssignAddVariableOp.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Solar fold 4: {'MSE': 0.7756578996252853, 'RMSE': 0.8807144256938712, 'MAE': 0.5342157804226838, 'MAPE': 13.6688860397093, 'R2': 0.944121179267931, 'EVS': 0.9456645030948168}
Wind  fold 4: {'MSE': 0.42973131393339103, 'RMSE': 0.655538949211556, 'MAE': 0.37901846490967683, 'MAPE': 72.04646391241538, 'R2': 0.8489642977580875, 'EVS': 0.8525381837275224}

FOLD 5
Epoch 1/20


2025-09-02 11:57:11.728518: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/AssignAddVariableOp.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Solar fold 5: {'MSE': 0.7025431139220373, 'RMSE': 0.8381784499270053, 'MAE': 0.607861031487071, 'MAPE': 16.142028702796793, 'R2': 0.9373984371446329, 'EVS': 0.9379741450702517}
Wind  fold 5: {'MSE': 0.1699108479807198, 'RMSE': 0.41220243568023684, 'MAE': 0.2895546773876691, 'MAPE': 25.074787324226833, 'R2': 0.9313588307969402, 'EVS': 0.9346564530317483}

=== CV Summary Solar ===
           mean       std   CI90_low  CI90_high
MSE    0.962089  0.376684   0.602962   1.321215
RMSE   0.967367  0.181278   0.794538   1.140196
MAE    0.671281  0.154838   0.523661   0.818902
MAPE  16.145028  1.745274  14.481099  17.808957
R2     0.923913  0.030293   0.895032   0.952794
EVS    0.934103  0.022430   0.912718   0.955487

=== CV Summary Wind ===
            mean        std   CI90_lo