#  Event & Regime Detection  
Identify stress periods in the GB power market (2024 replay)


In [1]:
# Setup: make the project importable, load the merged feature table,
# and keep only calendar-year 2024.
import sys
from pathlib import Path

import pandas as pd

# The project root is taken to be the parent of the current working directory
# (i.e. the notebook is expected to run from a sub-folder of the project).
# It is computed once and reused for both sys.path and the data path.
notebook_dir = Path().resolve()
project_root = notebook_dir.parent

# Make 'src' importable. Guarded so that re-running this cell does not keep
# appending the same entry to sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Must come after the sys.path tweak above.
from src.features.regime_flags import add_regime_flags

# Absolute location of the processed data
DATA_DIR = project_root / "data" / "processed"
DATA_PATH = DATA_DIR / "final_merged_with_features.parquet"

# Load the DataFrame
df = pd.read_parquet(DATA_PATH)

# ── Filter to calendar-year 2024 only ────────────────────────────────────
# Half-open interval: 2024-01-01 inclusive, 2025-01-01 exclusive.
in_2024 = (df["datetime"] >= "2024-01-01") & (df["datetime"] < "2025-01-01")
df = df[in_2024].copy()
print("Rows kept for 2024:", len(df))


Rows kept for 2024: 15258


## 1. Quick look at potential drivers  

* Rolling volatility of SBP–MIP spread (`vol_spread_SBP_vs_MIP`)  
* Rolling volatility of TSD forecast error (`vol_err_TSD_%`)  
* Absolute SBP–MIP spread (`|spread_SBP_vs_MIP|`)  


In [2]:
# Columns summarised in the percentile table below.
drivers = ["vol_spread_SBP_vs_MIP", "vol_err_TSD_%", "spread_SBP_vs_MIP"]

# Each rolling-volatility column paired with the series it is derived from.
vol_sources = {
    "vol_spread_SBP_vs_MIP": "spread_SBP_vs_MIP",
    "vol_err_TSD_%": "err_TSD_%",
}

# Fill in any volatility column that is absent, using a 48-period rolling std.
for vol_col, base_col in vol_sources.items():
    if vol_col in df.columns:
        continue
    rolling_window = df[base_col].rolling(window=48, min_periods=1)
    df[vol_col] = rolling_window.std()

# 90th / 95th / 99th percentiles of the absolute driver values, one row per driver.
abs_drivers = df[drivers].abs()
perc_table = abs_drivers.quantile([0.90, 0.95, 0.99]).T.round(3).rename_axis("driver")
display(perc_table)


Unnamed: 0_level_0,0.90,0.95,0.99
driver,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
vol_spread_SBP_vs_MIP,33.461,37.261,55.288
vol_err_TSD_%,0.071,0.078,0.087
spread_SBP_vs_MIP,38.59,50.61,79.053


### 2. Define thresholds  
*We label a period as **High-Vol** (`HIGH_VOL`) if any driver exceeds its 95th percentile,  
and as **Extreme** (`EXTREME`) if at least two drivers exceed their 99th percentile.*


In [3]:
# Settings handed to add_regime_flags, collected in one place.
regime_settings = {
    "config": {},
    "window": 48,        # recompute vol with 1-day window for safety
    "perc_95": 0.95,
    "perc_99": 0.99,
}
df = add_regime_flags(df, **regime_settings)

# Peek at the two columns inspected after flagging.
df[["regime_flag", "is_stress_event"]].head()


Unnamed: 0,regime_flag,is_stress_event
0,NORMAL,0
1,NORMAL,0
2,NORMAL,0
3,NORMAL,0
4,NORMAL,0


In [4]:
print("Regime distribution (2024):")

# Row count per regime label (missing labels included), plus each label's
# share of all rows rounded to three decimals.
regime_rows = df["regime_flag"].value_counts(dropna=False).to_frame("rows")
regime_rows["share"] = (regime_rows["rows"] / len(df)).round(3)
display(regime_rows)


Regime distribution (2024):


Unnamed: 0_level_0,rows,share
regime_flag,Unnamed: 1_level_1,Unnamed: 2_level_1
NORMAL,13177,0.864
HIGH_VOL,2043,0.134
EXTREME,38,0.002


In [5]:
# Persist the flagged 2024 frame next to the input it was built from.
# DATA_DIR is the project-level data/processed folder the input was read from;
# a bare relative path ("data/processed/...") would instead resolve against the
# notebook's working directory and create a second data tree there.
OUT_PATH = DATA_DIR / "final_merged_with_regimes.parquet"
OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(OUT_PATH, index=False)
print("✅ Saved", OUT_PATH)


✅ Saved data\processed\final_merged_with_regimes.parquet


### 3. Export quick counts for later use



In [6]:
# Write the per-regime row counts as a small CSV in the project-level
# data/processed folder (DATA_DIR), rather than a path relative to the
# notebook's working directory.
counts_path = DATA_DIR / "regime_counts_2024.csv"
# Create the folder here so this cell does not rely on an earlier cell having done it.
counts_path.parent.mkdir(parents=True, exist_ok=True)
df["regime_flag"].value_counts().to_csv(counts_path, header=["rows"])
print("✅ Saved", counts_path)


✅ Saved data\processed\regime_counts_2024.csv
