In [3]:
# Load the 2025 SE2 balance market data and check how activations are distributed.
import os
import sys
from pathlib import Path

# Directory layout (per the original note: notebooks -> upreg_classify):
# the parent of the working directory is the package root, and its parent
# is the repository root.
UPREG_ROOT = Path.cwd().parent
REPO_ROOT = UPREG_ROOT.parent
# Put the package root first on the import path so `from src...` resolves.
sys.path.insert(0, str(UPREG_ROOT))

# These imports intentionally come after the sys.path change above.
import pandas as pd

from src.data.preprocess import ensure_datetime_index

data_path = REPO_ROOT.joinpath(
    'upreg_classify',
    'data',
    'raw',
    'balancing',
    'BalanceMarket_2025_SE2_EUR_None_MW.csv',
)

# The raw export is semicolon-separated.
df = pd.read_csv(data_path, sep=';')
df = df.rename(columns={"Delivery Start (CET)": "Time"})
# NOTE(review): ensure_datetime_index is a project helper; presumably it parses
# 'Time' with the given format and installs it as the index -- confirm in
# src.data.preprocess.
df = ensure_datetime_index(df, 'Time', fmt='%d.%m.%Y %H:%M:%S')
# Keep only the first row for any repeated index value.
is_repeat = df.index.duplicated(keep='first')
df = df[~is_repeat]

# Up/down activation volumes at t; values that cannot be parsed become NaN.
df['Activation Volume'] = pd.to_numeric(
    df['SE2 Activated Up Volume (MW)'], errors='coerce'
)
df['Activated Down Volume'] = pd.to_numeric(
    df['SE2 Activated Down Volume (MW)'], errors='coerce'
)
# Flag rows with a strictly positive up volume (NaN is treated as 0).
df['Activated'] = df['Activation Volume'].fillna(0) > 0

# Boolean masks per direction; NaN compares as False, so missing volumes
# count as "not activated".
is_up = df['Activation Volume'] > 0
is_down = df['Activated Down Volume'] > 0

# The three counts are not mutually exclusive: a row with both a positive up
# and a positive down volume is counted under 'up' and under 'down'.
up_count = is_up.sum()
down_count = is_down.sum()
none_count = (~(is_up | is_down)).sum()

distribution = dict(up=up_count, down=down_count, none=none_count)
print(distribution)




{'up': np.int64(11084), 'down': np.int64(14254), 'none': np.int64(2517)}
