# Electricity Playground

This notebook demonstrates the power of information theory for energy market analysis, using a data set of day-ahead spot electricity prices in Switzerland.

In [None]:
import os

import pandas as pd

# Location of the day-ahead price CSV. The default is the original local path;
# set the ENTROPY_LAB_PRICES_CSV environment variable to point at another copy
# so the notebook can run on a different machine without editing this cell.
PRICES_CSV = os.environ.get(
    "ENTROPY_LAB_PRICES_CSV",
    "/Users/fdolci/projects/entropy_lab/data/ogd106_preise_strom_boerse.csv",
)

df = pd.read_csv(PRICES_CSV)

# Positional rename to the short names used by every later cell. This raises
# if the file does not have exactly two columns.
df.columns = ["date", "price"]
df.head()

We will use the Kullback–Leibler (KL) divergence to compare the price distribution of each year (or month) against that of a reference period. We expect to observe:
* calm years -> low KL
* crisis/volatility years -> high KL
* market normalization -> KL decreases again

In [None]:
# Reference window: prices dated inside it form the baseline distribution.
BASELINE_START, BASELINE_END = "2017-01-01", "2017-12-31"

# Frequency string handed to `Series.dt.to_period` when grouping observations.
GROUP_FREQ = "Y"

# Number of histogram bins (the bin-edge array has N_BINS + 1 entries).
N_BINS = 40

# Added to every histogram count before normalisation so no bin is exactly 0.
# NOTE(review): the value is tiny; the KL contribution of bins that are empty
# in the baseline presumably depends strongly on it -- confirm it is intended.
EPSILON = 1e-80

In [None]:
# Data cleaning: coerce both columns to proper dtypes (unparseable dates become
# NaT, non-numeric prices become NaN), drop the rows where either is missing,
# and order the frame chronologically.
df = (
    df.assign(
        date=pd.to_datetime(df["date"], errors="coerce"),
        price=pd.to_numeric(df["price"], errors="coerce"),
    )
    .dropna(subset=["date", "price"])
    .sort_values("date")
)
df.head()

We can now build our baseline distribution:

In [None]:
import numpy as np

# Select the baseline sample: rows dated within [BASELINE_START, BASELINE_END],
# both bounds inclusive.
# NOTE(review): the bounds are compared as given, so a timestamp later than
# midnight on BASELINE_END would fall outside -- confirm the data is daily.
baseline_mask = df["date"].between(BASELINE_START, BASELINE_END)
baseline_prices = df.loc[baseline_mask, "price"].to_numpy()

if baseline_prices.size == 0:
    raise ValueError("No baseline data found. Check BASELINE_START/END and date parsing.")

# One common set of bin edges, derived from the full price range, so that every
# histogram built later is directly comparable with the baseline.
p_min = df["price"].min()
p_max = df["price"].max()

# Pad the range by 2% on each side so extreme values are not sitting on an
# outer edge; fall back to a unit span when all prices are identical.
if p_max > p_min:
    margin = 0.02 * (p_max - p_min)
else:
    margin = 0.02 * 1.0
bins = np.linspace(p_min - margin, p_max + margin, N_BINS + 1)

# Reference distribution q: baseline counts, smoothed with EPSILON so no bin is
# exactly zero, then normalised to sum to 1.
q_hist, _ = np.histogram(baseline_prices, bins=bins)
q_ref = q_hist + EPSILON
q_ref = q_ref / q_ref.sum()

We can now compute the KL divergence for each period:

In [None]:
from entropy_lab.measures.entropy import compute_kl_divergence
import numpy as np 

# Tag each observation with the period (per GROUP_FREQ) that contains its date.
df["period"] = df["date"].dt.to_period(GROUP_FREQ)

rows = []
for period, g in df.groupby("period"):
    prices = g["price"].values
    if prices.size < 5:
        # Too few observations for a meaningful histogram.
        continue

    # Histogram on the shared bin edges, smoothed and normalised exactly like
    # the reference distribution.
    p_hist, _ = np.histogram(prices, bins=bins)
    p_dist = p_hist + EPSILON
    p_dist = p_dist / p_dist.sum()

    # Divergence of this period's distribution from the baseline, with base=2.
    kl = compute_kl_divergence(p_dist, q_ref, base=2)

    rows.append(
        dict(
            period=str(period),
            start_date=g["date"].min(),
            mean_price=prices.mean(),
            median_price=np.median(prices),
            std_price=prices.std(),
            min_price=prices.min(),
            max_price=prices.max(),
            n_obs=prices.size,
            kl_vs_baseline_bits=kl,
        )
    )

# One row per retained period, in chronological order with a fresh 0..n-1 index.
res = pd.DataFrame(rows).sort_values("start_date", ignore_index=True)

In [None]:
# Display the per-period summary table (one row per retained period, sorted by start_date).
res

Finally, we plot the daily prices together with the KL divergence of each period:

In [None]:
import matplotlib.pyplot as plt

# Parse the baseline bounds once; both panels shade the same window.
baseline_from = pd.to_datetime(BASELINE_START)
baseline_to = pd.to_datetime(BASELINE_END)

fig, axes = plt.subplots(
    nrows=2,
    ncols=1,
    sharex=True,
    figsize=(13, 7),
    gridspec_kw={"height_ratios": [2.2, 1.2]},
)
price_ax, kl_ax = axes

# Upper panel: the price series itself.
price_ax.plot(df["date"], df["price"], linewidth=1.2)
price_ax.set(
    ylabel="Day-ahead price",
    title="Swiss Day-Ahead Power Prices and KL Divergence vs Baseline",
)
price_ax.grid(True, alpha=0.3)

# Shade and label the baseline window; the label sits at 90% of the current
# upper y-limit, anchored at the start of the window.
price_ax.axvspan(baseline_from, baseline_to, alpha=0.15)
price_ax.text(
    baseline_from,
    0.9 * price_ax.get_ylim()[1],
    "Baseline period",
    fontsize=10,
)

# Lower panel: KL divergence per period, with a zero reference line.
kl_ax.plot(res["start_date"], res["kl_vs_baseline_bits"], linewidth=2)
kl_ax.axhline(0, linewidth=1)
kl_ax.set(ylabel="KL [bits]", xlabel="Date")
kl_ax.grid(True, alpha=0.3)

# Same baseline shading as in the upper panel.
kl_ax.axvspan(baseline_from, baseline_to, alpha=0.15)

plt.tight_layout()
plt.show()