## Trading costs 

In [None]:
"""
jepx_loader.py  —  v2
======================
• load_curve(...)          # unchanged behaviour for a single day
• load_curves(...)         # NEW: multi-day panel loader

Classes
-------
BidCurve        : one trading day   (unchanged)
MultiBidCurve   : many trading days (new)
"""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Mapping, Iterable, Dict, Union

import numpy as np
import pandas as pd

# ---------------------------------------------------------------------
# regex helpers to locate the repeating column triplets
_PRICE_RE  = re.compile(r"^PriceBin_(\d+)$")
_SUPPLY_RE = re.compile(r"^SupplyCumVol_(\d+)$")
_DEMAND_RE = re.compile(r"^DemandCumVol_(\d+)$")


# ---------------------------------------------------------------------
def _read_any(path: str | Path, **kwargs) -> pd.DataFrame:
    """Read a tabular file into a DataFrame, picking the reader by extension.

    ``.xlsx`` / ``.xls`` suffixes (case-insensitive) are read with
    ``pd.read_excel``; every other suffix goes to ``pd.read_csv``.
    Extra keyword arguments are forwarded to the chosen reader unchanged.
    """
    source = Path(path)
    is_excel = source.suffix.lower() in (".xlsx", ".xls")
    reader = pd.read_excel if is_excel else pd.read_csv
    return reader(source, **kwargs)


def _find_triplets(df: pd.DataFrame):
    """Locate the repeating PriceBin_/SupplyCumVol_/DemandCumVol_ columns.

    Returns
    -------
    bins : list[int]
        Sorted bin numbers for which all three columns exist.  Bins that
        have only one or two of the three columns are ignored.
    price, supply, demand : dict[int, str]
        Bin number -> column name, for every matching column found.

    Raises
    ------
    ValueError
        If no bin number has a complete price/supply/demand triplet.
    """
    price, supply, demand = {}, {}, {}
    matchers = ((_PRICE_RE, price), (_SUPPLY_RE, supply), (_DEMAND_RE, demand))

    for col in df.columns:
        # Column labels are not always strings (e.g. numeric Excel headers);
        # a regex match on those would raise TypeError, so skip them.
        if not isinstance(col, str):
            continue
        for pattern, bucket in matchers:
            m = pattern.match(col)
            if m:
                bucket[int(m.group(1))] = col
                break

    bins = sorted(price.keys() & supply.keys() & demand.keys())
    if not bins:
        raise ValueError("Could not locate PriceBin_/SupplyCumVol_/DemandCumVol_ columns")
    return bins, price, supply, demand


# =====================================================================
#  Single-day container  (unchanged from v1, trimmed for brevity)
# =====================================================================
@dataclass
class BidCurve:
    """Supply and demand bid curves for one trading day.

    ``supply`` and ``demand`` hold one row per 30-minute slot and one column
    per price bin.  Rows are addressed *by position*: row ``k - 1`` is time
    code ``k`` (see ``slice_time``).
    """
    region: str
    date: pd.Timestamp
    bins: np.ndarray                       # ¥/kWh price grid
    supply: pd.DataFrame                   # slots × N_bins, row k-1 ↔ time code k
    demand: pd.DataFrame
    df_raw: pd.DataFrame
    _long_cache: pd.DataFrame | None = field(default=None, init=False, repr=False)

    def slice_time(self, time_code: int) -> pd.DataFrame:
        """Return a (price_bin × 2) DataFrame for one 30-min slot.

        Columns are ``supply_cum`` and ``demand_cum``; the index is ``bins``.
        Raises ValueError when *time_code* is outside 1–48.
        """
        if not (1 <= time_code <= 48):
            raise ValueError("time_code must be 1–48")
        row = time_code - 1
        return pd.DataFrame(
            {"supply_cum": self.supply.iloc[row].values,
             "demand_cum": self.demand.iloc[row].values},
            index=self.bins,
        )

    def to_long(self) -> pd.DataFrame:
        """Return a tidy frame with one row per (slot, side, price bin).

        Columns: date, region, time_code, side, price, cum_vol.  ``time_code``
        is the 1-based row position, matching ``slice_time``.  The result is
        cached on the instance and the same object is returned on later calls.
        """
        if self._long_cache is not None:
            return self._long_cache

        def _stack(frame: pd.DataFrame, side: str) -> pd.DataFrame:
            # Build the long form directly.  Adding a "side" column *before*
            # stacking would fold it into the stacked values and lose it.
            n_slots, n_bins = frame.shape
            return pd.DataFrame({
                "time_code": np.repeat(np.arange(1, n_slots + 1), n_bins),
                "side": side,
                "price": np.tile(frame.columns.to_numpy(), n_slots),
                "cum_vol": frame.to_numpy().ravel(),      # row-major: slot, then bin
            })

        long = pd.concat(
            [_stack(self.supply, "supply"), _stack(self.demand, "demand")],
            ignore_index=True,
        )
        long["date"] = self.date
        long["region"] = self.region
        self._long_cache = long[["date", "region", "time_code", "side", "price", "cum_vol"]]
        return self._long_cache

    def __repr__(self):
        # Report the actual number of slot rows rather than assuming 48.
        return (f"<BidCurve {self.region} {self.date.date()} "
                f"({len(self.supply)} × {len(self.bins)} bins)>")


# =====================================================================
#  MULTI-day container
# =====================================================================
@dataclass
class MultiBidCurve:
    """Panel of many days (index = exact 30-minute timestamps).

    ``supply`` and ``demand`` share a DatetimeIndex (one row per slot) and
    have one column per price bin.  When that index is time-zone aware,
    naive timestamps / dates passed to the accessors are interpreted as wall
    time in the index's zone.
    """
    region: str
    bins: np.ndarray
    supply: pd.DataFrame                   # index = pd.DatetimeIndex, columns = price bins
    demand: pd.DataFrame
    df_raw: pd.DataFrame
    _long_cache: pd.DataFrame | None = field(default=None, init=False, repr=False)

    # ----- internal helpers -------------------------------------------
    def _align(self, ts: Union[str, pd.Timestamp]) -> pd.Timestamp:
        """Convert *ts* to a Timestamp comparable with the panel index."""
        stamp = pd.to_datetime(ts)
        tz = getattr(self.supply.index, "tz", None)
        if tz is None:
            return stamp
        # A naive stamp against a tz-aware index would miss (or be read as
        # UTC), so attach / convert to the index's zone first.
        if stamp.tzinfo is None:
            return stamp.tz_localize(tz)
        return stamp.tz_convert(tz)

    # ----- convenience getters ----------------------------------------
    @property
    def dates(self) -> pd.DatetimeIndex:
        """Unique trading days (midnight-normalised) present in the panel."""
        return self.supply.index.normalize().unique()

    def slice_time(self, ts: Union[str, pd.Timestamp]) -> pd.DataFrame:
        """Supply & demand at a specific timestamp (30-min boundary).

        Returns a DataFrame indexed by ``bins`` with columns ``supply_cum``
        and ``demand_cum``.  Raises KeyError if the timestamp is not in the
        index.
        """
        ts = self._align(ts)
        row_sup = self.supply.loc[ts]
        row_dem = self.demand.loc[ts]
        return pd.DataFrame({"supply_cum": row_sup.values,
                             "demand_cum": row_dem.values},
                            index=self.bins)

    def __getitem__(self, ts: Union[str, pd.Timestamp]) -> pd.DataFrame:
        """Indexing sugar: ``panel[ts]`` is ``panel.slice_time(ts)``."""
        return self.slice_time(ts)

    def iter_timeslices(self):
        """Yield ``(timestamp, slice_df)`` for every row of the panel."""
        for ts in self.supply.index:
            yield ts, self.slice_time(ts)

    def slice_day(self, date: Union[str, pd.Timestamp]) -> BidCurve:
        """Return the familiar BidCurve for one trading day.

        Raises ValueError unless exactly 48 rows fall on that calendar day.
        The returned ``BidCurve.date`` is the tz-naive calendar date when the
        panel index is tz-aware.
        """
        stamp = pd.to_datetime(date)
        idx = self.supply.index
        if getattr(idx, "tz", None) is not None:
            if stamp.tzinfo is not None:
                stamp = stamp.tz_convert(idx.tz).tz_localize(None)
            # Compare on local wall time so a naive date matches tz-aware rows.
            local_idx = idx.tz_localize(None)
        else:
            local_idx = idx
        d = stamp.normalize()
        mask = np.asarray(local_idx.normalize() == d)
        if mask.sum() != 48:
            raise ValueError(f"Expected 48 rows for {d.date()}, got {mask.sum()}")
        sup_day = self.supply.loc[mask].reset_index(drop=True)
        dem_day = self.demand.loc[mask].reset_index(drop=True)
        return BidCurve(
            region=self.region,
            date=d,
            bins=self.bins,
            supply=sup_day,
            demand=dem_day,
            df_raw=self.df_raw.loc[mask].reset_index(drop=True),
        )

    def to_long(self) -> pd.DataFrame:
        """Return a tidy frame with one row per (timestamp, side, price bin).

        Columns: date, region, time_code, timestamp, side, price, cum_vol.
        ``time_code`` is derived from the timestamp's hour and minute
        (00:00 → 1, 00:30 → 2, …).  The result is cached on the instance.
        """
        if self._long_cache is not None:
            return self._long_cache

        def _stack(df: pd.DataFrame, side: str) -> pd.DataFrame:
            # Build the long form directly.  Adding a "side" column *before*
            # stacking would fold it into the stacked values and lose it.
            n_rows, n_bins = df.shape
            return pd.DataFrame({
                "timestamp": df.index.repeat(n_bins),     # keeps the time zone
                "side": side,
                "price": np.tile(df.columns.to_numpy(), n_rows),
                "cum_vol": df.to_numpy().ravel(),         # row-major: slot, then bin
            })

        long = pd.concat([_stack(self.supply, "supply"),
                          _stack(self.demand, "demand")],
                         ignore_index=True)
        long["region"] = self.region
        long["date"] = long["timestamp"].dt.normalize()
        long["time_code"] = (
            (long["timestamp"].dt.hour * 60 + long["timestamp"].dt.minute) // 30 + 1
        )
        self._long_cache = long[["date", "region", "time_code", "timestamp",
                                 "side", "price", "cum_vol"]]
        return self._long_cache

    # ---------------------------------------------------------------
    def __repr__(self):
        n_days = len(self.dates)
        return (f"<MultiBidCurve {self.region} [{n_days} days, "
                f"{len(self.bins)} bins]>")


# =====================================================================
#  PUBLIC LOADERS
# =====================================================================
def _extract_blocks(
    df: pd.DataFrame,
    bins: Iterable[int],
    price_cols: Mapping[int, str],
    supply_cols: Mapping[int, str],
    demand_cols: Mapping[int, str],
) -> tuple[np.ndarray, pd.DataFrame, pd.DataFrame]:
    """Split *df* into a price grid plus supply / demand volume blocks.

    Parameters
    ----------
    df : source frame holding the price, supply and demand columns.
    bins : bin numbers to extract, in the desired column order.
    price_cols, supply_cols, demand_cols : bin number -> column name.

    Returns
    -------
    price_bins : float array read from the FIRST row of the price columns
        (the grid is taken to be the same on every row).
    supply, demand : float DataFrames with the same index as *df* and the
        columns relabelled to the matching entries of ``price_bins``.
    """
    # Materialise once: *bins* may be a one-shot iterator and is used thrice.
    bin_ids = list(bins)
    price_names = [price_cols[i] for i in bin_ids]
    price_bins = df[price_names].iloc[0].astype(float).to_numpy()

    def _block(col_map: Mapping[int, str]) -> pd.DataFrame:
        names = [col_map[i] for i in bin_ids]
        # Pair each column with the price of ITS bin.  Zipping against
        # col_map.values() would follow the file's column order and include
        # bins that were not requested, mislabelling the columns.
        return df[names].astype(float).rename(columns=dict(zip(names, price_bins)))

    return price_bins, _block(supply_cols), _block(demand_cols)


def load_curve(
    path: str | Path,
    *,
    region: str,
    date: str | pd.Timestamp,
    time_col: str = "Time Code",
    **read_kwargs,
) -> BidCurve:
    """
    Original one-day loader (unchanged interface).

    Reads a CSV / Excel file that must contain exactly 48 rows (one per
    30-minute slot) and returns a ``BidCurve``.  When a ``time_col`` column
    is present the rows are ordered by it, so that positional row ``k - 1``
    is time code ``k``; files without that column are used in file order.

    Raises
    ------
    ValueError
        If the triplet columns cannot be found or the file does not have
        exactly 48 rows.
    """
    df = _read_any(path, **read_kwargs)
    bins, price_cols, supply_cols, demand_cols = _find_triplets(df)

    # sanity: row count
    if len(df) != 48:
        raise ValueError(f"File contains {len(df)} rows, expected exactly 48. "
                         f"Use load_curves() for multi-day files.")

    # BidCurve addresses slots by position, so honour the time-code column
    # instead of trusting the file order (stable sort keeps ties as read).
    if time_col in df.columns:
        df = df.sort_values(time_col, kind="mergesort").reset_index(drop=True)

    price_bins, supply, demand = _extract_blocks(df, bins,
                                                 price_cols, supply_cols, demand_cols)

    return BidCurve(
        region=region,
        date=pd.to_datetime(date),
        bins=price_bins,
        supply=supply.reset_index(drop=True),
        demand=demand.reset_index(drop=True),
        df_raw=df,
    )


def load_curves(
    path: str | Path,
    *,
    region: str,
    date_col: str = "Date",
    time_col: str = "Time Code",
    tz: str = "Asia/Tokyo",
    **read_kwargs,
) -> MultiBidCurve:
    """
    NEW multi-day loader.
    • `date_col` must hold the delivery date (YYYY-MM-DD or similar).
    • `time_col` must be the 1-to-48 code inside each day.
    Rows need **not** be perfectly ordered; duplicates are dropped
    (the first row per timestamp is kept).

    Each row's timestamp is ``date + (time_code - 1) * 30 min``, localised
    to *tz*; the frame is indexed and sorted by that timestamp.

    Raises
    ------
    ValueError
        If `date_col` / `time_col` are missing, if any time code lies outside
        1-48, or if the price/supply/demand triplet columns cannot be found.
    """
    df = _read_any(path, **read_kwargs).copy()
    if date_col not in df.columns or time_col not in df.columns:
        raise ValueError(f"Expected columns '{date_col}' and '{time_col}'")

    # build timestamp index ------------------------------------------------
    df[date_col] = pd.to_datetime(df[date_col])
    df[time_col] = df[time_col].astype(int)

    # A code outside 1-48 would spill into a neighbouring day and could then
    # be silently discarded as a "duplicate" below, so reject it up front.
    out_of_range = df.loc[~df[time_col].between(1, 48), time_col]
    if not out_of_range.empty:
        sample = sorted(out_of_range.unique().tolist())[:5]
        raise ValueError(f"'{time_col}' must hold time codes 1-48; found {sample}")

    df["timestamp"] = (
        df[date_col]
        + pd.to_timedelta((df[time_col] - 1) * 30, unit="m")
    ).dt.tz_localize(tz)

    df = df.set_index("timestamp").sort_index()
    df = df[~df.index.duplicated(keep="first")]       # drop accidental dups

    bins, price_cols, supply_cols, demand_cols = _find_triplets(df)

    price_bins, supply_block, demand_block = _extract_blocks(
        df, bins, price_cols, supply_cols, demand_cols
    )

    # supply_block / demand_block retain same index as df (timestamp)
    return MultiBidCurve(
        region=region,
        bins=price_bins,
        supply=supply_block,
        demand=demand_block,
        df_raw=df,
    )


# ---------------------------------------------------------------------
#  CLI smoke-test
# ---------------------------------------------------------------------
if __name__ == "__main__":
    import sys

    # Ask for the data file explicitly instead of dying with an IndexError.
    if len(sys.argv) < 2:
        raise SystemExit(f"usage: {sys.argv[0]} <multi-day JEPX csv/xlsx file>")

    file_path = sys.argv[1]
    curves = load_curves(file_path, region="Kanto", date_col="Date")
    print(curves)                     # e.g. <MultiBidCurve Kanto [365 days, 1001 bins]>
    print("First slot:", curves.slice_time(curves.supply.index[0]).head())

    # Slice a day that is known to be in the file rather than a fixed date.
    first_day = curves.dates[0]
    print(f"{first_day.date()} clearing object:", curves.slice_day(first_day))


In [None]:
# Usage example for the loader: read a multi-day file once, then pull out
# a single slot, a single day, and the long-format table.
import jepx_loader as jl

# load a whole-year CSV for the Tokyo area (returns a MultiBidCurve)
panel = jl.load_curves("jepx_2024_tokyo.csv", region="Tokyo")

# 1) Access a single half-hour
sd_12 = panel.slice_time("2024-06-01 12:00")   # DataFrame (index = price bins) with supply_cum / demand_cum

# 2) Access an entire day — raises ValueError unless that day has exactly 48 rows
day_curve = panel.slice_day("2024-06-01")      # ➜ BidCurve (48 rows)

# 3) Long tidy frame for ML / stats
long_df = panel.to_long()                      #  ≈ (2 × N_bins × T) rows

### Getting the intersection of supply and demand on the bidding curve

In [None]:
"""
jepx_stats.py
=============
Basic market-clearing metrics for a single 30-minute bidding curve slice.

Public API
----------
clearing_price(slice_df)          -> float
clearing_demand(slice_df)         -> float
clearing_supply(slice_df)         -> float     # alias, same value as demand
residual_volume(slice_df, price,
                side='supply')    -> float
imbalance(slice_df, integrated=False)
                                   -> pd.Series | float
"""
from __future__ import annotations
import math
import numpy as np
import pandas as pd
from typing import Tuple


# ---------------------------------------------------------------------
# internal helper
# ---------------------------------------------------------------------
def _clearing_price_volume(slice_df: pd.DataFrame) -> Tuple[float, float]:
    """
    Return (clearing_price, clearing_volume).  Uses linear interpolation
    inside the first price-bin where cumulative supply ≥ cumulative demand.

    *slice_df* is indexed by price and has ``supply_cum`` / ``demand_cum``
    columns.  ``(nan, nan)`` is returned when the slice is empty, when supply
    never reaches demand, or when missing values hide the crossing.
    """
    price   = slice_df.index.to_numpy(dtype=float)
    supply  = slice_df["supply_cum"].to_numpy(dtype=float)
    demand  = slice_df["demand_cum"].to_numpy(dtype=float)
    diff    = supply - demand                       # positive means surplus

    # nothing to clear
    if diff.size == 0:
        return math.nan, math.nan

    # supply already ≥ demand at the lowest price
    if diff[0] >= 0:
        return float(price[0]), float(demand[0])

    # supply never catches demand → no market clear (should not happen)
    if diff[-1] < 0:
        return math.nan, math.nan

    # find the first bin where supply crosses (or touches) demand;
    # NaN comparisons are all False, so a slice with gaps may have none
    crossing = np.flatnonzero(diff >= 0)
    if crossing.size == 0:
        return math.nan, math.nan
    idx = crossing[0]
    i0  = idx - 1                                   # point just below the crossing

    # linear interpolation between bin edges (good enough at ¥0.01 steps)
    p0, p1 = price[i0],  price[idx]
    d0, d1 = diff[i0],  diff[idx]
    alpha  = -d0 / (d1 - d0)                        # 0-1 fraction inside bin
    cp     = p0 + alpha * (p1 - p0)                # clearing price

    sup_cv = supply[i0]  + alpha * (supply[idx]  - supply[i0])
    dem_cv = demand[i0]  + alpha * (demand[idx]  - demand[i0])
    cv     = 0.5 * (sup_cv + dem_cv)               # same on both sides

    return float(cp), float(cv)


# ---------------------------------------------------------------------
# public, user-facing wrappers
# ---------------------------------------------------------------------
def clearing_price(slice_df: pd.DataFrame) -> float:
    """Return the ¥/kWh clearing price of one 30-minute curve slice."""
    return _clearing_price_volume(slice_df)[0]


def clearing_demand(slice_df: pd.DataFrame) -> float:
    """Return the cleared MWh of one slice, labelled as the **demand** side."""
    return _clearing_price_volume(slice_df)[1]


# Same function under the supply-side name; both report the one cleared volume.
clearing_supply = clearing_demand


def residual_volume(slice_df: pd.DataFrame,
                    price_threshold: float,
                    side: str = "supply") -> float:
    """
    Cumulative volume remaining relative to a price threshold.

    The chosen cumulative curve is linearly interpolated at
    *price_threshold* over the slice's price index.

    Parameters
    ----------
    slice_df : DataFrame
        Price-indexed slice with ``supply_cum`` / ``demand_cum`` columns.
    price_threshold : float
        Threshold in ¥/kWh.
    side : {'supply', 'demand'}
        • 'supply' → last cumulative supply value minus the interpolated
          value, i.e. MWh offered ABOVE the threshold
        • 'demand' → the interpolated cumulative demand itself (the demand
          curve is read as volume bid at or above a price).

    Raises
    ------
    ValueError
        If *side* is neither 'supply' nor 'demand'.
    """
    grid = slice_df.index.to_numpy(dtype=float)

    if side not in ("supply", "demand"):
        raise ValueError("side must be 'supply' or 'demand'")

    curve = slice_df[f"{side}_cum"].to_numpy(dtype=float)
    at_threshold = np.interp(price_threshold, grid, curve)

    if side == "demand":
        return float(at_threshold)
    return float(curve[-1] - at_threshold)


def imbalance(slice_df: pd.DataFrame,
              *,
              integrated: bool = False):
    """
    Supply-minus-demand difference for each price bin.

    Returns
    -------
    • pd.Series (index = price) if `integrated=False`
    • scalar (∫ difference dprice, trapezoidal rule) if `integrated=True`
    """
    diff = slice_df["supply_cum"] - slice_df["demand_cum"]

    if not integrated:
        return diff

    price = slice_df.index.to_numpy(dtype=float)
    # NumPy 2 renamed trapz → trapezoid and deprecated the old name; use the
    # new one when it exists and fall back on older NumPy releases.
    integrate = getattr(np, "trapezoid", None)
    if integrate is None:
        integrate = np.trapz
    # trapezoidal rule – gives “area” of surplus/shortage
    return float(integrate(diff.to_numpy(dtype=float), price))


In [None]:
# ---------------------------------------------------------------------
#  Time-series extensions — work with BidCurve *or* MultiBidCurve
# ---------------------------------------------------------------------
from typing import Union, Tuple
from jepx_loader import BidCurve, MultiBidCurve


def _iter_slices(container: Union[BidCurve, MultiBidCurve]):
    """
    Yield (label, slice_df) pairs where *label* is:
    • time_code 1-48          for BidCurve
    • pd.Timestamp            for MultiBidCurve

    Being a generator, the TypeError for an unsupported container is raised
    on the first iteration, not at call time.
    """
    if isinstance(container, BidCurve):
        for tc in range(1, 49):
            yield tc, container.slice_time(tc)
    elif isinstance(container, MultiBidCurve):
        for ts in container.supply.index:
            # Use the explicit accessor: subscripting (container[ts]) needs a
            # __getitem__ that the container class may not provide.
            yield ts, container.slice_time(ts)
    else:
        raise TypeError("Expected BidCurve or MultiBidCurve")


def clearing_series(
    container: Union[BidCurve, MultiBidCurve],
    *,
    return_dataframe: bool = True
) -> Union[pd.DataFrame, Tuple[pd.Series, pd.Series]]:
    """
    Clearing price and volume for every slice in the container.

    Returns
    -------
    • DataFrame (default) with columns ['clearing_price', 'clearing_volume'],
      indexed by time_code 1-48 (BidCurve) or timestamp (MultiBidCurve)
    • Or the pair (price_series, volume_series) if `return_dataframe=False`
    """
    # One (label, (price, volume)) entry per slice, in iteration order.
    results = [
        (label, _clearing_price_volume(frame))
        for label, frame in _iter_slices(container)
    ]
    index = [label for label, _ in results]

    price_s = pd.Series([pv[0] for _, pv in results],
                        index=index, name="clearing_price")
    vol_s = pd.Series([pv[1] for _, pv in results],
                      index=index, name="clearing_volume")

    if not return_dataframe:
        return price_s, vol_s
    return pd.concat([price_s, vol_s], axis=1)


# optional syntactic sugar wrappers -----------------------------------
def clearing_price_series(container) -> pd.Series:
    """Clearing price (¥/kWh) of each 30-min slice, as a Series."""
    price_s, _ = clearing_series(container, return_dataframe=False)
    return price_s


def clearing_volume_series(container) -> pd.Series:
    """Cleared volume (MWh) of each 30-min slice, as a Series."""
    _, vol_s = clearing_series(container, return_dataframe=False)
    return vol_s


### Trading cost estimators 

In [None]:
# ---------------------------------------------------------------------
#  Trading cost / marginal-price & elasticity helpers
# ---------------------------------------------------------------------
import numpy as np
import pandas as pd
from typing import Tuple, Literal


# ────────────────────────────────────────────────────────────────────
#  1. trading_cost  – cost (or revenue) to execute Q MWh in one slice
# ────────────────────────────────────────────────────────────────────
def trading_cost(
    slice_df: pd.DataFrame,
    vol_mwh: float,
    *,
    side: Literal["buy", "sell"] = "buy",
) -> Tuple[float, float]:
    """
    Cost (buy) or revenue (sell) of executing *vol_mwh* against one slice.

    Parameters
    ----------
    slice_df : DataFrame
        Price-indexed slice with ``supply_cum`` / ``demand_cum`` columns.
    vol_mwh  : float
        Target volume to trade **in MWh**; must be positive.
    side     : 'buy'  – consume the supply curve from the first price upwards
               'sell' – consume the demand curve from the last price downwards

    Returns
    -------
    total_¥ : float    – cost (buy) / revenue (sell), **in ¥**
    avg_¥_kWh : float  – average price paid / earned, **¥ per kWh**

    Raises
    ------
    ValueError
        If *vol_mwh* is not positive, *side* is unknown, or the request
        exceeds the final cumulative volume on the chosen curve.
    """
    kwh_per_mwh = 1_000                     # prices are ¥/kWh, volumes MWh

    if vol_mwh <= 0:
        raise ValueError("vol_mwh must be positive")

    # choose the curve and the direction in which it is walked
    levels = slice_df.index.to_numpy(dtype=float)
    if side == "buy":
        depth = slice_df["supply_cum"].to_numpy(dtype=float)
    elif side == "sell":
        # flip both arrays so the highest-priced bids are filled first
        levels = levels[::-1]
        depth = slice_df["demand_cum"].to_numpy(dtype=float)[::-1]
    else:
        raise ValueError("side must be 'buy' or 'sell'")

    available = depth[-1]
    if vol_mwh > available:
        raise ValueError(
            f"Requested {vol_mwh} MWh but only {available} MWh available on the {side} side"
        )

    # volume sitting at each price level (the first level starts from zero)
    steps = np.diff(np.concatenate(([0.0], depth)))

    spent = 0.0
    unfilled = vol_mwh
    for level, step in zip(levels, steps):
        fill = min(step, unfilled)
        spent += level * fill * kwh_per_mwh
        unfilled -= fill
        if unfilled <= 1e-9:                # filled, up to float noise
            break

    return spent, spent / (vol_mwh * kwh_per_mwh)


# ────────────────────────────────────────────────────────────────────
#  2. elasticity  – dVolume / dPrice for supply &/or demand curve
# ────────────────────────────────────────────────────────────────────
def elasticity(
    slice_df: pd.DataFrame,
    *,
    side: Literal["supply", "demand", "both"] = "both",
) -> pd.Series | pd.DataFrame:
    """
    Finite-difference slope dVolume/dPrice of the cumulative curve(s).

    The slope is evaluated with ``np.gradient`` AT each price of the slice's
    index (central differences inside, one-sided at the two ends), so the
    result is indexed by the same price grid, not by bin mid-points.

    Returns
    -------
    • pd.Series  (if side ≠ 'both')   index = price , value = dV/dP (MWh per ¥/kWh)
    • pd.DataFrame (if side == 'both') columns = ['supply', 'demand']

    Raises
    ------
    ValueError
        If *side* is not 'supply', 'demand' or 'both'.
    """
    # Reject typos explicitly; otherwise they would fall through to "both".
    if side not in ("supply", "demand", "both"):
        raise ValueError("side must be 'supply', 'demand' or 'both'")

    price = slice_df.index.to_numpy(dtype=float)

    def _slope(cum: pd.Series) -> pd.Series:
        vol = cum.to_numpy(dtype=float)
        dvol = np.gradient(vol, price)             # central FD
        return pd.Series(dvol, index=price, name=cum.name)

    if side in {"supply", "demand"}:
        return _slope(slice_df[f"{side}_cum"])

    # both
    sup = _slope(slice_df["supply_cum"])
    dem = _slope(slice_df["demand_cum"])
    return pd.concat({"supply": sup, "demand": dem}, axis=1)


In [1]:
# Multiple timestamp inputs 

In [None]:
# ---------------------------------------------------------------------
#  Time-series wrappers for trading_cost  &  elasticity
# ---------------------------------------------------------------------
from typing import Mapping, Union, Tuple, Literal
import numpy as np
import pandas as pd
from jepx_loader import BidCurve, MultiBidCurve


# ────────────────────────────────────────────────────────────────────
#  1. trading_cost_series   – one ¥ total + one ¥/kWh avg for *each* slot
# ────────────────────────────────────────────────────────────────────
def trading_cost_series(
    container: Union[BidCurve, MultiBidCurve],
    vol_mwh: float,
    *,
    side: Literal["buy", "sell"] = "buy",
    return_dataframe: bool = True,
) -> Union[pd.DataFrame, Tuple[pd.Series, pd.Series]]:
    """
    Apply trading_cost() to every 30-min curve in *container*.

    Returns
    -------
    • DataFrame (default)  with columns ['total_cost', 'avg_price']
      index = 1-48 (BidCurve)  | timestamps (MultiBidCurve)
    • OR a tuple (total_cost_series, avg_price_series) if return_dataframe=False
    """
    # One (label, (total ¥, average ¥/kWh)) entry per slice, in order.
    results = [
        (label, trading_cost(frame, vol_mwh, side=side))
        for label, frame in _iter_slices(container)
    ]
    index = [label for label, _ in results]

    tot_s = pd.Series([pair[0] for _, pair in results],
                      index=index, name="total_cost")   # ¥
    avg_s = pd.Series([pair[1] for _, pair in results],
                      index=index, name="avg_price")    # ¥/kWh

    if not return_dataframe:
        return tot_s, avg_s
    return pd.concat([tot_s, avg_s], axis=1)


# ────────────────────────────────────────────────────────────────────
#  2. elasticity_panel   – dV/dP surface(s)   (time × price-bin grid)
# ────────────────────────────────────────────────────────────────────
def elasticity_panel(
    container: Union[BidCurve, MultiBidCurve],
    *,
    side: Literal["supply", "demand", "both"] = "both",
) -> Union[pd.DataFrame, Mapping[str, pd.DataFrame]]:
    """
    Run elasticity() on every time-step and stack the rows into a grid.

    Returns
    -------
    • If side ∈ {'supply','demand'}  → DataFrame
          index  = 1-48 | timestamps
          columns = price bins (¥/kWh), taken from ``container.bins``
          values  = dV/dP (MWh per ¥/kWh)

    • Otherwise                      → dict {'supply': df_sup, 'demand': df_dem}
    """
    grid = container.bins                # shared by every slice
    sides = ("supply", "demand")
    single = side if side in sides else None      # anything else means "both"

    labels = []
    rows = {key: [] for key in sides}
    for label, frame in _iter_slices(container):
        if single is not None:
            rows[single].append(elasticity(frame, side=single).values)
        else:
            pair = elasticity(frame, side="both")  # two-column DataFrame
            for key in sides:
                rows[key].append(pair[key].values)
        labels.append(label)

    index_name = "time_code" if not isinstance(container, MultiBidCurve) else "timestamp"

    def _grid_frame(stacked) -> pd.DataFrame:
        return pd.DataFrame(
            np.vstack(stacked),
            index=pd.Index(labels, name=index_name),
            columns=grid,
        )

    if single is not None:
        return _grid_frame(rows[single])
    return {key: _grid_frame(rows[key]) for key in sides}


In [None]:
# Example: per-slot trading cost and elasticity surfaces for a multi-day panel.
import jepx_loader as jl
import jepx_stats  as js

# Load the multi-day file into a MultiBidCurve (timestamp-indexed panel).
panel = jl.load_curves("jepx_2024_tokyo.csv", region="Tokyo")

# 1) What does buying 150 MWh cost in every half-hour?
#    Raises ValueError if any slot offers less than 150 MWh of supply.
cost_df = js.trading_cost_series(panel, 150, side="buy")
print(cost_df.head())
# columns: total_cost (¥) , avg_price (¥/kWh)

# 2) Supply-side elasticity surface (time × price)
elas_sup = js.elasticity_panel(panel, side="supply")
print(elas_sup.iloc[:3, :5])          # first 3 timestamps, first 5 price bins

# 3) If you need both supply & demand (returned as a dict of two DataFrames):
elas = js.elasticity_panel(panel, side="both")
elas_sup = elas["supply"]
elas_dem = elas["demand"]

### Summary statistics 

In [None]:
# ---------------------------------------------------------------------
#  Time-slot summary table   (clearing, VWAP, price range, imbalance)
# ---------------------------------------------------------------------
from typing import Union
import numpy as np
import pandas as pd
from jepx_loader import BidCurve, MultiBidCurve


def timeslot_summary(
    container: Union[BidCurve, MultiBidCurve],
    *,
    vwap_side: str = "supply",        # 'supply' | 'demand'
) -> pd.DataFrame:
    """
    Per-timeslot diagnostics.

    Columns
    -------
    clearing_price        ¥/kWh  – intersection of curves
    clearing_volume       MWh    – traded volume
    vwap                  ¥/kWh  – volume-weighted avg price on *vwap_side*
    price_min             ¥/kWh  – lowest price level with non-zero supply
    price_max             ¥/kWh  – highest price level with non-zero supply
    imbalance_integral    MWh·¥  – ∫(supply_cum − demand_cum) dP

    The index is the timestamp (MultiBidCurve) or time code (BidCurve).

    Notes
    -----
    Supply is treated as accumulating from the lowest price upwards and
    demand from the highest price downwards — the same convention
    trading_cost() uses when it walks the two curves.  ``price_min`` /
    ``price_max`` always describe the supply side, whatever *vwap_side* is.

    Raises
    ------
    ValueError
        If *vwap_side* is neither 'supply' nor 'demand'.
    """
    if vwap_side not in {"supply", "demand"}:
        raise ValueError("vwap_side must be 'supply' or 'demand'")

    labels, cp, cv, vwap, pmin, pmax, imb = [], [], [], [], [], [], []

    for lbl, sl in _iter_slices(container):
        # ---- clearing stats
        _cp, _cv = _clearing_price_volume(sl)
        cp.append(_cp)
        cv.append(_cv)

        price   = sl.index.to_numpy(dtype=float)
        sup_cum = sl["supply_cum"].to_numpy(dtype=float)
        sup_inc = np.diff(np.concatenate(([0.0], sup_cum)))   # MWh offered at each price

        # ---- VWAP (incremental vols on the requested side)
        if vwap_side == "supply":
            inc_vol, tot_vol = sup_inc, sup_cum[-1]
        else:
            dem_cum = sl["demand_cum"].to_numpy(dtype=float)
            # Demand accumulates downwards from the top price, so difference
            # it from that end; the total sits at the LOWEST price.
            inc_vol = np.diff(np.concatenate(([0.0], dem_cum[::-1])))[::-1]
            tot_vol = dem_cum[0]
        vwap.append((price * inc_vol).sum() / tot_vol if tot_vol > 0 else np.nan)

        # ---- active price range (always the supply side, as documented)
        nz = np.flatnonzero(sup_inc > 0)          # where volume > 0
        pmin.append(price[nz[0]]  if nz.size else np.nan)
        pmax.append(price[nz[-1]] if nz.size else np.nan)

        # ---- imbalance (area between curves)
        imb.append(imbalance(sl, integrated=True))

        labels.append(lbl)

    idx_name = "timestamp" if isinstance(container, MultiBidCurve) else "time_code"
    summary = pd.DataFrame(
        {
            "clearing_price":        cp,
            "clearing_volume":       cv,
            "vwap":                  vwap,
            "price_min":             pmin,
            "price_max":             pmax,
            "imbalance_integral":    imb,
        },
        index=pd.Index(labels, name=idx_name),
    )
    return summary

In [None]:
# Usage example: the same summary call works on a multi-day panel and on a
# single day sliced out of it; only the index of the result differs.
import jepx_loader as jl
import jepx_stats  as js

# Multi-day panel
panel = jl.load_curves("jepx_2024_tokyo.csv", region="Tokyo")

# 1) One-liner summary for every half-hour in the file
ts_summary = js.timeslot_summary(panel)
print(ts_summary.head())
# index = 2024-01-01 00:00, 2024-01-01 00:30, …  (named "timestamp")

# 2) Still works on a single-day BidCurve
#    (slice_day raises ValueError unless the day has exactly 48 rows)
day_curve  = panel.slice_day("2024-06-01")
day_sum    = js.timeslot_summary(day_curve)
print(day_sum.head())
# index = 1, 2, 3 … 48  (named "time_code")

### Plotting Functions 

In [None]:
"""
jepx_plots.py
=============
Visualisations for JEPX day-ahead bidding-curve analytics.

Public API
----------
plot_supply_demand(container, ts)
plot_trading_cost_series(container, vol_mwh, *, side='buy', metric='total_cost')
plot_metric_series(container, metric)
"""
from __future__ import annotations
import matplotlib.pyplot as plt
import pandas as pd
from typing import Literal, Union

from jepx_loader import BidCurve, MultiBidCurve
from jepx_stats  import (
    trading_cost_series,
    timeslot_summary,
)

# ────────────────────────────────────────────────────────────────────
#  helper – fetch the slice by timestamp / time_code
# ────────────────────────────────────────────────────────────────────
def _get_slice(container, ts):
    """Return the price-indexed curve slice for *ts*.

    • MultiBidCurve: *ts* is a timestamp (str or pd.Timestamp)
    • BidCurve     : *ts* is an integer time code 1-48

    Raises TypeError for any other container, or for a non-integer *ts*
    with a BidCurve.
    """
    from numbers import Integral   # local import keeps the block self-contained

    if isinstance(container, MultiBidCurve):
        # Use the explicit accessor: subscripting (container[ts]) needs a
        # __getitem__ that the container class may not provide.
        return container.slice_time(ts)
    if isinstance(container, BidCurve):
        # Integral also accepts NumPy integers, e.g. labels taken from a
        # time_code index.
        if isinstance(ts, Integral):
            return container.slice_time(int(ts))
        raise TypeError("BidCurve expects time_code 1-48 (int)")
    raise TypeError("container must be BidCurve or MultiBidCurve")


# ────────────────────────────────────────────────────────────────────
#  1. supply-and-demand curve for a single 30-min auction
# ────────────────────────────────────────────────────────────────────
def plot_supply_demand(
    container: Union[BidCurve, MultiBidCurve],
    ts: Union[str, pd.Timestamp, int],
):
    """
    Step plot of the cumulative supply and demand curves of one auction.

    Parameters
    ----------
    container
        BidCurve or MultiBidCurve holding the slice.
    ts
        • MultiBidCurve: any 30-min timestamp (str or pd.Timestamp)
        • BidCurve     : time_code 1-48 (int)

    Returns
    -------
    The matplotlib Figure that was created.
    """
    frame = _get_slice(container, ts)
    prices = frame.index.to_numpy(dtype=float)
    # Pull both curves out before any figure is created.
    curves = [
        (legend, frame[column].to_numpy(dtype=float))
        for column, legend in (("supply_cum", "Supply (cum)"),
                               ("demand_cum", "Demand (cum)"))
    ]

    fig, ax = plt.subplots()          # a single axes on a single figure
    for legend, volumes in curves:
        ax.step(prices, volumes, where="post", label=legend, linewidth=1.2)

    ax.set_xlabel("Price [¥/kWh]")
    ax.set_ylabel("Cumulative volume [MWh]")
    ax.set_title(f"JEPX supply & demand curves   —   {ts}")
    ax.legend()
    ax.grid(True, which="both", alpha=0.3)
    return fig


# ────────────────────────────────────────────────────────────────────
#  2. trading-cost or avg-price across all time-steps
# ────────────────────────────────────────────────────────────────────
def plot_trading_cost_series(
    container: Union[BidCurve, MultiBidCurve],
    vol_mwh: float,
    *,
    side: Literal["buy", "sell"] = "buy",
    metric: Literal["total_cost", "avg_price"] = "total_cost",
):
    """
    Line chart of what it costs to buy / sell *vol_mwh* in every half-hour.

    metric
    -------
    • 'total_cost' – y-axis in **¥**
    • 'avg_price'  – y-axis in **¥/kWh**

    Returns the matplotlib Figure; raises ValueError for an unknown metric.
    """
    costs = trading_cost_series(container, vol_mwh, side=side, return_dataframe=True)
    if metric not in costs.columns:
        raise ValueError(f"metric must be one of {list(costs.columns)}")

    if isinstance(container, MultiBidCurve):
        x_label = "Time"
    else:
        x_label = "Time code (1-48)"

    if metric == "total_cost":
        y_label = "Total cost [¥]"
    else:
        y_label = "Average price [¥/kWh]"

    pretty_metric = metric.replace('_', ' ').title()

    fig, ax = plt.subplots()
    ax.plot(costs.index, costs[metric], linewidth=1.2)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(f"{pretty_metric} — {side.upper()} {vol_mwh} MWh each slot")
    ax.grid(True, alpha=0.3)
    return fig


# ────────────────────────────────────────────────────────────────────
#  3. quick plot for *any* metric produced by timeslot_summary()
# ────────────────────────────────────────────────────────────────────
def plot_metric_series(
    container: Union[BidCurve, MultiBidCurve],
    metric: Literal[
        "clearing_price", "clearing_volume",
        "vwap", "price_min", "price_max", "imbalance_integral"
    ] = "clearing_price",
    *,
    vwap_side: str = "supply",
):
    """
    Line chart of one timeslot_summary() column across all slots.

    metric — any column name emitted by timeslot_summary(); *vwap_side* is
    forwarded to it.  Returns the matplotlib Figure; raises ValueError for
    an unknown metric.
    """
    table = timeslot_summary(container, vwap_side=vwap_side)
    if metric not in table.columns:
        raise ValueError(f"metric must be one of {list(table.columns)}")

    if isinstance(container, MultiBidCurve):
        x_label = "Time"
    else:
        x_label = "Time code (1-48)"
    pretty_metric = metric.replace("_", " ").title()

    fig, ax = plt.subplots()
    ax.plot(table.index, table[metric], linewidth=1.2)
    ax.set_xlabel(x_label)
    ax.set_ylabel(pretty_metric)
    ax.set_title(f"{pretty_metric} per timeslot")
    ax.grid(True, alpha=0.3)
    return fig


| Module               | Function / Method                                            | What it does (one-liner)                                               |
| -------------------- | ------------------------------------------------------------ | ---------------------------------------------------------------------- |
| **`jepx_loader.py`** | **`load_curve(path, *, region, date)`**                      | Read a *single-day* 48-row file → `BidCurve`.                          |
|                      | **`load_curves(path, *, region)`**                           | Read a *multi-day* file → `MultiBidCurve` (index = 30-min timestamps). |
|                      | **`BidCurve.slice_time(tc)`**                                | Return a supply/demand DataFrame (index = price) for time-code 1-48.   |
|                      | **`BidCurve.to_long()`**                                     | Long “tidy” DataFrame (date, region, time\_code, side, price, cum\_vol). |
|                      | **`MultiBidCurve.__getitem__(ts)`**                          | Quick accessor: `panel['2025-07-08 12:00']` → curve slice.             |
|                      | **`MultiBidCurve.slice_day(date)`**                          | Pop one day back out as a `BidCurve`.                                  |
|                      | **`MultiBidCurve.to_long()`**                                | Long DF with timestamp granularity.                                    |
|                      | **`MultiBidCurve.iter_timeslices()`**                        | Generator over every 30-min slice.                                     |
| **`jepx_stats.py`**  | **`clearing_price(slice_df)`**                               | Intersection price (¥/kWh) for one slice.                              |
|                      | **`clearing_demand / clearing_supply(slice_df)`**            | Cleared MWh on either side.                                            |
|                      | **`residual_volume(slice_df, price, side)`**                 | Remaining MWh above a price threshold.                                 |
|                      | **`imbalance(slice_df, integrated=False)`**                  | Supply-minus-demand vector or its integral.                            |
|                      | **`trading_cost(slice_df, vol, side)`**                      | `total ¥`, `avg ¥/kWh` to buy/sell *vol* in one slice.                 |
|                      | **`elasticity(slice_df, side)`**                             | dVolume/dPrice curve (slope) per price bin.                            |
|                      | **`clearing_series(container)`**                             | Time-series of clearing price & volume for **all** slots.              |
|                      | **`trading_cost_series(container, vol, side)`**              | Series/DataFrame of cost or avg price per slot.                        |
|                      | **`elasticity_panel(container, side)`**                      | Time × price grid(s) of elasticity.                                    |
|                      | **`timeslot_summary(container)`**                            | Table per slot: clearing P/V, VWAP, min/max price, imbalance.          |
| **`jepx_plots.py`**  | **`plot_supply_demand(container, ts)`**                      | Step plot of supply & demand curves for one timestamp.                 |
|                      | **`plot_trading_cost_series(container, vol, side, metric)`** | Line chart of total cost *or* avg price over time.                     |
|                      | **`plot_metric_series(container, metric)`**                  | Generic plot for any column from `timeslot_summary()`.                 |
