### Upload Data

In [2]:
"""
Simple processing for the modified Excel:
  /mnt/data/10yearsZCB.xlsx

Assumptions (typical for a cleaned file):
- One sheet (or you know which sheet to use)
- One date column and one rate/yield column (already tidy)

Outputs:
- Cleaned time series (date, y_pct, y_dec)
- Daily/native Δy stats (percentile shocks)
- Monthly (EOM) Δy stats (percentile shocks)
- Worst k-month window shocks (1, 3, 6, 12 months)

You should only need to adjust DATE_COL and YIELD_COL (and SHEET_NAME, if the
data is not on the first sheet) once.
"""

from __future__ import annotations

import numpy as np
import pandas as pd

# Excel workbook that main() loads and analyses.
FILEPATH = "/mnt/data/10yearsZCB.xlsx"

# ---- set these to match your modified Excel headers ----
DATE_COL = "date"      # header of the date column, e.g., "Date", "Tanggal"
YIELD_COL = "yield"    # header of the yield column, e.g., "Yield", "Rate", "Imbal Hasil"
SHEET_NAME = 0         # passed to pd.read_excel: 0 = first sheet, or set to "Sheet1"
# --------------------------------------------------------


def load_clean_series(path: str) -> pd.DataFrame:
    """Load the yield series from an Excel sheet and normalise it.

    Reads sheet ``SHEET_NAME`` of *path*, keeps the ``DATE_COL`` and
    ``YIELD_COL`` columns, parses dates (day-first) and yields, drops rows
    where either value fails to parse, and sorts the result by date.

    Args:
        path: Location of the Excel workbook.

    Returns:
        DataFrame with columns ``date``, ``y_pct`` (yield in percent) and
        ``y_dec`` (yield as a decimal fraction), sorted ascending by date
        with a fresh 0..n-1 index.

    Raises:
        KeyError: If ``DATE_COL`` or ``YIELD_COL`` is not a column of the
            sheet; the message lists the columns that are available.
    """
    raw = pd.read_excel(path, sheet_name=SHEET_NAME)

    # Fail early with an actionable message: a header mismatch is the most
    # likely setup mistake, and pandas' own KeyError does not show which
    # headers actually exist in the sheet.
    missing = [col for col in (DATE_COL, YIELD_COL) if col not in raw.columns]
    if missing:
        raise KeyError(
            f"Column(s) {missing} not found in {path!r}; "
            f"available columns: {list(raw.columns)}. "
            "Adjust DATE_COL / YIELD_COL."
        )

    df = raw[[DATE_COL, YIELD_COL]].copy()
    df.columns = ["date", "y_raw"]

    df["date"] = pd.to_datetime(df["date"], errors="coerce", dayfirst=True)

    # Robust numeric conversion for "7,25", "7.25", "7.25%":
    # strip whitespace and percent signs, then turn a decimal comma that
    # sits between two digits into a decimal point.
    text = df["y_raw"].astype(str).str.strip()
    text = text.str.replace("%", "", regex=False).str.replace(" ", "", regex=False)
    text = text.str.replace(r"(?<=\d),(?=\d)", ".", regex=True)
    df["y"] = pd.to_numeric(text, errors="coerce")

    # A stable sort keeps rows that share a date in their original file
    # order, so downstream .diff() results are deterministic.
    df = (
        df.dropna(subset=["date", "y"])
        .sort_values("date", kind="mergesort")
        .reset_index(drop=True)
    )

    # Store both percent and decimal; auto-detect the input scale.
    # Heuristic: a median below 1.0 is taken to mean the values are already
    # decimal fractions (e.g. 0.0725); otherwise they are treated as percent.
    if df["y"].median() < 1.0:
        df["y_dec"] = df["y"]
        df["y_pct"] = 100.0 * df["y"]
    else:
        df["y_pct"] = df["y"]
        df["y_dec"] = df["y"] / 100.0

    return df[["date", "y_pct", "y_dec"]]


def bps(x: float) -> float:
    """Convert a decimal rate change to basis points.

    One basis point is 1e-4 in decimal terms, so a change of 0.0025
    corresponds to about 25 bp.

    Args:
        x: A single rate change expressed as a decimal fraction (a Python
            or NumPy scalar). This is a scalar helper: ``float()`` cannot
            convert a Series holding more than one value.

    Returns:
        The change expressed in basis points, as a built-in ``float``.
    """
    return float(x / 1e-4)


def shock_stats(delta: pd.Series) -> dict:
    """Summarise a series of rate changes as percentile shocks in bps.

    Missing values are dropped first. The result holds the number of
    remaining observations plus the 95th/99th and 5th/1st percentiles and
    the largest rise and fall, each converted to basis points via ``bps``.
    """
    changes = delta.dropna()

    # (result key, quantile level) pairs, in the order they are reported.
    percentile_spec = (
        ("p95_up_bps", 0.95),
        ("p99_up_bps", 0.99),
        ("p05_down_bps", 0.05),
        ("p01_down_bps", 0.01),
    )

    stats = {"count": len(changes)}
    for label, level in percentile_spec:
        stats[label] = bps(changes.quantile(level))
    stats["max_up_bps"] = bps(changes.max())
    stats["max_down_bps"] = bps(changes.min())
    return stats


def _print_stats(title: str, stats: dict, width: int = 14) -> None:
    """Print one titled summary section, one right-aligned ``key: value`` line per entry."""
    print(f"\n=== {title} ===")
    for name, value in stats.items():
        if isinstance(value, float):
            print(f"{name:>{width}}: {value:,.2f}")
        else:
            print(f"{name:>{width}}: {value:,}")


def main() -> None:
    """Load the yield series, print shock statistics and export the cleaned data.

    Steps:
      1. Load and clean the series from ``FILEPATH``.
      2. Compute percentile shocks on observation-to-observation changes.
      3. Compute percentile shocks on end-of-month changes.
      4. Compute the largest rise/fall over 1, 3, 6 and 12 monthly steps.
      5. Print a summary and write the cleaned series (date, y_pct, y_dec)
         to a CSV file.
    """
    df = load_clean_series(FILEPATH)

    # Native/daily changes. Kept as a separate Series rather than a new
    # column so the exported CSV contains only the cleaned series.
    dy = df["y_dec"].diff()
    native = shock_stats(dy)

    # Monthly end-of-month levels: last observation in each calendar month.
    # Grouping by monthly period avoids the "M" resample alias, which is
    # deprecated since pandas 2.2 (its replacement "ME" is not accepted by
    # older versions). Months without observations simply do not appear.
    month = df["date"].dt.to_period("M")
    monthly_y = df["y_dec"].groupby(month).last().dropna()
    monthly = shock_stats(monthly_y.diff())

    # Worst k-month window shocks (using the monthly series). Note that
    # diff(k) steps over k available month-ends, so a gap in the data
    # makes the corresponding window longer than k calendar months.
    worst = {}
    for k in (1, 3, 6, 12):
        change = monthly_y.diff(k).dropna()
        worst[f"worst_{k}m_up_bps"] = bps(change.max())
        worst[f"worst_{k}m_down_bps"] = bps(change.min())

    # Print summary
    print("=== SUMMARY ===")
    print(f"Date range : {df['date'].min().date()} to {df['date'].max().date()}")
    print(f"Obs        : {len(df):,}")
    print(f"Median y%  : {df['y_pct'].median():.3f}")

    _print_stats("NATIVE Δy (bps)", native)
    _print_stats("MONTHLY EOM Δy (bps)", monthly)
    _print_stats("WORST WINDOW (bps, monthly)", worst, width=18)

    # Optional: export cleaned series
    out_csv = "/mnt/data/10yearsZCB_clean.csv"
    df.to_csv(out_csv, index=False)
    print(f"\nSaved cleaned series to: {out_csv}")


# Run the analysis only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'numpy'