In [21]:
# %% [markdown]
# # Monte Carlo IR Cone Notebook (Stylish Visualization)
#
# This notebook fits a simple multivariate normal “model” to historical yield‐curve changes,
# registers it in MLflow, and produces IR cones (simulated shocked curves) for a given as‐of date.
# We use Altair for plotting, with monotone interpolation between tenors to draw each scenario as a smooth “curvy” line.
#
# **Drop‐in ready**: place this file under notebooks/monte_carlo_ir_cone.ipynb (or as a .py with Jupyter markers).
# Adjust paths/curve_type/horizon as needed.

# %%
import sys
from datetime import datetime
from pathlib import Path

import altair as alt
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta
from mlflow.models import infer_signature
from sklearn.covariance import EmpiricalCovariance

# Put the parent of the working directory on sys.path before the local imports below.
sys.path.append(str(Path.cwd().parent))

from data.data_source import get_data_source
from data.treasury_curve import get_yield_curve
from models.empirical_covariance import EmpiricalCovarianceModel

# %%
# ─────────────────────────────────────────────────────────────
# CONFIGURATION
# ─────────────────────────────────────────────────────────────
#
# 1. Which base curve to pull from rate_curves table
CURVE_TYPE = "US Treasury Par"

# 2. As‐of date for which we generate cones (a date object; defaults to today).
#    No type annotation here on purpose: only the `datetime` class is imported,
#    so `datetime.date` would name its .date() method rather than the date type.
ASOF = datetime.today().date()

# 3. Historical window length in years (currently 1 year of daily curves)
HIST_YEARS = 1
START_DATE = ASOF - relativedelta(years=HIST_YEARS)

# 4. Tenors of interest (must match tenor_num in rate_curves)
TENORS = [0.25, 0.5, 1, 2, 3, 5, 7, 10, 20, 30]

# 5. Number of Monte Carlo simulations
N_SIMS = 1000

# 6. MLflow experiment name
MLFLOW_EXPERIMENT = "IR Cone Monte Carlo"

# 7. Random seed so the simulated cones and the plotted sample are reproducible
#    across "Restart & Run All". Set to None to reseed from OS entropy instead.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# %%
# ─────────────────────────────────────────────────────────────
# 1. LOAD HISTORICAL CURVE DATA (INCLUDING ASOF) INTO pivot
# ─────────────────────────────────────────────────────────────
ds = get_data_source()

sql = f"""
SELECT
    curve_date,
    tenor_num,
    rate
FROM rate_curves
WHERE curve_type = '{CURVE_TYPE}'
  AND curve_date BETWEEN '{START_DATE}' AND '{ASOF}'
  AND tenor_num IN ({', '.join(str(x) for x in TENORS)})
ORDER BY curve_date, tenor_num
;"""
query_result = ds.query(sql)
historic = query_result.to_pandas()

# Reshape long -> wide: one row per curve_date, one column per tenor_num,
# cells hold the rate. Dates with any missing tenor are discarded so that
# every remaining row is a complete curve.
pivot = (
    historic
    .pivot(index="curve_date", columns="tenor_num", values="rate")
    .sort_index()
    .dropna(axis=0, how="any")
)

print(f">>> Loaded {pivot.shape[0]} historical daily curves (dates) with {pivot.shape[1]} tenors.")

# %%
# ─────────────────────────────────────────────────────────────
# 2. COMPUTE DAILY CHANGES (ABSOLUTE)
# ─────────────────────────────────────────────────────────────
# Use simple absolute changes: Δr_t = r_t − r_{t−1}
# The first row has no predecessor, so diff() yields NaN there and it is dropped.
deltas = pivot.diff().dropna(axis=0, how="any")
print(f">>> Computed {deltas.shape[0]} daily change observations for {deltas.shape[1]} tenors.")

# Fail fast with a clear message instead of fitting (and registering) a
# covariance model on too little data.
if deltas.shape[0] < 2:
    raise ValueError(
        f"Need at least 2 daily-change observations to fit a covariance model; "
        f"got {deltas.shape[0]} between {START_DATE} and {ASOF}."
    )

# %%
# ─────────────────────────────────────────────────────────────
# 3. FIT COVARIANCE MODEL AND LOG IT TO MLFLOW
# ─────────────────────────────────────────────────────────────
# NOTE(review): EmpCovWrapper is not defined or imported in the visible part of
# this notebook, so this cell fails on a fresh kernel unless another cell
# provides it. It is used below via .fit(), .predict() and (downstream, through
# cov_model) .covariance_ — confirm where it lives and define/import it explicitly.

# 3.2: Fit the wrapped estimator on our daily deltas
wrapped = EmpCovWrapper().fit(deltas.values)
# Alias consumed by the cone-generation step further down.
cov_model = wrapped

# 3.3: Create an input example & signature so MLflow logs everything properly
#    - We use one row of deltas as an example (shape (1, n_tenors))
example_X = deltas.values[:1]
#    - “Predict” will return a (1, n_tenors, n_tenors) array
signature = infer_signature(
    example_X,
    wrapped.predict(example_X)
)

# 3.4: Log to MLflow
#    Passing registered_model_name means every execution of this cell creates a
#    new version of "ir_covariance_model" in the model registry.
mlflow.set_experiment(MLFLOW_EXPERIMENT)
with mlflow.start_run(run_name=f"cov_fit_{ASOF}") as run:
    mlflow.sklearn.log_model(
        sk_model=wrapped,
        artifact_path="ir_covariance_model",
        registered_model_name="ir_covariance_model",
        input_example=example_X,
        signature=signature
    )
    mlflow.log_param("cov_fit_window_years", HIST_YEARS)
    mlflow.log_param("n_observations", deltas.shape[0])
    mlflow.log_param("n_tenors", deltas.shape[1])

print(f">>> Wrapped EmpCovariance logged to MLflow run ID {run.info.run_id}.")
# %%
# ─────────────────────────────────────────────────────────────
# 4. DEFINE A REUSABLE “MODEL” FUNCTION
# ─────────────────────────────────────────────────────────────
def generate_ir_cone(
    base_curve: pd.Series,
    cov_model: "EmpiricalCovariance",
    n_sims: int = 1000,
    rng: "np.random.Generator | None" = None,
) -> pd.DataFrame:
    """
    Generate Monte Carlo IR cone simulations for a single base curve.

    Each scenario adds one zero-mean multivariate-normal shock (drawn with the
    model's covariance matrix) to the base curve.

    Args:
        base_curve: pd.Series indexed by tenor_num, values = yield rates
        cov_model:  fitted estimator exposing a ``covariance_`` matrix of shape
                    (n_tenors, n_tenors), ordered like ``base_curve.index``
        n_sims:     number of scenarios to draw
        rng:        optional ``np.random.Generator`` for reproducible draws.
                    When omitted, NumPy's global random state is used, so
                    ``np.random.seed(...)`` still controls the output.

    Returns:
        Long-format DataFrame with columns [sim_id, tenor_num, rate_simulated],
        ordered by sim_id and then by the tenor order of ``base_curve``.
        - sim_id runs from 0 to n_sims-1
        - tenor_num is one of the tenor keys (e.g. 0.25, 0.5, 1, …)
        - rate_simulated = base_rate + Δr_sampled

    Raises:
        ValueError: if the covariance matrix shape does not match the number
            of tenors in ``base_curve``.
    """
    tenors = base_curve.index.to_numpy()
    n_tenors = len(tenors)

    cov_mat = np.asarray(cov_model.covariance_)
    if cov_mat.shape != (n_tenors, n_tenors):
        raise ValueError(
            f"Covariance matrix has shape {cov_mat.shape}, but base_curve has "
            f"{n_tenors} tenors; expected ({n_tenors}, {n_tenors})."
        )

    # Fall back to the legacy global state so existing seeding keeps working.
    sampler = np.random if rng is None else rng
    rand_deltas = sampler.multivariate_normal(
        mean=np.zeros(n_tenors),
        cov=cov_mat,
        size=n_sims,
    )

    # Broadcast the (1, n_tenors) base curve across all (n_sims, n_tenors) shocks.
    sims = base_curve.to_numpy().reshape((1, -1)) + rand_deltas

    # Flatten row-major: all tenors of sim 0, then all tenors of sim 1, ...
    return pd.DataFrame({
        "sim_id": np.repeat(np.arange(n_sims), n_tenors),
        "tenor_num": np.tile(tenors, n_sims),
        "rate_simulated": sims.ravel(),
    })

# %%
# ─────────────────────────────────────────────────────────────
# 5. RETRIEVE BASE CURVE FOR ASOF — VIA pivot LAST ROW
# ─────────────────────────────────────────────────────────────
# Prefer the curve dated ASOF; when that date has no row in pivot, fall back
# to the most recent date that does and say so.
asof_missing = ASOF not in pivot.index
if asof_missing:
    last_date = pivot.index.max()
base_curve = pivot.loc[last_date if asof_missing else ASOF]
if asof_missing:
    print(f"⚠️ ASOF={ASOF} not in rate_curves; using last available date: {last_date}")

print(f">>> Base curve (via pivot) on {base_curve.name}:")
print(base_curve)

# Shock the chosen base curve N_SIMS times with the fitted covariance model.
ir_cone_df = generate_ir_cone(base_curve=base_curve, cov_model=cov_model, n_sims=N_SIMS)
print(f">>> Generated IR cone: {ir_cone_df.shape[0]} rows (simulations × tenors).")

# %%
# ─────────────────────────────────────────────────────────────
# 6. (OPTIONAL) INSERT SIMULATIONS INTO A NEW DB TABLE
# ─────────────────────────────────────────────────────────────
# If you want to persist these cones, create a table first:
#
#   CREATE TABLE monte_carlo_cones (
#     cone_date   DATE    NOT NULL,
#     sim_id      INTEGER NOT NULL,
#     tenor_num   DOUBLE PRECISION NOT NULL,
#     rate        DOUBLE PRECISION NOT NULL,
#     inserted_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
#     PRIMARY KEY (cone_date, sim_id, tenor_num)
#   );
#
# Then run:
#
# insert_sql = \"""
#   INSERT INTO monte_carlo_cones (cone_date, sim_id, tenor_num, rate)
#   VALUES (%(cone_date)s, %(sim_id)s, %(tenor_num)s, %(rate_simulated)s)
#   ON CONFLICT DO NOTHING;
# \"""
#
# df_to_insert = ir_cone_df.copy()
# df_to_insert["cone_date"] = ASOF
#
# for _, row in df_to_insert.iterrows():
#     ds.execute(insert_sql, row.to_dict())
#
# print(f"Inserted {len(df_to_insert)} rows into monte_carlo_cones.")
#
# (Uncomment and run if desired.)

# %%
# ─────────────────────────────────────────────────────────────
# 7. VISUALIZE A FEW SAMPLES (STYLISH, SMOOTHED CURVES)
# ─────────────────────────────────────────────────────────────

# 7.1: Sample a subset of simulation IDs to plot (up to 100).
#      The size is capped at the number of simulations available, because
#      sampling without replacement raises ValueError when size > population.
all_sim_ids = ir_cone_df["sim_id"].unique()
sample_ids = np.random.choice(
    all_sim_ids,
    size=min(100, len(all_sim_ids)),
    replace=False
)
sample_df = ir_cone_df[ir_cone_df["sim_id"].isin(sample_ids)].copy()

# 7.2: Prepare base‐curve DataFrame for Altair.
#      Built from base_curve itself (index = tenor_num, values = rate) rather
#      than by looking up each entry of TENORS, so a configured tenor that is
#      absent from the loaded data cannot raise a KeyError here.
base_df = pd.DataFrame({
    "tenor_num": base_curve.index.to_numpy(),
    "rate": base_curve.to_numpy()
})

# 7.3: Build Altair chart: gray simulation lines with smooth interpolation,
#      y‐axis autoscaled (zero=False ensures it doesn’t force 0 at baseline).
#      detail="sim_id:N" draws one separate line per simulation.
sim_lines = (
    alt.Chart(sample_df)
    .mark_line(color="gray", opacity=0.1, strokeWidth=1, interpolate="monotone")
    .encode(
        x=alt.X("tenor_num:Q", title="Tenor (years)"),
        y=alt.Y(
            "rate_simulated:Q",
            title="Yield (%)",
            scale=alt.Scale(zero=False)
        ),
        detail="sim_id:N"
    )
)

# 7.4: Build Altair chart: red base curve, also smooth‐interpolated
base_line = (
    alt.Chart(base_df)
    .mark_line(color="crimson", strokeWidth=3, interpolate="monotone")
    .encode(
        x="tenor_num:Q",
        y=alt.Y(
            "rate:Q",
            scale=alt.Scale(zero=False)
        )
    )
)

# 7.5: Layer them together
chart = (
    (sim_lines + base_line)
    .properties(
        width=700,
        height=400,
        title=f"Monte Carlo IR Cones ({N_SIMS} sims) on {base_curve.name}"
    )
    .configure_title(fontSize=16, fontWeight="bold")
    .configure_axis(
        labelFontSize=12,
        titleFontSize=13
    )
)

# Bare last expression so the notebook renders the chart.
chart


getting data source for sandbox
>>> Loaded 249 historical daily curves (dates) with 10 tenors.
>>> Computed 248 daily change observations for 10 tenors.


Registered model 'ir_covariance_model' already exists. Creating a new version of this model...
2025/06/05 22:32:17 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ir_covariance_model, version 41
Created version '41' of model 'ir_covariance_model'.


🏃 View run cov_fit_2025-06-05 at: http://127.0.0.1:8768/#/experiments/1501/runs/a85bee53e5244a45ba421008a998a8cd
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1501
>>> Wrapped EmpCovariance logged to MLflow run ID a85bee53e5244a45ba421008a998a8cd.
⚠️ ASOF=2025-06-05 not in rate_curves; using last available date: 2025-06-04
>>> Base curve (via pivot) on 2025-06-04:
tenor_num
0.25     4.44
0.50     4.29
1.00     4.06
2.00     3.87
3.00     3.84
5.00     3.93
7.00     4.14
10.00    4.37
20.00    4.90
30.00    4.89
Name: 2025-06-04, dtype: float64
>>> Generated IR cone: 10000 rows (simulations × tenors).
