In [5]:
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta
import matplotlib.pyplot as plt
from concurrent.futures import ThreadPoolExecutor, as_completed

import mlflow
from mlflow.tracking import MlflowClient
from mlflow.models.signature import infer_signature
import mlflow.sklearn

from data.data_source import get_data_source
from data.treasury_curve import get_yield_curve
from models.covariance.empirical_covariance import EmpiricalCovarianceModel as model_choice
from config import env
from utils.artifact_saver import get_artifact_path
import math

# ─── CONFIG ─────────────────────────────────────────────────────────────────
# Module-level settings shared by every function below.

# Curve family: used as the rate_curves filter and stamped on every rate_cones row.
CURVE_TYPE        = "US Treasury Par"
# Tenor points, in years, selected from rate_curves (rendered verbatim into the SQL IN list).
TENORS            = [1/12, 0.125, 2/12, 0.25, 4/12, 0.5, 1, 2, 3, 5, 7, 10, 20, 30]
# Number of simulated curves drawn per (as-of date, horizon).
N_SIMS            = 1000
# MLflow experiment that receives the parent and per-date runs.
MLFLOW_EXPERIMENT = "IR Cone Fit betaExperiment"
backfill_DAYS     = 5       # how many calendar days before today the backfill starts
# Thread-pool size the __main__ driver passes to populate_ir_cones.
MAX_WORKERS       = 12
# Shared data-source handle; used for both the curve SELECTs and the cone INSERTs.
ds                = get_data_source()
# Covariance estimator class: instantiated and fit once per as-of date.
model_class = model_choice
# Short name of the estimator; prefix of the registered MLflow model name.
model_shortname = model_class.name


def format_tenor(x):
    """Render a tenor given in years as a compact label such as ``"1Y6M"``.

    The tenor is first snapped to the nearest half month. Whole years are
    emitted as ``"<n>Y"`` and any remaining months as ``"<m>M"`` (with one
    decimal place when the remainder is a half month). A tenor that rounds
    to zero months yields ``"0M"``.
    """
    # Work in half-month steps so values like 0.125y (1.5 months) survive.
    half_month_steps = round(x * 12 * 2)
    total_months = half_month_steps / 2

    whole_years = int(total_months // 12)
    leftover = total_months - whole_years * 12

    label = f"{whole_years}Y" if whole_years else ""
    if leftover:
        if leftover.is_integer():
            label += f"{int(leftover)}M"
        else:
            label += f"{leftover:.1f}M"

    return label if label else "0M"


def batch_insert_rate_cones(records, batch_size=200):
    """Write cone records to the ``rate_cones`` table in fixed-size batches.

    Each record is a mapping with the keys ``curve_type``, ``days_forward``,
    ``curve_date``, ``cone_type``, ``tenor_str``, ``rate``, ``tenor_num`` and
    ``model_type``. One multi-row INSERT is issued through the module-level
    ``ds`` handle per batch; conflicting rows are ignored by the statement.

    Returns the number of records submitted. Because of ``ON CONFLICT DO
    NOTHING`` this can exceed the number of rows actually written.
    """
    submitted = 0
    for start in range(0, len(records), batch_size):
        chunk = records[start : start + batch_size]

        # NOTE(review): values are interpolated straight into the SQL text, so
        # string fields must not contain single quotes.
        row_literals = []
        for r in chunk:
            row_literals.append(
                f"('{r['curve_type']}', {r['days_forward']:.1f}, '{r['curve_date']}', "
                f"'{r['cone_type']}', '{r['tenor_str']}', {r['rate']:.8f}, {r['tenor_num']:.8f}, '{r['model_type']}')"
            )
        vals = ",\n".join(row_literals)

        sql = f"""
        INSERT INTO rate_cones
          (curve_type, days_forward, curve_date, cone_type, tenor_str, rate, tenor_num, model_type)
        VALUES {vals}
        ON CONFLICT DO NOTHING;
        """
        ds.query(sql)
        submitted += len(chunk)

    return submitted


def plot_ir_cones_matplotlib(base_curve: pd.Series, ir_cone_df: pd.DataFrame, days_forward: int, title: str = ""):
    """Plot a sample of simulated curves around the base curve and save a PNG.

    Args:
        base_curve: Base yield curve; the index supplies the x values, the
            values the yields, and ``name`` is used in the file name and the
            default title.
        ir_cone_df: Long-format simulations with columns ``sim_id``,
            ``tenor_num`` and ``rate_simulated``.
        days_forward: Horizon in days; used in the file name and default title.
        title: Optional chart title. When empty a default title is built.

    Returns:
        The path returned by ``get_artifact_path`` that the figure was saved to.

    The figure is created as a standalone ``Figure`` rather than through
    pyplot. This function is called from worker threads, and pyplot's implicit
    "current figure" is shared process-wide, so concurrent calls could draw on
    (or close) each other's figure.
    """
    # Local import keeps this block self-contained; matplotlib is already a
    # dependency of the file.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(10, 6))
    ax = fig.subplots()

    # Draw at most 100 randomly chosen simulations to keep the chart readable.
    sample_ids = np.random.choice(
        ir_cone_df["sim_id"].unique(),
        size=min(100, ir_cone_df["sim_id"].nunique()),
        replace=False
    )
    for sim_id in sample_ids:
        sim = ir_cone_df[ir_cone_df["sim_id"] == sim_id]
        ax.plot(sim["tenor_num"], sim["rate_simulated"], color="gray", alpha=0.1)

    ax.plot(base_curve.index, base_curve.values,
            color="crimson", linewidth=2.5, label="Base Curve")
    ax.set_xlabel("Tenor (years)")
    ax.set_ylabel("Yield (%)")
    ax.set_title(title or f"{days_forward}-Day IR Cones ({N_SIMS} sims) on {base_curve.name}")
    ax.grid(True, linestyle="--", alpha=0.3)
    ax.legend()
    fig.tight_layout()

    fn = get_artifact_path(f"tsy_cones_{base_curve.name}_{days_forward}d.png")
    # The figure is never registered with pyplot, so no plt.close() is needed;
    # it is released when it goes out of scope.
    fig.savefig(fn, dpi=150)
    return fn


def generate_ir_cone(base_curve: pd.Series,
                     cov_model: "model_class",
                     n_sims: int,
                     days_forward: int) -> pd.DataFrame:
    """Simulate ``n_sims`` curves around ``base_curve`` at a forward horizon.

    Each simulated curve is ``base_curve + drift * days_forward + shock``,
    where the shocks are drawn from a zero-mean multivariate normal whose
    covariance is ``cov_model.covariance_ * days_forward``.

    Args:
        base_curve: Starting curve; its index holds the tenor of each point.
        cov_model: Fitted model exposing ``covariance_`` (one row/column per
            point of ``base_curve``) and, optionally, ``drift_`` (one value
            per point). A missing ``drift_`` is treated as zero drift.
        n_sims: Number of curves to simulate.
        days_forward: Horizon multiplier applied to both covariance and drift.
            NOTE(review): assumes ``covariance_``/``drift_`` are per-step
            quantities in the same unit as ``days_forward`` — confirm.

    Returns:
        Long-format DataFrame with columns ``sim_id`` (0..n_sims-1),
        ``tenor_num`` and ``rate_simulated``, ordered by simulation and then
        by the order of ``base_curve``.
    """
    n_tenors = len(base_curve)

    cov = cov_model.covariance_ * days_forward

    # Coerce to an array so a list-valued drift_ is scaled, not repeated.
    drift = np.asarray(getattr(cov_model, "drift_", np.zeros(n_tenors)), dtype=float)
    drift_fw = drift * days_forward  # horizon drift

    rand_deltas = np.random.multivariate_normal(
        mean=np.zeros(n_tenors),
        cov=cov,
        size=n_sims
    )

    base_vals = base_curve.to_numpy()
    sims = base_vals[np.newaxis, :] + drift_fw[np.newaxis, :] + rand_deltas

    # Flatten the (n_sims, n_tenors) grid row by row instead of building one
    # Python dict per cell; row order matches sim-major iteration.
    return pd.DataFrame({
        "sim_id": np.repeat(np.arange(n_sims, dtype=np.int64), n_tenors),
        "tenor_num": np.tile(base_curve.index.to_numpy(), n_sims),
        "rate_simulated": np.asarray(sims).ravel(),
    })


def populate_ir_cones(backfill_days: int,
                      fit_window_years: int = 1,
                      years_back: int = 0,
                      max_workers: int = 4):
    """Backfill rate-cone percentiles for every as-of date in the recent range.

    For each calendar day from ``today - backfill_days - years_back`` (never
    earlier than 2010-01-01) through today, a worker thread:

    1. loads the ``CURVE_TYPE`` curves for the trailing ``fit_window_years``,
    2. fits ``model_class`` on the day-over-day curve changes,
    3. simulates 30- and 90-day cones, saves a chart, and inserts the
       1/5/10/50/90/95/99 percentile curves into ``rate_cones``,
    4. logs params, metrics, the fitted model and the chart to a per-horizon
       MLflow run tagged with the parent run id.

    Dates with no curve exactly on the as-of date are skipped. Dates whose
    query returns nothing, or whose processing raises, are collected as errors
    and printed at the end; the function itself does not raise for them.

    Args:
        backfill_days: Number of days before today at which the range starts.
        fit_window_years: Length of the trailing fit window, in years.
        years_back: Additional whole years to move the range start back.
        max_workers: Size of the thread pool processing the dates.
    """
    # No as-of date or fit window reaches further back than this.
    earliest_date = datetime(2010, 1, 1).date()

    end_date   = datetime.today().date()
    start_date = max(
        end_date - relativedelta(days=backfill_days, years=years_back),
        earliest_date
    )
    all_dates = pd.date_range(start=start_date, end=end_date, freq="D").date

    model_name = f'{model_shortname}_{fit_window_years}yrFit'
    pctls = [1, 5, 10, 50, 90, 95, 99]

    mlflow.set_experiment(MLFLOW_EXPERIMENT)

    with mlflow.start_run(run_name=f"populate_ir_cones_{end_date}") as parent:
        parent_id = parent.info.run_id
        mlflow.log_params({
            "as_of_date": str(start_date),
            "backfill_days": backfill_days,
            "fit_window_years": fit_window_years,
            "curve_type": CURVE_TYPE,
            "n_sims": N_SIMS,
        })

        # Shared accumulators appended to from the worker threads.
        total_obs, total_vars, trace_covs, errors = [], [], [], []
        skipped = []

        def task(asof_date):
            """Process one as-of date; return ``(date, message)`` on failure, else None."""
            try:
                window_start = max(
                    asof_date - relativedelta(years=fit_window_years),
                    earliest_date
                )

                sql = f"""
                SELECT curve_date, tenor_num, rate
                  FROM rate_curves
                 WHERE curve_type = '{CURVE_TYPE}'
                   AND curve_date BETWEEN '{window_start}' AND '{asof_date}'
                   AND tenor_num    IN ({', '.join(map(str, TENORS))})
                 ORDER BY curve_date DESC, tenor_num;
                """
                df = ds.query(sql).to_pandas()
                if df.empty:
                    return (asof_date, "No curve data")

                # One row per curve date, one column per tenor; gaps are filled
                # along the date axis and still-incomplete rows are dropped.
                pivot = (
                    df.pivot(index="curve_date", columns="tenor_num", values="rate")
                      .sort_index()
                      .interpolate(method="linear", axis=0)
                      .dropna()
                )
                if asof_date not in pivot.index:
                    print(f"⏭️  Skipping {asof_date}: no exact curve_date in pivot")
                    # Recorded so the parent run does not count this date as processed.
                    skipped.append(asof_date)
                    return None

                base_curve = pivot.loc[asof_date]
                deltas     = pivot.diff().dropna()   # day-over-day changes across the fit window

                print('using model: ' + model_name)

                # The fit depends only on the deltas, not on the horizon, so it
                # is done once and reused for both horizons.
                model     = model_class().fit(deltas.values)
                n_obs     = len(deltas)
                total_var = float(np.var(deltas.values))
                trace_cv  = float(np.trace(model.covariance_))
                input_example = deltas.values[:1]

                for days_forward in (30, 90):
                    cone_df = generate_ir_cone(base_curve, model, N_SIMS, days_forward)
                    chart   = plot_ir_cones_matplotlib(base_curve, cone_df,
                                                       days_forward,
                                                       title=f"{days_forward}-day cones")

                    # Percentile curve per tenor, reshaped to one row per
                    # (tenor, percentile).
                    pct_df = (
                        cone_df.groupby("tenor_num")["rate_simulated"]
                               .quantile([p/100 for p in pctls])
                               .unstack(level=1)
                               .reset_index()
                               .melt(id_vars="tenor_num", var_name="percentile", value_name="rate")
                    )
                    pct_df["percentile"] = pct_df["percentile"].astype(float)

                    pct_df["curve_type"]   = CURVE_TYPE
                    pct_df["tenor_str"]    = pct_df["tenor_num"].apply(format_tenor)
                    # round() rather than int(): p*100 can land just below the
                    # integer in floating point and would otherwise truncate.
                    pct_df["cone_type"]    = pct_df["percentile"].apply(lambda p: f"{int(round(p*100))}%")
                    pct_df["curve_date"]   = asof_date
                    pct_df["days_forward"] = days_forward
                    pct_df["model_type"] = model_name

                    recs = pct_df[[
                        "curve_type","days_forward","curve_date",
                        "cone_type","tenor_str","rate","tenor_num", "model_type"
                    ]].to_dict(orient="records")
                    batch_insert_rate_cones(recs)

                    # One entry per horizon, feeding the parent-run aggregates.
                    total_obs.append(n_obs)
                    total_vars.append(total_var)
                    trace_covs.append(trace_cv)

                    with mlflow.start_run(
                        run_name=f"IR_{asof_date}",
                        nested=True,
                        tags={"mlflow.parentRunId": parent_id}
                    ):
                        mlflow.log_params({
                            "as_of_date": str(asof_date),
                            "backfill_days": backfill_days,
                            "fit_window_years": fit_window_years,
                            "curve_type": CURVE_TYPE,
                            "n_sims": N_SIMS,
                        })
                        mlflow.log_metrics({
                            "n_obs": n_obs,
                            "total_var": total_var,
                            "trace_cov": trace_cv,
                            "days_forward": days_forward,
                            "dates_processed": 1,
                        })
                        mlflow.sklearn.log_model(
                            sk_model=model,
                            artifact_path="model",
                            registered_model_name=model_name,
                            input_example=input_example
                        )
                        mlflow.log_artifact(chart, artifact_path="charts")

                return None

            except Exception as e:
                # Best-effort backfill: one bad date is reported, not fatal.
                return (asof_date, str(e))

        with ThreadPoolExecutor(max_workers=max_workers) as exe:
            futures = [exe.submit(task, d) for d in all_dates]
            for fut in as_completed(futures):
                if err := fut.result():
                    errors.append(err)

        mlflow.log_metrics({
            "dates_processed": len(all_dates) - len(errors) - len(skipped),
            "n_skipped": len(skipped),
            "n_errors": len(errors),
            "n_obs": sum(total_obs),
            "total_var": float(np.mean(total_vars)) if total_vars else 0.0,
            "trace_cov": float(np.mean(trace_covs)) if trace_covs else 0.0,
        })

        if errors:
            print(f"⚠️  {len(errors)} errors:")
            for d, msg in errors:
                print(f"  • {d}: {msg}")
        else:
            print("✅ All cones processed and logged.")


if __name__ == "__main__":
    import sys

    # Run the same backfill once per fit window: 1-year first, then 5-year.
    for fit_years in (1, 5):
        populate_ir_cones(
            backfill_days=backfill_DAYS,
            fit_window_years=fit_years,
            max_workers=MAX_WORKERS,
            years_back=0,
        )


getting data source for sandbox
using model: DefaultEmpiricalCovarianceEstimator_1yrFit
using model: DefaultEmpiricalCovarianceEstimator_1yrFit
using model: DefaultEmpiricalCovarianceEstimator_1yrFit
using model: DefaultEmpiricalCovarianceEstimator_1yrFit
⏭️  Skipping 2025-06-08: no exact curve_date in pivot
⏭️  Skipping 2025-06-13: no exact curve_date in pivot


Successfully registered model 'DefaultEmpiricalCovarianceEstimator_1yrFit'.
Registered model 'DefaultEmpiricalCovarianceEstimator_1yrFit' already exists. Creating a new version of this model...
Registered model 'DefaultEmpiricalCovarianceEstimator_1yrFit' already exists. Creating a new version of this model...
Registered model 'DefaultEmpiricalCovarianceEstimator_1yrFit' already exists. Creating a new version of this model...
2025/06/13 18:41:29 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: DefaultEmpiricalCovarianceEstimator_1yrFit, version 1
Created version '1' of model 'DefaultEmpiricalCovarianceEstimator_1yrFit'.
2025/06/13 18:41:29 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: DefaultEmpiricalCovarianceEstimator_1yrFit, version 2
Created version '2' of model 'DefaultEmpiricalCovarianceEstimator_1yrFit'.
2025/06/13 18:41:29 IN

🏃 View run IR_2025-06-10 at: http://127.0.0.1:8768/#/experiments/1513/runs/c48a4ea89eae43fbb9aa37b61964a00a
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513


Created version '3' of model 'DefaultEmpiricalCovarianceEstimator_1yrFit'.
2025/06/13 18:41:29 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: DefaultEmpiricalCovarianceEstimator_1yrFit, version 4
Created version '4' of model 'DefaultEmpiricalCovarianceEstimator_1yrFit'.


🏃 View run IR_2025-06-11 at: http://127.0.0.1:8768/#/experiments/1513/runs/0616131d15f74bc4b84647e1d3927992
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
🏃 View run IR_2025-06-09 at: http://127.0.0.1:8768/#/experiments/1513/runs/acee579e06cc43d78f1dfd1b1f92c042
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
🏃 View run IR_2025-06-12 at: http://127.0.0.1:8768/#/experiments/1513/runs/eaac9ba79f4c48a18ec08e48cd590cb6
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513


Registered model 'DefaultEmpiricalCovarianceEstimator_1yrFit' already exists. Creating a new version of this model...
Registered model 'DefaultEmpiricalCovarianceEstimator_1yrFit' already exists. Creating a new version of this model...
Registered model 'DefaultEmpiricalCovarianceEstimator_1yrFit' already exists. Creating a new version of this model...
Registered model 'DefaultEmpiricalCovarianceEstimator_1yrFit' already exists. Creating a new version of this model...
2025/06/13 18:41:36 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: DefaultEmpiricalCovarianceEstimator_1yrFit, version 5
Created version '5' of model 'DefaultEmpiricalCovarianceEstimator_1yrFit'.
2025/06/13 18:41:36 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: DefaultEmpiricalCovarianceEstimator_1yrFit, version 6
Created version '6' of model 'DefaultEmpiricalCovarianc

🏃 View run IR_2025-06-10 at: http://127.0.0.1:8768/#/experiments/1513/runs/68816f22febc404d97338521ed347e71
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
🏃 View run IR_2025-06-09 at: http://127.0.0.1:8768/#/experiments/1513/runs/adaa4b37423b4c60909c67af51f7bd4a
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
🏃 View run IR_2025-06-11 at: http://127.0.0.1:8768/#/experiments/1513/runs/565fa2c037a94f9bb6f745d7cd8d5890
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
🏃 View run IR_2025-06-12 at: http://127.0.0.1:8768/#/experiments/1513/runs/9aa424b4624648b68939ef2001f250bd
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
✅ All cones processed and logged.
🏃 View run populate_ir_cones_2025-06-13 at: http://127.0.0.1:8768/#/experiments/1513/runs/38823e546c8c45d3a6cae746d6c60b6d
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
using model: DefaultEmpiricalCovarianceEstimator_5yrFit
using model: DefaultEmpiricalCovarianceE

Registered model 'DefaultEmpiricalCovarianceEstimator_5yrFit' already exists. Creating a new version of this model...
Registered model 'DefaultEmpiricalCovarianceEstimator_5yrFit' already exists. Creating a new version of this model...
Registered model 'DefaultEmpiricalCovarianceEstimator_5yrFit' already exists. Creating a new version of this model...
Registered model 'DefaultEmpiricalCovarianceEstimator_5yrFit' already exists. Creating a new version of this model...
2025/06/13 18:41:46 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: DefaultEmpiricalCovarianceEstimator_5yrFit, version 17
Created version '17' of model 'DefaultEmpiricalCovarianceEstimator_5yrFit'.
2025/06/13 18:41:46 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: DefaultEmpiricalCovarianceEstimator_5yrFit, version 18
Created version '18' of model 'DefaultEmpiricalCovar

🏃 View run IR_2025-06-12 at: http://127.0.0.1:8768/#/experiments/1513/runs/504606ebae894b329d1a482c0e59428f
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
🏃 View run IR_2025-06-09 at: http://127.0.0.1:8768/#/experiments/1513/runs/69c503d6acb44356850a6618798bee36
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513


Created version '20' of model 'DefaultEmpiricalCovarianceEstimator_5yrFit'.


🏃 View run IR_2025-06-10 at: http://127.0.0.1:8768/#/experiments/1513/runs/a024cf0851d14c71a6a794dd356a7f20
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
🏃 View run IR_2025-06-11 at: http://127.0.0.1:8768/#/experiments/1513/runs/c6cc844d515043fdb7ad2e934dc03ce2
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513


Registered model 'DefaultEmpiricalCovarianceEstimator_5yrFit' already exists. Creating a new version of this model...
Registered model 'DefaultEmpiricalCovarianceEstimator_5yrFit' already exists. Creating a new version of this model...
Registered model 'DefaultEmpiricalCovarianceEstimator_5yrFit' already exists. Creating a new version of this model...
2025/06/13 18:41:53 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: DefaultEmpiricalCovarianceEstimator_5yrFit, version 21
Created version '21' of model 'DefaultEmpiricalCovarianceEstimator_5yrFit'.
Registered model 'DefaultEmpiricalCovarianceEstimator_5yrFit' already exists. Creating a new version of this model...
2025/06/13 18:41:54 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: DefaultEmpiricalCovarianceEstimator_5yrFit, version 22
Created version '22' of model 'DefaultEmpiricalCovar

🏃 View run IR_2025-06-09 at: http://127.0.0.1:8768/#/experiments/1513/runs/e2e8671c807d4e87af60743e761041b4
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
🏃 View run IR_2025-06-11 at: http://127.0.0.1:8768/#/experiments/1513/runs/5fdd265399e14e58a3222edc64668f81
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513


Created version '23' of model 'DefaultEmpiricalCovarianceEstimator_5yrFit'.
2025/06/13 18:41:54 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: DefaultEmpiricalCovarianceEstimator_5yrFit, version 24
Created version '24' of model 'DefaultEmpiricalCovarianceEstimator_5yrFit'.


🏃 View run IR_2025-06-12 at: http://127.0.0.1:8768/#/experiments/1513/runs/8697b27ffa694437b88b5026953ca767
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
🏃 View run IR_2025-06-10 at: http://127.0.0.1:8768/#/experiments/1513/runs/26442d8532224c25bb208da2d0d8c22c
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
✅ All cones processed and logged.
🏃 View run populate_ir_cones_2025-06-13 at: http://127.0.0.1:8768/#/experiments/1513/runs/a7f5457856f14c069de0243934acbef9
🧪 View experiment at: http://127.0.0.1:8768/#/experiments/1513
