# Option Viz — SVI Calibration Test

This notebook fetches an option chain (equity via **yfinance** or crypto via **OKX**),
runs preprocessing, and calibrates an **SVI** smile:

1. Fetch chain → DataFrame
2. Robust mids + flags (crossed / wide)
3. Put–Call Parity diagnostics and **forward** estimation
4. Build `(k, w, weights)` and **fit SVI**
5. Quick plots + butterfly check

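> If imports fail, make sure the repository's `src/` directory is importable. The next cell appends `<current working directory>/src` to `sys.path`, so start the notebook from the repository root (or add `src/` to `PYTHONPATH` yourself).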

In [None]:
# --- Setup: ensure src/ is importable; show versions ---
import sys, os
from pathlib import Path

# Locate the project sources relative to the current working directory and
# register them on the import path, skipping the append when already present.
ROOT = Path.cwd()
SRC = ROOT / "src"
_src_entry = str(SRC)
if _src_entry not in sys.path:
    sys.path.append(_src_entry)
print("Using SRC:", SRC)
import pandas as pd, numpy as np, matplotlib

# Report the versions of the core numerical stack on a single line.
_version_table = (
    ("pandas:", pd.__version__),
    ("numpy:", np.__version__),
    ("matplotlib:", matplotlib.__version__),
)
print(*(field for entry in _version_table for field in entry))

## Select Backend & Underlying

- For **equity**, try: `AAPL`, `SPY`
- For **crypto**, try: `BTC`, `ETH`

In [None]:
# Run configuration for the whole notebook.
# asset_class is handed to get_fetcher() to choose the data backend.
asset_class = "equity"  # "equity" or "crypto"
# Ticker / coin whose expiries and option chain are requested from the fetcher.
underlying = "AAPL"  # e.g., "AAPL" or "SPY", "BTC" or "ETH"
# Position in the ascending-sorted expiry list (0 = earliest). An index past
# the end of the list is clamped to the last available expiry.
expiry_index = 0  # nearest expiry; adjust to avoid illiquid chains

# Preprocess thresholds
# Forwarded to add_midprice_columns() as `wide_rel_threshold`.
WIDE_REL_THRESHOLD = 0.15  # (ask-bid)/mid > 15% → wide
# Days per year used when converting the time to expiry into a year fraction.
DAY_COUNT = 365.25  # year fraction denominator

## Fetch a Chain and Convert to DataFrame

In [None]:
import asyncio


def arun(coro):
    """Run a coroutine to completion from synchronous (notebook) code.

    When no event loop is running in the current thread the coroutine is
    executed with ``asyncio.run``. When a loop is already running (the usual
    situation inside Jupyter) the loop is patched with ``nest_asyncio`` so
    that ``run_until_complete`` can be re-entered.

    Args:
        coro: The coroutine to execute.

    Returns:
        The value returned by the coroutine.

    Raises:
        RuntimeError: A loop is already running and ``nest_asyncio`` cannot
            be imported.
        Exception: Anything raised by the coroutine itself propagates
            unchanged.
    """
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop in this thread: let asyncio create and tear one down.
        return asyncio.run(coro)

    # Guard only the import. Wrapping the execution as well would relabel
    # every failure inside the coroutine as a missing-dependency error.
    try:
        import nest_asyncio
    except ImportError as e:
        raise RuntimeError(
            "An asyncio loop is running and nest_asyncio is not available.\n"
            "Install with: pip install nest_asyncio"
        ) from e

    nest_asyncio.apply(loop)
    return loop.run_until_complete(coro)


from data.registry import get_fetcher
from data.historical_loader import chain_to_dataframe

# Resolve the data backend for the configured asset class.
fetcher = get_fetcher(asset_class)

# Ask the backend for the available expiries and stop when there are none.
expiries = arun(fetcher.list_expiries(underlying))
assert expiries, f"No expiries for {underlying} ({asset_class})"

# Choose the requested expiry from the ascending list; an index beyond the
# end falls back to the last expiry.
expiries_sorted = sorted(expiries)
_n_expiries = len(expiries_sorted)
_last_pos = _n_expiries - 1
_chosen_pos = expiry_index if expiry_index < _last_pos else _last_pos
expiry = expiries_sorted[_chosen_pos]
_expiry_label = expiry.date().isoformat()
print(f"Selected expiry: {_expiry_label}  | total expiries: {_n_expiries}")

# Download the option chain for that expiry and echo a one-line summary.
chain = arun(fetcher.fetch_chain(underlying, expiry))
print(
    f"Spot: {chain.spot} | Quotes: {len(chain.quotes)} | AsOf: {chain.asof_utc}"
)

# Flatten the fetched chain into a DataFrame with one row per quote.
try:
    df = chain_to_dataframe(chain)
except Exception as exc:
    # Best-effort fallback: rebuild the table straight from the quote objects
    # when the project helper fails. The reason is reported instead of being
    # swallowed, so a broken helper does not go unnoticed.
    print(
        f"chain_to_dataframe failed ({type(exc).__name__}: {exc}); "
        "using fallback conversion"
    )
    import pandas as pd

    rows = [
        {
            "symbol": q.symbol,
            "underlying": q.underlying,
            "asset_class": q.asset_class,
            "expiry": q.expiry,
            "strike": q.strike,
            # Quote objects may expose the option type under either name.
            "type": getattr(q, "opt_type", getattr(q, "type", None)),
            "bid": q.bid,
            "ask": q.ask,
            "last": q.last,
            "mark": q.mark,
            "volume": q.volume,
            "open_interest": q.open_interest,
            "contract_size": q.contract_size,
            "underlying_ccy": q.underlying_ccy,
            "quote_ccy": q.quote_ccy,
            # `extra` may be absent or None; both are treated as "no IV".
            "iv": (getattr(q, "extra", None) or {}).get("iv"),
        }
        for q in chain.quotes
    ]
    df = pd.DataFrame(rows)
print("DataFrame shape:", df.shape)
# Last expression of the cell, so the notebook renders this preview.
df.head()

## Preprocess: mids, PCP, forward, and k = ln(K/F)

In [None]:
from preprocess.midprice import add_midprice_columns
from preprocess.pcp import add_pcp_diagnostics
from preprocess.forward import estimate_forward_from_chain
from data.risk_free import RiskFreeProvider, RiskFreeConfig
import numpy as np

# Derive mid prices and quote-quality flags from the raw bid/ask columns.
# The empty prefix keeps the helper's output column names unprefixed.
_mid_options = dict(
    bid_col="bid",
    ask_col="ask",
    out_prefix="",
    wide_rel_threshold=WIDE_REL_THRESHOLD,
)
dfp = add_midprice_columns(df, **_mid_options)

# Summarise quote quality: rows priced from both sides, crossed, and wide.
_n_two_sided = int((dfp["side_used"] == "both").sum())
_n_crossed = int(dfp["crossed"].sum())
_n_wide = int(dfp["wide"].sum())
print(
    "Both sides:", _n_two_sided, "| Crossed:", _n_crossed, "| Wide:", _n_wide
)

# Risk-free rate and year fraction to expiry.
# Flat rate used both as the provider default and as the fallback below.
_DEFAULT_RATE = 0.05

asof = getattr(chain, "asof_utc", None)
if asof is None:
    # Neither T nor the rate lookup date is defined without a valuation
    # timestamp; fail with an explicit message rather than an opaque
    # TypeError from the subtraction below.
    raise ValueError("chain.asof_utc is missing; cannot compute time to expiry")

# Seconds to expiry divided by seconds per year, floored at 1e-8 so that T
# is always strictly positive.
T = max((expiry - asof).total_seconds() / (DAY_COUNT * 24 * 3600), 1e-8)

rf = RiskFreeProvider(
    RiskFreeConfig(default_rate=_DEFAULT_RATE, forward_fill=True)
)
# Look the rate up by calendar date when the timestamp exposes one.
r = rf.get_rate(asof.date()) if hasattr(asof, "date") else _DEFAULT_RATE
print(f"T ~ {T:.6f} years | r ~ {r:.4f}")

# Later steps read the option type from a column named "type". When only
# "opt_type" is present, mirror it under that name (values are copied as-is).
_columns = dfp.columns
if "opt_type" in _columns and "type" not in _columns:
    dfp["type"] = dfp["opt_type"]

# Put–call parity diagnostics. The result is kept in `diag` for inspection
# only; the remaining statements of this cell do not read it.
diag = add_pcp_diagnostics(
    dfp,
    spot=chain.spot or np.nan,  # a missing (falsy) spot is passed as NaN
    r=r,
    T=T,
    price_col="mid",
    type_col="type",
    strike_col="strike",
)
# A bare `diag.head()` in the middle of a cell is evaluated and discarded,
# because only the last expression of a cell is rendered. Print it instead.
print(diag.head())

# Estimate the forward for this expiry from the mid quotes, passing the
# chain's spot as a hint.
_forward_inputs = {
    "r": r,
    "T": T,
    "price_col": "mid",
    "type_col": "type",
    "strike_col": "strike",
    "spot_hint": chain.spot,
}
F_est = estimate_forward_from_chain(dfp, **_forward_inputs)
print(f"Estimated forward F ≈ {F_est:.6f}")

# Attach log-moneyness k = ln(K / F) to every row.
_strikes = dfp["strike"].astype(float)
dfp["k"] = np.log(_strikes / F_est)

# Last expression of the cell: preview the columns used downstream.
_preview_columns = ["strike", "type", "mid", "rel_spread", "k"]
dfp[_preview_columns].head()

## Build (k, w, weights) and Fit SVI

In [None]:
from vol.svi import prepare_smile_data, fit_svi, svi_w

# Assemble the SVI inputs (k, w, weights) from the preprocessed chain.
_smile_columns = dict(
    price_col="mid",
    type_col="type",
    strike_col="strike",
    iv_col="iv",
)
k, w, wts = prepare_smile_data(
    dfp, T=T, r=r, F=F_est, use_flags=True, **_smile_columns
)

# Require more than six usable points before attempting a fit.
_n_points = len(k)
print(f"Data for SVI: n={_n_points} points")
assert (
    _n_points > 6
), "Not enough valid points to fit SVI. Try a different expiry."

# Weighted calibration of the SVI parameters.
fit = fit_svi(k, w, weights=wts)
print("SVI params:", fit)

# Evaluate the fitted curve on a 201-point grid spanning the observed k range.
_k_low = float(np.min(k))
_k_high = float(np.max(k))
k_grid = np.linspace(_k_low, _k_high, 201)
_svi_params = fit.as_tuple()
w_fit = svi_w(k_grid, *_svi_params)

# Convert w to implied vol as sqrt(w / T). Only the fitted curve is floored
# at zero before the square root; the market points are used as given.
iv_points = np.sqrt(w / T)
iv_fit = np.sqrt(np.maximum(w_fit, 0.0) / T)

## Plot: Implied Vols and Fitted SVI

In [None]:
import matplotlib.pyplot as plt

# Overlay the market implied vols (points) with the fitted SVI curve (line).
_figure_size = (7, 4)
fig = plt.figure(figsize=_figure_size)
ax = fig.gca()

ax.scatter(k, iv_points, s=12, label="Market IV")
ax.plot(k_grid, iv_fit, label="SVI fit")

# Axis labels and a title identifying underlying, expiry and asset class.
_expiry_label = expiry.date().isoformat()
ax.set_title(f"SVI fit — {underlying} @ {_expiry_label} ({asset_class})")
ax.set_ylabel("Implied vol")
ax.set_xlabel("k = ln(K/F)")

ax.legend()
plt.show()

## Butterfly Check (Convexity of Call Prices vs Strike)

In [None]:
from vol.no_arb import butterfly_violations

# Keep call quotes only: a row counts as a call when the first character of
# its type label, upper-cased, is "C".
_type_initial = dfp["type"].astype(str).str.upper().str[0]
_is_call = _type_initial == "C"
df_calls = dfp[_is_call].copy()

# Strikes and call mids as float arrays, in DataFrame row order.
# NOTE(review): rows are not sorted by strike here and NaN mids are not
# removed — confirm that butterfly_violations handles both.
K = df_calls["strike"].astype(float).to_numpy()
C = df_calls["mid"].astype(float).to_numpy()
diag_bfly = butterfly_violations(K, C, tol=1e-10)

# Report the violation count, the number of interior points, and the fraction.
_n_violations = diag_bfly["count"]
_n_interior = diag_bfly["n_interior"]
_fraction = round(diag_bfly["fraction"], 4)
print(
    "Butterfly violations: ",
    _n_violations,
    "/",
    _n_interior,
    " (fraction=",
    _fraction,
    ")",
)

## Save Artifacts (CSV + PNG)

In [None]:
from pathlib import Path

# Output directory, created on demand together with any missing parents.
OUT = Path("out")
OUT.mkdir(parents=True, exist_ok=True)

# Both artifacts share the underlying / expiry / asset-class naming scheme.
_expiry_tag = expiry.date().isoformat()

# 1) The preprocessed chain that fed the SVI fit, as CSV without the index.
csv_path = OUT / f"{underlying}_{_expiry_tag}_{asset_class}_svi_input.csv"
dfp.to_csv(csv_path, index=False)
print("Saved:", csv_path)

# 2) The fit figure. Saving is best-effort: any failure is reported and the
#    cell still completes.
fig_path = OUT / f"{underlying}_{_expiry_tag}_{asset_class}_svi_fit.png"
try:
    fig.savefig(fig_path, dpi=140, bbox_inches="tight")
    print("Saved:", fig_path)
except Exception as err:
    print("Plot save failed:", err)