
# Option Viz — **Polygon Live Data** Notebook

This notebook fetches **real option chains from Polygon.io** and then runs your pipeline:
**midprice → PCP/forward → IVs → SVI fits/surface → BL density**, with plots.

> **Requirements**
> - Set `POLYGON_API_KEY` in your environment.
> - Internet access enabled when you run this locally.
> - Pick an equity ticker (e.g., `NVDA`, `AAPL`).

**Sections**
1. Bootstrap project modules → `src.*` packages  
2. Configure Polygon + risk-free provider  
3. Fetch expiries & select sample maturities  
4. Fetch chains → normalize → midprice & quality plots  
5. Forward estimation via PCP and k-grid  
6. Implied vols (prefer venue IV, else Black-76 from mid calls)  
7. SVI fits per expiry + smile plots  
8. IV surface sketch & calendar sanity  
9. BL density from smoothed call curve (PDF, CDF, moments)


## 1) Bootstrap modules into `src.*`

In [1]:
import sys, types, importlib.util
from pathlib import Path

BASE = Path("C:\\Users\\drewv\\Documents\\option-density-viz\\src")


def ensure_pkg(name: str):
    if name in sys.modules:
        return sys.modules[name]
    mod = types.ModuleType(name)
    mod.__package__ = name
    sys.modules[name] = mod
    return mod


for pkg in ["src", "src.data", "src.preprocess", "src.vol", "src.density"]:
    ensure_pkg(pkg)


def load_as(name: str, filepath: Path):
    spec = importlib.util.spec_from_file_location(name, filepath)
    mod = importlib.util.module_from_spec(spec)
    sys.modules[name] = mod
    assert spec and spec.loader, f"Cannot load spec for {name}"
    spec.loader.exec_module(mod)
    return mod


mapping = {
    # data
    "data/base.py": "src.data.base",
    "data/registry.py": "src.data.registry",
    "data/yf_fetcher.py": "src.data.yf_fetcher",
    "data/polygon_fetcher.py": "src.data.polygon_fetcher",
    "data/okx_fetcher.py": "src.data.okx_fetcher",
    "data/cache.py": "src.data.cache",
    "data/rate_limit.py": "src.data.rate_limit",
    "data/historical_loader.py": "src.data.historical_loader",
    "data/risk_free.py": "src.data.risk_free",
    "data/risk_free_fetchers.py": "src.data.risk_free_fetchers",
    # preprocess
    "preprocess/forward.py": "src.preprocess.forward",
    "preprocess/midprice.py": "src.preprocess.midprice",
    "preprocess/pcp.py": "src.preprocess.pcp",
    # vol
    "vol/no_arb.py": "src.vol.no_arb",
    "vol/svi.py": "src.vol.svi",
    "vol/surface.py": "src.vol.surface",
    # density
    "density/bl.py": "src.density.bl",
    "density/cdf.py": "src.density.cdf",
}
loaded = {}
for fname, modname in mapping.items():
    try:
        loaded[modname] = load_as(modname, BASE / fname)
        print(f"OK  | {modname} <- {fname}")
    except Exception as e:
        print(f"FAIL| {modname}: {e}")
print(f"Loaded {len(loaded)} modules.")

OK  | src.data.base <- data/base.py
FAIL| src.data.registry: No module named 'src.data.okx_fetcher'; 'src.data' is not a package


python-dotenv could not parse statement starting at line 6


OK  | src.data.yf_fetcher <- data/yf_fetcher.py
OK  | src.data.polygon_fetcher <- data/polygon_fetcher.py
OK  | src.data.okx_fetcher <- data/okx_fetcher.py
OK  | src.data.cache <- data/cache.py
OK  | src.data.rate_limit <- data/rate_limit.py
OK  | src.data.historical_loader <- data/historical_loader.py
OK  | src.data.risk_free <- data/risk_free.py
OK  | src.data.risk_free_fetchers <- data/risk_free_fetchers.py
OK  | src.preprocess.forward <- preprocess/forward.py
OK  | src.preprocess.midprice <- preprocess/midprice.py
OK  | src.preprocess.pcp <- preprocess/pcp.py
OK  | src.vol.no_arb <- vol/no_arb.py
OK  | src.vol.svi <- vol/svi.py
OK  | src.vol.surface <- vol/surface.py
OK  | src.density.bl <- density/bl.py
OK  | src.density.cdf <- density/cdf.py
Loaded 17 modules.


## 2) Imports & configuration

In [3]:
import os, math, asyncio, numpy as np, pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timezone

# Core types & IO
from src.data.base import OptionChain
from src.data.historical_loader import chain_to_dataframe

# Fetchers & rate
from src.data.polygon_fetcher import PolygonFetcher
from src.data.risk_free import (
    RiskFreeConfig,
    RiskFreeProvider,
    cont_to_simple,
    simple_to_cont,
    cont_to_apy,
)

# Processing
from src.preprocess.midprice import add_midprice_columns
from src.preprocess.pcp import pcp_residual
from src.preprocess.forward import (
    yearfrac,
    estimate_forward_from_chain,
    log_moneyness,
)

# Vol
from src.vol.no_arb import butterfly_violations

try:
    from src.vol.no_arb import calendar_violations
except ImportError:
    calendar_violations = None
from src.vol.svi import calibrate_svi_from_quotes, svi_total_variance
from src.vol.surface import _implied_vol_black76_call

# Density
from src.density.bl import bl_pdf_from_calls
from src.density.cdf import build_cdf, moments_from_pdf

# ---------- User config ----------
TICKER = os.getenv("OVIZ_TICKER", "NVDA")
NUM_EXPIRIES = int(
    os.getenv("OVIZ_NUM_EXPIRIES", "3")
)  # how many nearby expiries to analyze
WIDE_REL = 0.20  # midprice wide threshold
DEFAULT_R = float(
    os.getenv("OVIZ_R_CONT", "0.045")
)  # fallback continuous rate

assert os.getenv(
    "POLYGON_API_KEY"
), "POLYGON_API_KEY is required in your environment."
print("Ticker:", TICKER, "| Expiries to analyze:", NUM_EXPIRIES)

Ticker: NVDA | Expiries to analyze: 3


## 3) Fetch expiries & select sample maturities

In [4]:
async def get_expiries(sym: str):
    f = PolygonFetcher()
    exps = await f.list_expiries(sym)
    await f.client.aclose()
    # choose the nearest NUM_EXPIRIES expiries >= 10d out
    today = datetime.now(timezone.utc).date()
    exps_sorted = sorted([e for e in exps if (e.date() - today).days >= 10])
    return (
        exps_sorted[:NUM_EXPIRIES]
        if exps_sorted
        else sorted(exps)[:NUM_EXPIRIES]
    )


expiries = asyncio.run(get_expiries(TICKER))
print("Chosen expiries:", [e.date().isoformat() for e in expiries])
assert expiries, "No expiries returned. Check API key/ticker."

RuntimeError: asyncio.run() cannot be called from a running event loop

## 4) Fetch chains → normalize → midprice & quality plots

In [5]:
async def fetch_chain(sym: str, expiry_dt: datetime) -> OptionChain:
    f = PolygonFetcher()
    ch = await f.fetch_chain(sym, expiry_dt)
    await f.client.aclose()
    return ch


chains = [asyncio.run(fetch_chain(TICKER, e)) for e in expiries]
dfs = [chain_to_dataframe(ch) for ch in chains]

# Add midprice diagnostics
dfc = []
for df in dfs:
    out = add_midprice_columns(
        df.rename(columns={"type": "opt_type"}).rename(
            columns={"opt_type": "type"}
        ),
        wide_rel_threshold=WIDE_REL,
    )
    dfc.append(out)

# Plot quick quality histograms for first expiry
df0 = dfc[0]
finite_rel = df0["rel_spread"].replace([np.inf, -np.inf], np.nan).dropna()
finite_mid = df0["mid"].replace([np.inf, -np.inf], np.nan).dropna()

plt.figure(figsize=(7.5, 4.5))
plt.title("Midprice distribution (first expiry)")
plt.hist(finite_mid.values, bins=30)
plt.xlabel("mid")
plt.ylabel("count")
plt.show()

plt.figure(figsize=(7.5, 4.5))
plt.title("Relative spread distribution (first expiry)")
plt.hist(finite_rel.values, bins=30)
plt.xlabel("rel_spread")
plt.ylabel("count")
plt.show()

print("Rows per expiry:", [len(d) for d in dfc])

NameError: name 'expiries' is not defined

## 5) Estimate forward via PCP and build k-grid

In [None]:
rf = RiskFreeProvider(RiskFreeConfig(default_rate=DEFAULT_R))

for i, (exp, df) in enumerate(zip(expiries, dfc), start=1):
    T = yearfrac(datetime.now(timezone.utc), exp)
    r_cont = rf.get_rate(
        datetime.now(timezone.utc), tenor_days=max(1, int(T * 365))
    )
    F = estimate_forward_from_chain(df, r=r_cont, T=T, price_col="mid")
    k = log_moneyness(df["strike"].values, F)
    print(f"[{i}] {exp.date()}  T={T:.4f}  r={r_cont:.4f}  F≈{F:.4f}")

## 6) Implied vols — prefer provided IV, else solve from mid calls

In [None]:
def compute_iv_series(df, F, T, r):
    # Prefer 'iv' column if present and finite
    if "iv" in df.columns and np.isfinite(df["iv"].astype(float)).any():
        iv = pd.to_numeric(df["iv"], errors="coerce")
        return iv.values.astype(float)
    # Else solve from call mids only to keep it simple
    Df = math.exp(-r * T)
    ivs = []
    for _, row in df.iterrows():
        if (
            row["type"] == "C"
            and row["mid"]
            and row["mid"] > 0
            and row["strike"] > 0
        ):
            ivs.append(
                _implied_vol_black76_call(
                    C=float(row["mid"]),
                    F=float(F),
                    K=float(row["strike"]),
                    T=float(T),
                    Df=float(Df),
                )
            )
        else:
            ivs.append(np.nan)
    return np.array(ivs, dtype=float)


iv_grids = []
for exp, df in zip(expiries, dfc):
    T = yearfrac(datetime.now(timezone.utc), exp)
    r = rf.get_rate(
        datetime.now(timezone.utc), tenor_days=max(1, int(T * 365))
    )
    F = estimate_forward_from_chain(df, r=r, T=T, price_col="mid")
    iv = compute_iv_series(df, F, T, r)
    k = log_moneyness(df["strike"].values, F)
    mask = np.isfinite(iv) & np.isfinite(k)
    iv_grids.append((exp, k[mask], iv[mask], T, F, r))

# Plot smiles (IV vs k) for each expiry
for exp, k, iv, T, F, r in iv_grids:
    plt.figure(figsize=(7.5, 4.5))
    plt.title(f"Smile (IV vs k) — {exp.date()}")
    plt.scatter(k, iv, s=12)
    plt.xlabel("k = ln(K/F)")
    plt.ylabel("implied vol")
    plt.show()

## 7) SVI fits per expiry — observed vs fitted total variance

In [None]:
svi_fits = []
for exp, k, iv, T, F, r in iv_grids:
    w = iv**2 * T
    fit = calibrate_svi_from_quotes(k=k, w=w, T=T)
    w_fit = svi_total_variance(k, *fit.params)
    svi_fits.append((exp, k, w, w_fit, T, fit))
    plt.figure(figsize=(7.5, 4.5))
    plt.title(f"SVI fit — {exp.date()}")
    plt.scatter(k, w, s=12, label="observed w")
    plt.plot(k, w_fit, label="SVI w(k)")
    plt.xlabel("k")
    plt.ylabel("total variance w")
    plt.legend()
    plt.show()

# Print params
for exp, k, w, w_fit, T, fit in svi_fits:
    print(
        f"{exp.date()} params:",
        fit.params,
        "loss:",
        fit.loss,
        "n_used:",
        fit.n_used,
    )

## 8) Surface sketch — calendar monotonicity spot check

In [None]:
# Choose a common k-grid (intersection) to compare w across expiries
if len(svi_fits) >= 2:
    k_common = None
    for _, k, _, _, _, _ in svi_fits:
        k_round = np.round(k, 3)
        k_set = set(k_round)
        k_common = k_set if k_common is None else (k_common & k_set)
    k_common = np.array(sorted(list(k_common))) if k_common else None

    if k_common is not None and len(k_common) >= 5:
        plt.figure(figsize=(7.5, 4.5))
        plt.title("SVI w(k) across maturities (common k)")
        for exp, k, _, _, T, fit in svi_fits:
            w_line = svi_total_variance(k_common, *fit.params)
            plt.plot(k_common, w_line, label=f"{exp.date()} (T={T:.2f})")
        plt.xlabel("k")
        plt.ylabel("w(k,T)")
        plt.legend()
        plt.show()

## 9) BL density — recover PDF/CDF from a smoothed call curve

In [None]:
# Use the *first* expiry for a density demo (calls only)
exp, df = expiries[0], dfc[0]
T = yearfrac(datetime.now(timezone.utc), exp)
r = rf.get_rate(datetime.now(timezone.utc), tenor_days=max(1, int(T * 365)))
F = estimate_forward_from_chain(df, r=r, T=T, price_col="mid")

# Build a discounted call curve C(K) on a cleaned grid
kk = df["strike"].values.astype(float)
cc = df["mid"].values.astype(float)
mask = np.isfinite(kk) & np.isfinite(cc) & (cc > 0) & (kk > 0)
K = kk[mask]
C = cc[mask]
# Discount to undiscounted forward numeraire if needed; our bl implementation expects undiscounted C or handles exp(rT) internally.
K_grid, pdf, _ = bl_pdf_from_calls(K, C, T=T, r=r, grid_n=801)
Kg, cdf = build_cdf(K_grid, np.maximum(pdf, 0.0))
mom = moments_from_pdf(Kg, np.maximum(pdf, 0.0))

plt.figure(figsize=(7.5, 4.5))
plt.title(f"Recovered PDF — {exp.date()}")
plt.plot(K_grid, np.maximum(pdf, 0.0))
plt.xlabel("K")
plt.ylabel("pdf(K)")
plt.show()

plt.figure(figsize=(7.5, 4.5))
plt.title(f"CDF — {exp.date()}")
plt.plot(Kg, cdf)
plt.xlabel("K")
plt.ylabel("CDF(K)")
plt.show()

print("RN moments (mean, var, skew, kurt_ex):", mom)