# 02 - Baseline Hedge Strategies

This notebook runs three baseline hedge strategies (static OLS, rolling OLS, and multi-proxy ridge) and compares their out-of-sample hedge quality on the first walk-forward split defined in the configuration.

In [None]:
from __future__ import annotations

import sys
from pathlib import Path

import pandas as pd
import yaml

# Locate the project root: start from the working directory and fall back to
# its parent when ``src`` is missing here but present one level up.
_cwd = Path.cwd()
_use_parent = not (_cwd / "src").exists() and (_cwd.parent / "src").exists()
PROJECT_ROOT = _cwd.parent if _use_parent else _cwd

# Put ``src`` at the front of ``sys.path`` (at most once) so the project-local
# imports that follow can be resolved from it.
SRC_ROOT = PROJECT_ROOT / "src"
_src_entry = str(SRC_ROOT)
if _src_entry not in sys.path:
    sys.path.insert(0, _src_entry)

from costs.transaction_costs import TransactionCostConfig
from data.loaders import load_proxy_hedging_prices
from data.preprocess import MissingDataPolicy, preprocess_prices_and_returns
from evaluation.metrics import evaluate_hedge_performance
from hedging.engine import HedgeConstraints, run_hedging_engine
from models.baselines import RidgeHedgeEstimator, RollingOLSHedgeEstimator, StaticOLSHedgeEstimator


In [None]:
# --- Configuration ----------------------------------------------------------
# Everything below is driven by the project's default YAML config.
config_path = PROJECT_ROOT / "src/config/default.yaml"
cfg = yaml.safe_load(config_path.read_text(encoding="utf-8"))
data_cfg = cfg["data"]

# The target series and the proxy instruments, each described by a
# ``name`` / ``file`` pair in the config.
target_spec = data_cfg["target"]
target_name = target_spec["name"]
proxy_specs = data_cfg["proxies"]
proxy_names = [spec["name"] for spec in proxy_specs]
proxy_files = {spec["name"]: spec["file"] for spec in proxy_specs}

# --- Prices and returns -----------------------------------------------------
raw_data_dir = PROJECT_ROOT / data_cfg["raw_dir"]
prices = load_proxy_hedging_prices(
    raw_dir=raw_data_dir,
    target_file=target_spec["file"],
    proxy_files=proxy_files,
    target_name=target_name,
    date_column=data_cfg.get("date_column", "Date"),
    price_column=data_cfg.get("price_column", "Price"),
)

# Optional sections fall back to empty mappings, so a missing ``date_range``
# yields ``None`` bounds and a missing policy section uses the policy defaults.
date_range = cfg.get("date_range", {})
policy_kwargs = cfg.get("missing_data_policy", {})

# Only the second element of the preprocessing output (simple returns) is used.
_, simple_returns, _ = preprocess_prices_and_returns(
    prices=prices,
    frequency=cfg.get("frequency", "B"),
    start_date=date_range.get("start"),
    end_date=date_range.get("end"),
    missing_data_policy=MissingDataPolicy(**policy_kwargs),
)
# Drop every row that still contains a missing return.
returns = simple_returns.dropna()

# --- Walk-forward split (first configured split only) -----------------------
split = cfg["walk_forward"]["splits"][0]
train = returns.loc[slice(split["train_start"], split["train_end"])]
val = returns.loc[slice(split["val_start"], split["val_end"])]
test = returns.loc[slice(split["test_start"], split["test_end"])]
# Train and validation rows combined, in index order.
train_val = pd.concat([train, val]).sort_index()

# --- Hedging engine settings ------------------------------------------------
cost_cfg = TransactionCostConfig.from_dict(cfg["transaction_costs"])
hedging_cfg = cfg["hedging"]
constraints = HedgeConstraints(**hedging_cfg["constraints"])
rebalance_frequency = hedging_cfg.get("rebalance_frequency", "daily")
scenario = hedging_cfg.get("cost_scenario", "med")


In [None]:
def _run_engine(proxy_columns, ratios):
    """Run the hedging engine on the test window for one strategy.

    Args:
        proxy_columns: List of column names selecting the proxy returns
            from ``test``.
        ratios: Hedge ratios for the test window, as produced by an
            estimator's ``hedge_ratio_time_series``.

    Returns:
        Whatever ``run_hedging_engine`` returns for these inputs; the
        rebalance frequency, constraints, cost config and cost scenario
        are the notebook-level settings shared by every strategy.
    """
    return run_hedging_engine(
        target_returns=test[target_name],
        proxy_returns=test[proxy_columns],
        hedge_ratios=ratios,
        rebalance_frequency=rebalance_frequency,
        constraints=constraints,
        cost_config=cost_cfg,
        cost_scenario=scenario,
    )


results = {}
models_cfg = cfg["models"]
# The single-proxy strategies use the configured proxy, defaulting to the
# first proxy listed in the data config.
default_proxy = proxy_names[0]
single_proxy = models_cfg.get("single_proxy", default_proxy)
single_columns = [target_name, single_proxy]

# 1) Static OLS (single proxy): fit once on train+val, apply over the test index.
static_estimator = StaticOLSHedgeEstimator(
    target_column=target_name,
    proxy_columns=(single_proxy,),
)
static_model = static_estimator.fit(train_val[single_columns])
static_ratios = static_model.hedge_ratio_time_series(index=test.index)
static_engine = _run_engine([single_proxy], static_ratios)
results["single_static_ols"] = static_engine

# 2) Rolling OLS (single proxy)
rolling_cfg = models_cfg["rolling_ols"]
rolling_estimator = RollingOLSHedgeEstimator(
    target_column=target_name,
    proxy_columns=(single_proxy,),
    window=rolling_cfg.get("window", 60),
    refit_frequency=rolling_cfg.get("refit_frequency", 5),
)
# NOTE(review): unlike the other two models, this one is fit on
# train+val+test, presumably so that rolling ratios exist across the test
# index. Confirm that the estimator only uses observations available before
# each date; otherwise this introduces look-ahead.
full_history = pd.concat([train, val, test])
rolling_model = rolling_estimator.fit(full_history[single_columns])
# Dates without an estimated ratio are given a hedge ratio of 0.0.
rolling_ratios = rolling_model.hedge_ratio_time_series(index=test.index).fillna(0.0)
rolling_engine = _run_engine([single_proxy], rolling_ratios)
results["single_rolling_ols"] = rolling_engine

# 3) Multi-proxy ridge: all configured proxies, fit on train+val.
ridge_cfg = models_cfg["ridge"]
ridge_estimator = RidgeHedgeEstimator(
    target_column=target_name,
    proxy_columns=tuple(proxy_names),
    alphas=tuple(ridge_cfg.get("alphas", [1e-4, 1e-3, 1e-2, 1e-1])),
    cv_splits=ridge_cfg.get("cv_splits", 5),
)
ridge_model = ridge_estimator.fit(train_val[[target_name, *proxy_names]])
ridge_ratios = ridge_model.hedge_ratio_time_series(index=test.index)
ridge_engine = _run_engine(proxy_names, ridge_ratios)
results["multi_proxy_ridge"] = ridge_engine


In [None]:
# Attributes of each engine result that are forwarded, under the same keyword
# names, to the metric computation.
metric_inputs = (
    "unhedged_pnl",
    "hedged_pnl_gross",
    "hedged_pnl_net",
    "turnover",
    "transaction_cost",
)

# One metrics record per strategy, tagged with the strategy's name.
rows = []
for method, engine_result in results.items():
    metrics = evaluate_hedge_performance(
        **{field: getattr(engine_result, field) for field in metric_inputs}
    )
    metrics["method"] = method
    rows.append(metrics)

# Index the table by strategy and rank it by hedge effectiveness, best first.
summary = pd.DataFrame(rows)
summary = summary.set_index("method")
summary = summary.sort_values("hedge_effectiveness", ascending=False)

# The trailing bare expression is what the notebook renders as the cell output.
headline_columns = [
    "hedge_effectiveness",
    "tracking_error",
    "hedged_net_sharpe_annualized_no_rf",
    "total_transaction_cost",
]
summary[headline_columns]


## Interpretation Notes

- Static OLS is transparent and stable but may underperform under regime shifts.
- Rolling OLS adapts to local structure, but typically incurs higher turnover and transaction costs.
- Ridge tends to produce more stable hedge ratios when the proxies in a multi-proxy set are multicollinear.