In [1]:
import itertools
from pathlib import Path

import pandas as pd

from portfolio_factor_allocation.backtesting import bootstrap_studentized_sharpe_diff

In [7]:
# All inputs and outputs live under a "data" folder located in the parent of
# the current working directory.
data_dir = Path.cwd().parent.joinpath("data")
returns_dir = data_dir.joinpath("returns")
bootstrap_dir = data_dir.joinpath("bootstrap")
# Destination folder for the studentized Sharpe-difference CSVs.
studentized_dir = bootstrap_dir.joinpath("studentized", "sharpe")

# Create the output folder together with any missing parents; a folder that
# already exists is left untouched.
studentized_dir.mkdir(exist_ok=True, parents=True)

### Load Data

In [8]:
# Read the market returns and the monthly risk-free rate. The market file is
# read through `returns_dir` (the same folder the strategy return files are
# loaded from) instead of rebuilding the path from `data_dir`.
df_market = pd.read_csv(returns_dir / "market.csv", parse_dates=["date"])
df_monthly_rf = pd.read_csv(data_dir / "monthly_rf.csv", parse_dates=["date"])

# Convert the parsed timestamps to monthly periods so both series are keyed by
# calendar month rather than by a specific day.
df_market["date"] = df_market["date"].dt.to_period("M")
df_monthly_rf["date"] = df_monthly_rf["date"].dt.to_period("M")

# Month-indexed series used downstream for date alignment (market) and for
# computing excess returns (rf).
market_series = df_market.set_index("date")["market"]
rf_series = df_monthly_rf.set_index("date")["rf"]

### Bootstrap Parameters

In [9]:
# Single-letter factor codes; each code is a token in the strategy file and
# column names.
factors = ["V", "W", "C", "R", "L"]

# Every subset of two or more factors, grouped by subset size (pairs first,
# the full set last) and keeping the order of `factors` within each subset.
factor_combs = [
    [*combo]
    for size in range(2, len(factors) + 1)
    for combo in itertools.combinations(factors, size)
]

In [10]:
# Suffixes selecting which column pair is compared for each factor combination:
# "<factors>_int_<suffix>" versus "<factors>_mix_<suffix>".
suffix_groups = ["ter", "dec", "bw"]

# Settings passed to bootstrap_studentized_sharpe_diff.
block_size = 5  # presumably the bootstrap block length, in months — confirm
n_sim = 1_000  # presumably the number of bootstrap resamples — confirm
seed = 42  # passed as `seed=` on every call so reruns use the same seed

### Compute Studentized Statistics

In [11]:
# For every factor combination: load its strategy returns, convert them to
# excess returns, run the studentized bootstrap for each suffix group, and write
# one CSV per combination with one column per group.
for factor_comb in factor_combs:
    factor_str = "_".join(factor_comb)

    # Load strategy returns and key them by calendar month.
    df_r = pd.read_csv(returns_dir / f"{factor_str}.csv")
    df_r["date"] = pd.to_datetime(df_r["date"]).dt.to_period("M")
    df_r = df_r.set_index("date")

    # Keep only months present in the strategy, market and risk-free data.
    # The market series is not used in the Sharpe difference itself; it only
    # takes part in the intersection so the sample stays consistent with it.
    # Index.intersection does not sort by default, so sort explicitly: the
    # bootstrap takes a block size and therefore presumably relies on the
    # observations being in chronological order.
    common_index = (
        df_r.index.intersection(market_series.index)
        .intersection(rf_series.index)
        .sort_values()
    )

    aligned_r = df_r.loc[common_index]
    aligned_rf = rf_series.loc[common_index]

    # Excess returns: subtract the risk-free rate from every strategy column,
    # matching rows by date.
    # NOTE(review): missing values are passed through unchanged; confirm how
    # bootstrap_studentized_sharpe_diff treats NaNs.
    excess_r = aligned_r.sub(aligned_rf, axis=0)

    results_data = {}

    for group in suffix_groups:
        col_int = f"{factor_str}_int_{group}"
        col_mix = f"{factor_str}_mix_{group}"

        r_i = excess_r[col_int]
        r_n = excess_r[col_mix]

        # Bootstrap statistics for the "int" vs "mix" pair. The same seed is
        # passed on every call, for all groups and all factor combinations.
        d_tilde_stars = bootstrap_studentized_sharpe_diff(
            r_i, r_n, block_size, n_sim, seed=seed
        )

        results_data[group] = d_tilde_stars

    # One column per suffix group (presumably one row per bootstrap draw),
    # written without the index.
    df_results = pd.DataFrame(results_data)
    output_path = studentized_dir / f"{factor_str}.csv"
    df_results.to_csv(output_path, index=False)

### Hypothesis Testing