In [18]:
from pathlib import Path

import numpy as np
import pandas as pd

from portfolio_factor_allocation.backtesting import calculate_strategy_returns

In [19]:
# All project data lives in a `data` folder located beside the current
# working directory (i.e. under the working directory's parent).
data_dir = Path.cwd().parent.joinpath("data")

# Sub-folders of `data`: strategy weight CSVs and computed return CSVs.
strategies_dir, returns_dir = (
    data_dir / folder for folder in ("strategies", "returns")
)

### Load data

Load a sample weights file (`V_W.csv`) into `df_weights` so that `df_crsp` can be restricted to the companies (PERMNOs) that appear in it.

In [12]:
# Sample strategy-weights table; its PERMNO column is what the CRSP filter uses.
df_weights = pd.read_csv(strategies_dir.joinpath("V_W.csv"))

In [13]:
# PERMNOs present in the sample weights; only these companies are kept.
valid_permnos = df_weights["PERMNO"].unique()

df_crsp = (
    pd.read_csv(data_dir / "crsp_raw.csv", parse_dates=["date"])
    # Collapse daily timestamps to monthly periods.
    .assign(date=lambda frame: frame["date"].dt.to_period("M"))
    # Keep only companies that appear in df_weights.
    .loc[lambda frame: frame["PERMNO"].isin(valid_permnos)]
    # Drop these columns when present (no error if they are missing).
    .drop(columns=["SHRCD", "EXCHCD"], errors="ignore")
    # Non-numeric RET entries become NaN ...
    .assign(RET=lambda frame: pd.to_numeric(frame["RET"], errors="coerce"))
    # ... and rows without a usable RET are discarded.
    .dropna(subset=["RET"])
)

# NOTE: duplicate (PERMNO, date) rows are NOT removed; the step below is disabled.
# df_crsp = df_crsp.drop_duplicates(subset=["PERMNO", "date"])

# Persist the cleaned monthly returns next to the raw file.
df_crsp.to_csv(data_dir / "monthly_returns.csv", index=False)

### Compute returns

In [20]:
# Non-weight columns — presumably the identifier columns of the weights
# files; TODO confirm against the CSVs.
ID_COLS = ["PERMNO", "date", "tic", "conm"]

# Each entry is a list of factor codes; joined with "_" it names both the
# strategy CSV to read and the prefix of its weight columns.
factor_combs = [["V", "W"]]

# Weight-column suffixes, appended to the joined factor string.
suffixes = [
    "_int_ter", "_mix_ter",
    "_int_dec", "_mix_dec",
    "_int_bw", "_mix_bw",
]

# Forwarded unchanged to calculate_strategy_returns — presumably the lag (in
# years) and the rebalancing interval (in months); verify against that function.
year_shift, n_month_rebalance = 1, 1

# Per-period risk-free rate subtracted from returns in the Sharpe ratio.
r_f = 0.0

# Alias for the cleaned CRSP frame built in the data-loading cell
# (that cell must have been run first).
monthly_returns = df_crsp

In [21]:
# Weights of the market benchmark portfolio.
market_weights_path = strategies_dir / "market.csv"
df_weights = pd.read_csv(market_weights_path)

# Benchmark returns: same settings as the strategies, single weight column.
market_weight_cols = ["market"]
df_market_r = calculate_strategy_returns(
    df_weights,
    monthly_returns,
    year_shift,
    n_month_rebalance,
    market_weight_cols,
)

# Store the benchmark return series alongside the strategy returns.
market_returns_path = returns_dir / "market.csv"
df_market_r.to_csv(market_returns_path, index=False)

In [23]:
def _annualized_ratio(
    excess: pd.Series, periods_per_year: int = 12, zero_tol: float = 1e-12
) -> float:
    """Return the annualized mean-to-volatility ratio of a return series.

    Used for both the Sharpe ratio (returns minus the risk-free rate) and the
    information ratio (returns minus the benchmark).

    Parameters
    ----------
    excess : pd.Series
        Periodic excess (or active) returns.
    periods_per_year : int, default 12
        Number of periods per year; the ratio is scaled by its square root.
    zero_tol : float, default 1e-12
        Volatility at or below this value is treated as zero.

    Returns
    -------
    float
        ``mean / std * sqrt(periods_per_year)``, or NaN when the volatility is
        undefined (NaN) or effectively zero.
    """
    volatility = excess.std()
    # An exact `== 0` test is unreliable: a constant series can produce a std
    # of ~1e-17 from rounding in the mean, which would yield a huge ratio.
    if np.isnan(volatility) or volatility <= zero_tol:
        return np.nan
    return (excess.mean() / volatility) * np.sqrt(periods_per_year)


# Summary statistics, keyed by factor combination and then by suffix.
sharpe_data = {}
ir_data = {}

# Ensure market returns are indexed by date for alignment
market_series = df_market_r.set_index("date")["market"]

for factor_comb in factor_combs:
    factor_str = "_".join(factor_comb)
    weights_cols = [factor_str + s for s in suffixes]
    df_weights = pd.read_csv(strategies_dir / f"{factor_str}.csv")

    # One return column per weight column, plus a `date` column.
    df_r = calculate_strategy_returns(
        df_weights, monthly_returns, year_shift, n_month_rebalance, weights_cols
    )

    df_r.to_csv(returns_dir / f"{factor_str}.csv", index=False)

    # Keep only the dates present in both the strategy and the market series.
    df_r_indexed = df_r.set_index("date")
    aligned_r, aligned_m = df_r_indexed.align(market_series, join="inner", axis=0)

    sharpes = {}
    irs = {}

    for col, suffix in zip(weights_cols, suffixes):
        # Returns for this strategy
        r = aligned_r[col]

        # Sharpe: excess over the (constant) risk-free rate. Subtracting a
        # constant leaves the volatility unchanged, so this matches
        # mean(r - r_f) / std(r).
        sharpes[suffix] = _annualized_ratio(r - r_f)

        # IR: active return relative to the market benchmark.
        irs[suffix] = _annualized_ratio(r - aligned_m)

    sharpe_data[factor_str] = sharpes
    ir_data[factor_str] = irs

# Rows are suffixes, columns are factor combinations.
df_sharpe = pd.DataFrame(sharpe_data)
df_ir = pd.DataFrame(ir_data)

df_sharpe.to_csv(data_dir / "sharpe_summary.csv")
df_ir.to_csv(data_dir / "ir_summary.csv")