In [36]:
import itertools
from pathlib import Path

import numpy as np
import pandas as pd

from portfolio_factor_allocation.scoring import append_avg_score, yearly_score
from portfolio_factor_allocation.weighting import (
    bw_portfolio_weights,
    factor_adjusted_weights,
    percentile_portfolio_weights,
)

In [37]:
# Input data lives in a "data" folder next to the notebook's working directory.
data_dir = Path.cwd().parent.joinpath("data")

# One CSV per factor combination is written here; create the folder up front.
strategies_dir = data_dir.joinpath("active_strategies")
strategies_dir.mkdir(parents=True, exist_ok=True)

In [38]:
# Map the raw factor column names in factors.csv to the single-letter codes
# used everywhere else in this notebook (factor lists, output column names).
factor_rename = {
    "ret_geo": "W",
    "vol_36m": "L",
    "value": "V",
    "investment": "C",
    "profitability": "R",
}

# Load the factor table and apply the short names in one step.
df_factors = pd.read_csv(data_dir / "factors.csv").rename(columns=factor_rename)

In [39]:
# Load monthly returns. `parse_dates` can silently leave the column as object
# dtype when parsing fails, so the explicit conversion is kept as a strict check.
df_returns = pd.read_csv(
    data_dir / "monthly_returns.csv",
    parse_dates=["date"],
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [40]:
# Single-letter factor codes (see the rename mapping applied to df_factors).
factors = ["V", "W", "C", "R", "L"]

# Every combination of two or more factors, smallest combinations first,
# each stored as a list so it can be used directly as a column selector.
factor_combs = []
for _size in range(2, len(factors) + 1):
    factor_combs.extend(map(list, itertools.combinations(factors, _size)))

In [41]:
# Identifier columns copied from df_factors into every output file.
ID_COLS = ["PERMNO", "date", "tic", "conm"]

# Score column requested for the "integrated" portfolios
# (presumably the column added by append_avg_score -- confirm in scoring module).
int_factor_name = "int"

# --- Scoring method passed to the scoring/weighting helpers, per scheme ---
method_percentile = "rank"
method_bw = "z"
method_te = "z"

# --- Percentile (decile / tercile) portfolios ---
p_dec = 0.1
p_ter = 1 / 3
# Cutoff used for the *mixed* tercile portfolio, keyed by number of factors.
p_dict = {2: 0.2, 3: 0.1225, 4: 0.1, 5: 0.0875}
percentile_suffixes = ["_ter", "_dec"]

# --- BW portfolios ---
n_subportfolios = 20
high_multiplier = 1.95
increment = 0.1
# Extra argument passed for the *mixed* BW portfolio, keyed by number of factors.
power_dict = {2: 2, 3: 3, 4: 5, 5: 8}

# --- Tracking-error (TE) portfolios: only referenced by the disabled TE code ---
tracking_error = 0.02
tracking_error_dict = {2: 3.0, 3: 3.5, 4: 3.8, 5: 4.0}

In [42]:
def _int_and_mix_weights(
    df_int, df_single, factor_comb, factor_weights, method, suffix
):
    """Build the integrated + mixed weight columns for one weighting scheme.

    Parameters
    ----------
    df_int : pandas.DataFrame
        Result of a ``*_portfolio_weights`` call on the integrated score. It is
        relabelled to two columns (``date`` plus the integrated weight) and
        extended in place.
    df_single : pandas.DataFrame
        Result of the same weighting function on the individual factors; it is
        passed to ``factor_adjusted_weights`` to obtain the mixed weights.
    factor_comb : list[str]
        Factor codes of the current combination; joined with ``_`` to form the
        column prefix.
    factor_weights : list[float]
        Per-factor weights forwarded to ``factor_adjusted_weights``.
    method : str
        Scoring method forwarded to ``factor_adjusted_weights``.
    suffix : str
        Scheme suffix appended to both column names (e.g. ``"_dec"``).

    Returns
    -------
    pandas.DataFrame
        Two columns, ``<factors>_int<suffix>`` and ``<factors>_mix<suffix>``;
        the ``date`` column is dropped because the caller already carries it.
    """
    prefix = "_".join(factor_comb)
    df_int.columns = ["date", f"{prefix}_int"]
    df_int[f"{prefix}_mix"] = factor_adjusted_weights(
        df_single, factor_comb, factor_weights, method
    )
    return df_int.drop(columns=["date"]).add_suffix(suffix)


for factor_comb in factor_combs:
    factor_num = len(factor_comb)
    # Equal weight on every factor of the combination for the mixed portfolios.
    factor_weights = [1 / factor_num] * factor_num

    # ID columns first; each scheme appends its two weight columns after them.
    # NOTE(review): the column-wise concat below aligns on the index, so it
    # assumes the weighting helpers return frames indexed like df_factors --
    # confirm in portfolio_factor_allocation.weighting.
    weight_frames = [df_factors[ID_COLS].copy()]

    ### DEC & TER ###
    # Both percentile schemes share the same rank-based scores.
    df_score = yearly_score(df_factors, factor_comb, method_percentile)
    df_score = append_avg_score(df_score, factor_comb, method_percentile)

    # (suffix, cutoff for the integrated portfolio, cutoff for the mixed one).
    # The tercile mix deliberately uses the per-factor-count cutoff in p_dict
    # rather than p_ter, exactly as before.
    percentile_schemes = (
        ("_dec", p_dec, p_dec),
        ("_ter", p_ter, p_dict[factor_num]),
    )
    for suffix, p_int, p_mix in percentile_schemes:
        df_w = _int_and_mix_weights(
            percentile_portfolio_weights(
                df_score, [int_factor_name], method_percentile, p_int
            ),
            percentile_portfolio_weights(
                df_score, factor_comb, method_percentile, p_mix
            ),
            factor_comb,
            factor_weights,
            method_percentile,
            suffix,
        )
        weight_frames.append(df_w)

    ### BW ###
    # BW weights are computed from z-scores, so the scores are rebuilt.
    df_score = yearly_score(df_factors, factor_comb, method_bw)
    df_score = append_avg_score(df_score, factor_comb, method_bw)

    bw_args = (method_bw, n_subportfolios, high_multiplier, increment)
    df_w = _int_and_mix_weights(
        bw_portfolio_weights(df_score, [int_factor_name], *bw_args),
        # Only the mixed portfolio receives the extra power_dict argument.
        bw_portfolio_weights(
            df_score, factor_comb, *bw_args, power_dict[factor_num]
        ),
        factor_comb,
        factor_weights,
        method_bw,
        "_bw",
    )
    weight_frames.append(df_w)

    ### TE (disabled) ###
    # A tracking-error variant used to be sketched here as commented-out code.
    # It is not active: te_portfolio_weights is not imported in this notebook.
    # To restore it, reuse the z-score df_score from the BW step (method_te is
    # the same method) and call te_portfolio_weights with `tracking_error` for
    # the integrated portfolio and `tracking_error_dict[factor_num]` for the
    # mixed one, passing df_returns, then append the result with suffix "_te".

    # Assemble all schemes side by side and write one file per combination.
    df_weights = pd.concat(weight_frames, axis=1)
    df_weights.to_csv(strategies_dir / f"{'_'.join(factor_comb)}.csv", index=False)

In [43]:
# Verify that weights sum to 1 on every date.
# NOTE: df_weights is the frame left over from the last loop iteration, so only
# the final factor combination is checked here.
#
# The weight columns are named "<factors>_{int,mix}_{dec,ter,bw}" and never
# contain "weight_", so filtering on that substring selected nothing and the
# check passed vacuously. Select every non-identifier column instead.
weight_cols = [c for c in df_weights.columns if c not in ID_COLS]
if not weight_cols:
    # np.allclose on an empty frame is True; never report success on no data.
    raise ValueError("No weight columns found in df_weights; nothing to verify.")
yearly_sums = df_weights.groupby("date")[weight_cols].sum()

# Check if all sums are approximately 1
if np.allclose(yearly_sums, 1.0):
    print("Success: All portfolio weights sum to 1.0 for each year.")
else:
    print("Failure: Some portfolio weights do not sum to 1.0.")
    # Show first few failures (dates where at least one column is off)
    failures = yearly_sums[~np.isclose(yearly_sums, 1.0).all(axis=1)]
    print(failures.head())

Success: All portfolio weights sum to 1.0 for each year.
