In [6]:
from pathlib import Path

import polars as pl

REPORT_DIR = Path("../reports/scores")
PORTFOLIO_DIR = Path("../reports/portfolios")

In [5]:
for portfolio in PORTFOLIO_DIR.glob("portfolio_*.xlsx"):
    df = pl.read_excel(portfolio, sheet_name="Full Portfolio")
    top = df.head(5)

    if "fwd_return_4Q" not in df.columns:
        continue

    top_freturn = top.select(pl.col("fwd_return_4Q")).mean().item()
    top_hits = top.select(pl.col("fwd_return_4Q") > 0).sum().item()

    top_hitrate = (top_hits / len(top)) * 100

    print(f"\nPortfolio: {portfolio}")
    print(f"Average return: {top_freturn:.2f}% ({top_hitrate:.1f})")

In [18]:
from itertools import combinations


def filter_portfolio(df: pl.DataFrame) -> pl.DataFrame:
    score_threshold = df["avg_score"].mean() + df["avg_score"].std()
    quality_filters = (
        (pl.col("pe") > 0)
        & (pl.col("saleq_yoy") > -20)
        & (pl.col("fcf_yoy") > -50)
        & (pl.col("price_mom") > -20)
        & (pl.col("avg_score") > score_threshold)
    )
    return df.filter(quality_filters)


# For each score file
for score_file in REPORT_DIR.glob("*.csv"):
    df = pl.read_csv(score_file)

    # Find all prediction columns
    pred_cols = [col for col in df.columns if col.startswith("pred_")]

    if not pred_cols:
        continue

    print(f"\nAnalyzing {score_file.name}")

    # Try different combinations of prediction columns
    best_return = float("-inf")
    best_hitrate = float("-inf")
    best_combo = None
    best_metrics = None

    # Try different sizes of combinations
    for n in range(1, len(pred_cols) + 1):
        for pred_combo in combinations(pred_cols, n):
            # Average the predictions from selected columns
            combo_scores = sum(df[col] for col in pred_combo) / len(pred_combo)

            try:
                # Get top stocks based on this combination
                top_k = 10
                sorted_indices = combo_scores.arg_sort()[::-1][:top_k]
                top_returns = df["fwd_return_4Q"][sorted_indices]

                if len(top_returns) < top_k:
                    continue

                avg_return = float(top_returns.mean())
                hit_rate = float((top_returns > 0).sum() / top_k * 100)
            except Exception as e:
                print(e)
                continue

            # Update best if this combination performs better
            if hit_rate > best_hitrate:
                best_return = avg_return
                best_hitrate = hit_rate
                best_combo = pred_combo
                best_metrics = {"return": avg_return, "hit_rate": hit_rate, "columns": pred_combo}

    if best_metrics:
        print(f"\nTrade Date: {score_file.name}")
        print(f"Best column combination: {[col.replace('pred_', '') for col in best_combo]}")
        print(f"Average return: {best_metrics['return']:.2f}%")
        print(f"Hit rate: {best_metrics['hit_rate']:.1f}%")


Analyzing scores_2022-09-01.csv

Trade Date: scores_2022-09-01.csv
Best column combination: ['fwd_return_4Q_hit']
Average return: 38.47%
Hit rate: 70.0%

Analyzing scores_2023-03-01.csv

Trade Date: scores_2023-03-01.csv
Best column combination: ['risk_return_3Q_hit', 'risk_return_4Q_hit']
Average return: 15.57%
Hit rate: 80.0%

Analyzing scores_2024-09-01.csv
float() argument must be a string or a real number, not 'NoneType'
float() argument must be a string or a real number, not 'NoneType'
float() argument must be a string or a real number, not 'NoneType'
float() argument must be a string or a real number, not 'NoneType'
float() argument must be a string or a real number, not 'NoneType'
float() argument must be a string or a real number, not 'NoneType'
float() argument must be a string or a real number, not 'NoneType'
float() argument must be a string or a real number, not 'NoneType'
float() argument must be a string or a real number, not 'NoneType'
float() argument must be a string