In [2]:
import pandas as pd
from pathlib import Path

# Blend the LightGBM and CatBoost submission files found in REPORTS_DIR.
#
# For every CatBoost weight in ``weights`` two CSVs are written:
#   * sub_rankblend_lgbm_cat_<pct>.csv - weighted mean of rank-normalised scores
#   * sub_probblend_lgbm_cat_<pct>.csv - weighted mean of the raw scores
REPORTS_DIR = Path.cwd().resolve().parents[0] / "reports"

# Share given to CatBoost in each blend; LightGBM receives the remainder.
weights = [0.75, 0.85, 0.90, 0.95]  # CatBoost-heavy


def weight_tag(w_cat):
    """Return the blend weight as a whole-number percentage for filenames.

    ``round`` is used instead of ``int`` because binary floats can land just
    below the intended value (``0.29 * 100 == 28.999999999999996``), and
    ``int`` would truncate that to 28 and mislabel the file.
    """
    return round(w_cat * 100)


def alignment_error(first, second, id_col, target_col):
    """Explain why two submission frames cannot be blended row by row.

    Args:
        first: Reference submission DataFrame.
        second: Submission DataFrame to compare against ``first``.
        id_col: Name of the identifier column.
        target_col: Name of the prediction column.

    Returns:
        ``None`` when ``second`` contains both columns, has as many rows as
        ``first`` and lists the same ids in the same order; otherwise a short
        message describing the first mismatch found.
    """
    missing = [col for col in (id_col, target_col) if col not in second.columns]
    if missing:
        return f"second submission is missing column(s): {missing}"
    # Compare lengths first: an element-wise == on arrays of different shape
    # would broadcast or fail instead of giving a clear answer.
    if len(first) != len(second):
        return f"row counts differ: {len(first)} vs {len(second)}"
    if not (first[id_col].to_numpy() == second[id_col].to_numpy()).all():
        return f"'{id_col}' values differ or are ordered differently"
    return None


def rank_fraction(scores):
    """Return the average rank of each score divided by the number of scores."""
    return scores.rank(method="average") / len(scores)


def blend(first, second, w_second):
    """Return the weighted mean ``(1 - w_second) * first + w_second * second``."""
    return (1 - w_second) * first + w_second * second


# A notebook cell or a script runs with __name__ == "__main__", so the code
# below still executes there and leaves the same globals behind; the guard
# only keeps the file I/O from running if the helpers above are imported.
if __name__ == "__main__":
    lgbm = pd.read_csv(REPORTS_DIR / "sub_lgbm.csv")
    cat = pd.read_csv(REPORTS_DIR / "sub_catboost.csv")

    # Column names are taken from the LightGBM file: first = id, second = target.
    id_col = lgbm.columns[0]
    target_col = lgbm.columns[1]

    # Explicit raise instead of assert: asserts are stripped under ``python -O``.
    problem = alignment_error(lgbm, cat, id_col, target_col)
    if problem is not None:
        raise ValueError(f"Cannot blend submissions: {problem}")

    # Rank versions (AUC-focused)
    lgb_r = rank_fraction(lgbm[target_col])
    cat_r = rank_fraction(cat[target_col])

    # Prob versions (direct averaging)
    lgb_p = lgbm[target_col].astype(float).values
    cat_p = cat[target_col].astype(float).values

    for w_cat in weights:
        tag = weight_tag(w_cat)

        # Rank blend
        out_rank = lgbm.copy()
        out_rank[target_col] = blend(lgb_r, cat_r, w_cat)
        p_rank = REPORTS_DIR / f"sub_rankblend_lgbm_cat_{tag}.csv"
        out_rank.to_csv(p_rank, index=False)

        # Prob blend
        out_prob = lgbm.copy()
        out_prob[target_col] = blend(lgb_p, cat_p, w_cat)
        p_prob = REPORTS_DIR / f"sub_probblend_lgbm_cat_{tag}.csv"
        out_prob.to_csv(p_prob, index=False)

        print(f"Saved w_cat={w_cat:.2f}:")
        print("  ", p_rank)
        print("  ", p_prob)


Saved w_cat=0.75:
   C:\Dev\kaggle-ps-s6e2-heart\reports\sub_rankblend_lgbm_cat_75.csv
   C:\Dev\kaggle-ps-s6e2-heart\reports\sub_probblend_lgbm_cat_75.csv
Saved w_cat=0.85:
   C:\Dev\kaggle-ps-s6e2-heart\reports\sub_rankblend_lgbm_cat_85.csv
   C:\Dev\kaggle-ps-s6e2-heart\reports\sub_probblend_lgbm_cat_85.csv
Saved w_cat=0.90:
   C:\Dev\kaggle-ps-s6e2-heart\reports\sub_rankblend_lgbm_cat_90.csv
   C:\Dev\kaggle-ps-s6e2-heart\reports\sub_probblend_lgbm_cat_90.csv
Saved w_cat=0.95:
   C:\Dev\kaggle-ps-s6e2-heart\reports\sub_rankblend_lgbm_cat_95.csv
   C:\Dev\kaggle-ps-s6e2-heart\reports\sub_probblend_lgbm_cat_95.csv
