In [1]:
import pandas as pd
import numpy as np
from functools import reduce

# Column names shared by every prediction CSV handled in this notebook.
ID_COL = "id"
PRED_COL = "diagnosed_diabetes"


def load_preds_table(paths, id_col=ID_COL, pred_col=PRED_COL, prefix="S"):
    """Load several prediction CSVs and join them into one wide table.

    Each file must contain the columns ``id_col`` and ``pred_col``. The
    prediction column of the i-th file is renamed to ``f"{prefix}{i}"`` and the
    files are inner-joined on ``id_col``, so only ids present in *every* file
    survive. Row order follows the first file.

    Parameters
    ----------
    paths : iterable
        CSV locations accepted by ``pandas.read_csv``; at least one is required.
    id_col : str
        Name of the key column used for the join.
    pred_col : str
        Name of the prediction column to extract from each file.
    prefix : str
        Prefix for the per-file prediction columns in the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``[id_col, f"{prefix}0", f"{prefix}1", ...]``.

    Raises
    ------
    ValueError
        If ``paths`` is empty, or if a file contains repeated ``id_col`` values.
    KeyError
        If a file lacks ``id_col`` or ``pred_col`` (raised by pandas).
    """
    paths = list(paths)
    if not paths:
        raise ValueError("load_preds_table needs at least one path")

    dfs = []
    for i, p in enumerate(paths):
        df = pd.read_csv(p)[[id_col, pred_col]].rename(columns={pred_col: f"{prefix}{i}"})
        # A repeated id would turn the merge below into a many-to-many join and
        # silently multiply rows, so fail loudly and name the offending file.
        if df[id_col].duplicated().any():
            raise ValueError(f"duplicate {id_col!r} values in {p!r}")
        dfs.append(df)

    return reduce(lambda l, r: l.merge(r, on=id_col, how="inner"), dfs)

def power_mean_blend(paths, weights=None, p=8, clip_eps=1e-12, id_col=None, pred_col=None):
    """Blend prediction files with a weighted generalized (power) mean.

    For each id the blend is ``(sum_i w_i * x_i**p) ** (1/p)``, where ``x_i`` is
    the prediction from the i-th file clipped to ``[clip_eps, 1 - clip_eps]``
    and ``w`` is ``weights`` rescaled to sum to (approximately) one. ``p == 0``
    is treated as the limiting case, the weighted geometric mean.

    Parameters
    ----------
    paths : iterable
        Prediction CSVs, passed to ``load_preds_table``.
    weights : sequence of float, optional
        One weight per path; uniform when omitted. Negative entries are
        allowed. In that case the weighted sum can fall below ``clip_eps``
        (it is then clipped up to ``clip_eps`` before the root is taken) and
        the result can exceed 1, because no upper clip is applied.
    p : float
        Exponent of the power mean.
    clip_eps : float
        Clipping margin applied to the inputs and lower bound applied to the
        weighted sum before taking the ``1/p`` root.
    id_col, pred_col : str, optional
        Column names; default to the module-level ``ID_COL`` / ``PRED_COL``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``id_col`` and ``pred_col`` (the blended prediction).

    Raises
    ------
    ValueError
        If ``weights`` does not have exactly one entry per path, or if the
        weights sum to (nearly) zero or to a non-finite value.
    """
    # Resolve defaults at call time so the globals are only needed when used.
    id_col = ID_COL if id_col is None else id_col
    pred_col = PRED_COL if pred_col is None else pred_col

    m = load_preds_table(paths, id_col=id_col, pred_col=pred_col, prefix="S")
    cols = [c for c in m.columns if c != id_col]
    P = m[cols].to_numpy(float)

    # Keep inputs strictly inside (0, 1) so powers and logs stay finite.
    P = np.clip(P, clip_eps, 1.0 - clip_eps)

    k = P.shape[1]
    if weights is None:
        w = np.ones(k, dtype=float) / k
    else:
        w = np.asarray(weights, dtype=float)
        if w.shape != (k,):
            raise ValueError(
                f"expected {k} weights (one per path), got shape {w.shape}"
            )
        total = w.sum()
        # With mixed-sign weights the sum can cancel out; dividing by ~0 would
        # blow the weights up to meaningless magnitudes.
        if not np.isfinite(total) or abs(total) < 1e-12:
            raise ValueError("weights must have a finite, non-zero sum")
        w = w / (total + 1e-12)

    if p == 0:
        # Limit of the power mean as p -> 0: weighted geometric mean.
        blend = np.exp(np.log(P) @ w)
    else:
        # generalized mean (power mean)
        blend = (P ** p) @ w
        # Negative weights can drive the sum to <= 0; clip before the root.
        blend = np.clip(blend, clip_eps, None)
        blend = np.power(blend, 1.0 / p)

    out = pd.DataFrame({id_col: m[id_col].values, pred_col: blend})
    return out

# Each submission file paired with its raw blend weight; the weights are
# normalized inside power_mean_blend. The last file carries a negative weight,
# so the blend is pushed away from it ("extrapolative blending").
blend_inputs = [
    ("/kaggle/input/ps-s5e12/submission (70).csv", 2.99),
    ("/kaggle/input/ps-s5e12/submission (71).csv", 0.2),
    ("/kaggle/input/ps-s5e12/submission (67).csv", 0.1),
    ("/kaggle/input/ps-s5e12/submission (66).csv", 0.1),
    ("/kaggle/input/ps-s5e12/submission (65).csv", 0.1),
    ("/kaggle/input/ps-s5e12/submission (61).csv", -0.1),
]

paths = [csv_path for csv_path, _ in blend_inputs]
weights = [raw_weight for _, raw_weight in blend_inputs]

# Blend with a power mean of order 16 and write the submission file.
final = power_mean_blend(paths, weights=weights, p=16)
final.to_csv("submission.csv", index=False)
print("✅ saved submission.csv | rows:", final.shape[0])

✅ saved submission.csv | rows: 300000


In [2]:
# Preview the first five rows of the blended submission.
final.head(n=5)

Unnamed: 0,id,diagnosed_diabetes
0,700000,0.448294
1,700001,0.610741
2,700002,0.73623
3,700003,0.394683
4,700004,0.854668
