In [1]:
# ---------------------------
# Import libraries
# ---------------------------
import pandas as pd

In [2]:
# ====================================================
# 🎵 Kaggle PS S5E9 - Ensemble Blender
# ====================================================
# This script blends multiple submissions into one final
# ensemble submission. You can assign ANY weight values,
# and they will be automatically normalized into valid 
# percentages (so no need to manually sum to 1).
#
# 📂 Sources (NOTE: four are listed here, but the `sources` dict below blends three files):
# - Best submission v69
# - S5E9 Songs RMSE (Cage)
# - Playground S5E9 LBRACE v1
# - From Data to Dancefloor EDA BPM
#
# ⚙️ How it works:
# 1. Read all source submissions
# 2. Merge on "id"
# 3. Normalize weights
# 4. Compute weighted average BPM
# 5. Save final `submission.csv`
#
# 🏆 Output:
# - A blended ensemble submission ready for Kaggle upload
# ====================================================

In [3]:
# ---------------------------
# Define sources and weights
# ---------------------------
# Format: { "file_path": weight }
# Weights are relative: they are normalized to sum to 1 before blending,
# so only their ratios matter.
from functools import reduce  # imported at the top of the cell so the cell runs on its own

sources = {
    "/kaggle/input/predicting-the-beats-per-minute-of-songs-vault/submission (2).csv": 0.1,
    "/kaggle/input/predicting-the-beats-per-minute-of-songs-vault/submission (1).csv": 0.7,
    "/kaggle/input/predicting-the-beats-per-minute-of-songs-vault/submission.csv": 2.9,
}

# ---------------------------
# Read and merge submissions
# ---------------------------
# Each file's prediction column is renamed to BeatsPerMinute_<n>, where <n>
# is the 1-based position of the file in `sources`.
dfs = []
for i, src in enumerate(sources, start=1):
    df = pd.read_csv(src)
    # Fail here, naming the offending file, instead of with a KeyError on
    # BeatsPerMinute_<n> in a later cell.
    missing = {"id", "BeatsPerMinute"} - set(df.columns)
    if missing:
        raise ValueError(f"{src} is missing required column(s): {sorted(missing)}")
    dfs.append(df.rename(columns={"BeatsPerMinute": f"BeatsPerMinute_{i}"}))

# Merge on 'id'.
# validate="one_to_one" makes pandas raise MergeError when an id is duplicated
# in any file, instead of silently multiplying rows.
df = reduce(
    lambda left, right: left.merge(right, on="id", validate="one_to_one"),
    dfs,
)

# merge() defaults to an inner join, so an id missing from any one file would
# be dropped without warning and the final submission would come out short.
# With one-to-one keys, equal row counts before and after the merge mean every
# file contributed exactly the same set of ids.
row_counts = [len(frame) for frame in dfs]
if any(count != len(df) for count in row_counts):
    raise ValueError(
        "Submissions do not share the same ids: "
        f"rows per file {row_counts}, rows after merge {len(df)}"
    )

In [4]:
# ==================================================
# ⚖️ Blend Predictions (Normalized Weights)
# --------------------------------------------------
# • Set any weights (no need to sum to 1)
# • Auto-normalized into valid percentages
# • Compute weighted BPM across all entries in `sources`
# ==================================================

In [5]:
# ---------------------------
# Normalize weights
# ---------------------------
# Divide every raw weight by the sum of all weights so the shares add up to 1.
# The order follows the insertion order of `sources`, which is also the order
# used to number the BeatsPerMinute_<n> columns.
weights = [*sources.values()]
total = sum(weights)
norm_weights = [raw / total for raw in weights]

# ---------------------------
# Compute blended prediction
# ---------------------------
# Weighted sum of the per-source prediction columns; share number <n>
# multiplies column BeatsPerMinute_<n>.
df["BeatsPerMinute"] = sum(
    share * df[f"BeatsPerMinute_{rank}"]
    for rank, share in enumerate(norm_weights, start=1)
)

# ---------------------------
# Keep only required columns
# ---------------------------
# Only the id and the blended prediction go into the final frame.
final = df[["id", "BeatsPerMinute"]]

In [6]:
# ==================================================
# 💾 Save Final Submission
# --------------------------------------------------
# • Export blended results to /kaggle/working
# • Prints normalized weights used
# • Ready for direct Kaggle upload 🚀
# ==================================================

In [7]:
# ---------------------------
# Save submission
# ---------------------------
# Write the blended frame without the pandas index column.
final.to_csv("/kaggle/working/submission.csv", index=False)
print("✅ Ensemble submission saved as submission.csv")

# Report each source's normalized share as a percentage with two decimals,
# keyed src1, src2, ... in the order of `norm_weights`.
print(
    "Final normalized weights used:",
    {f"src{rank}": f"{share:.2%}" for rank, share in enumerate(norm_weights, start=1)},
)

✅ Ensemble submission saved as submission.csv
Final normalized weights used: {'src1': '2.70%', 'src2': '18.92%', 'src3': '78.38%'}
