# 🫀 Kaggle Playground Series S6E2: Heart Disease Prediction
## 🏆 Competition-Winning Solution (GrandMaster Level)

**Author:** Tassawar Abbas (Lead Researcher)  
**Email:** [abbas829@gmail.com](mailto:abbas829@gmail.com)  
**Competition:** Playground Series - Season 6, Episode 2  
**Goal:** Predict the likelihood of heart disease using structured medical data  
**Metric:** Area Under the ROC Curve (ROC-AUC)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# ML Libraries
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score
import lightgbm as lgb
import xgboost as xgb

# Single seed reused for NumPy's global RNG and for every `random_state`
# argument further down, so repeated runs produce the same splits and models.
SEED = 42
np.random.seed(SEED)
plt.style.use('fivethirtyeight')

# The emoji was previously mis-encoded (UTF-8 bytes shown as Mac Roman text).
print("✅ Standard ML Environment Ready!")

## 1️⃣ Data Loading & Cleaning

In [None]:
def robust_load(path):
    """Read a CSV file and return it with whitespace-trimmed column names.

    Args:
        path: Anything ``pandas.read_csv`` accepts (file path or buffer).

    Returns:
        The loaded DataFrame; every column label is converted to ``str`` and
        has leading/trailing whitespace removed.
    """
    frame = pd.read_csv(path)
    # Headers may carry stray spaces; normalise them once here so later
    # lookups by column name behave predictably.
    trimmed_labels = frame.columns.astype(str).str.strip()
    frame.columns = trimmed_labels
    return frame

train = robust_load('train.csv')
test = robust_load('test.csv')

# Locate the label column by keyword (case-insensitive substring match).
_LABEL_KEYWORDS = ('heart', 'target')
_keyword_cols = [
    c for c in train.columns
    if any(keyword in c.lower() for keyword in _LABEL_KEYWORDS)
]
if not _keyword_cols:
    # Fail with an actionable message instead of a bare IndexError.
    raise ValueError(
        f"No target column found: expected a name containing one of "
        f"{_LABEL_KEYWORDS}, got columns {list(train.columns)}"
    )

# Features appear in both files while the label can only be in train, so a
# match that is absent from test wins over an earlier-positioned feature
# whose name merely happens to contain a keyword.
_train_only_matches = [c for c in _keyword_cols if c not in test.columns]
TARGET = (_train_only_matches or _keyword_cols)[0]
print(f"📊 Data Loaded. Target: {TARGET}")

## 2️⃣ Training the Ensemble

In [None]:
# Encode the label as integers. LabelEncoder orders classes by sorted value,
# so `predict_proba(...)[:, 1]` below is the probability of the class that
# sorts last.
le = LabelEncoder()
y = le.fit_transform(train[TARGET])
X = train.drop([TARGET, 'id'], axis=1, errors='ignore')
# Select test features by name, in training order, so both frames line up
# column-for-column; a feature missing from test fails loudly here instead of
# producing misaligned predictions.
X_test = test.drop(['id'], axis=1, errors='ignore')[X.columns]

# Single source of truth for the fold count: it drives both the splitter and
# the averaging of test predictions.
N_SPLITS = 5
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
oof = np.zeros(len(X))          # out-of-fold probabilities, one per train row
preds = np.zeros(len(X_test))   # fold-averaged test probabilities

for fold, (tr_idx, val_idx) in enumerate(skf.split(X, y)):
    X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
    y_tr, y_val = y[tr_idx], y[val_idx]

    model = lgb.LGBMClassifier(n_estimators=500, random_state=SEED, verbose=-1)
    # NOTE: no eval_metric is passed, so early stopping monitors LightGBM's
    # default metric for the objective rather than ROC-AUC. The validation
    # fold also doubles as the early-stopping set, which makes the OOF score
    # below mildly optimistic.
    model.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], callbacks=[lgb.early_stopping(50)])

    oof[val_idx] = model.predict_proba(X_val)[:, 1]
    # Each fold contributes an equal share of the final test prediction.
    preds += model.predict_proba(X_test)[:, 1] / N_SPLITS

print(f"⭐ OOF Score: {roc_auc_score(y, oof):.5f}")

# Name the prediction column after the detected label so the submission
# header always matches the training target.
pd.DataFrame({'id': test['id'], TARGET: preds}).to_csv('submission.csv', index=False)
print("🏆 Standard Submission Created!")