In [None]:
# Imports & Config
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier
import warnings
warnings.filterwarnings('ignore')

# Shared run settings: the fold count for cross-validation and one seed that is
# reused for NumPy's global RNG, the fold splitter and every model below.
N_FOLDS = 5
SEED = 42
np.random.seed(SEED)

In [None]:
# Load Data
# Prefer the Kaggle competition directory when it is present; otherwise fall
# back to a local ./data folder so the notebook also runs off-platform.
_kaggle_dir = '/kaggle/input/playground-series-s5e3'
if os.path.exists(_kaggle_dir):
    INPUT = _kaggle_dir
else:
    INPUT = './data'

train = pd.read_csv(f'{INPUT}/train.csv')
test = pd.read_csv(f'{INPUT}/test.csv')

# Quick sanity check: frame shapes and the share of positive targets.
print(f"Train: {train.shape}, Test: {test.shape}")
print(f"Target: {train['rainfall'].mean():.1%} positive")

In [None]:
# Feature Engineering (~20 features)
def add_features(df):
    """Return a copy of ``df`` with 13 engineered weather features appended.

    The input frame is not modified: all new columns are written to a copy,
    so the raw frame stays available to other cells and re-running the cell
    that calls this function always starts from the same input.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``maxtemp``, ``mintemp``, ``temparature``
        (spelled exactly as the code reads it), ``dewpoint``, ``windspeed``,
        ``winddirection``, ``sunshine``, ``cloud``, ``humidity`` and
        ``pressure``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns followed by the derived
        columns, in the fixed order in which they are assigned below.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    # Work on a copy so the caller's frame is never mutated in place.
    df = df.copy()

    # Temperature features
    df['temp_range'] = df['maxtemp'] - df['mintemp']
    df['temp_dew_diff'] = df['temparature'] - df['dewpoint']

    # Wind components: winddirection is converted from degrees to radians and
    # the speed is projected onto the cosine / sine axes.
    wind_angle = np.radians(df['winddirection'])
    df['wind_x'] = df['windspeed'] * np.cos(wind_angle)
    df['wind_y'] = df['windspeed'] * np.sin(wind_angle)

    # Cloud/Sunshine
    df['sunshine_log'] = np.log1p(df['sunshine'])
    # The +1 keeps the denominator non-zero when cloud and sunshine are both 0.
    df['sun_frac'] = df['sunshine'] / (df['cloud'] + df['sunshine'] + 1)

    # Polynomials
    df['humidity_sq'] = df['humidity'] ** 2
    df['cloud_sq'] = df['cloud'] ** 2
    df['temp_sq'] = df['temparature'] ** 2

    # Interactions
    df['temp_x_humidity'] = df['temparature'] * df['humidity']
    df['pressure_x_humidity'] = df['pressure'] * df['humidity']
    df['cloud_x_humidity'] = df['cloud'] * df['humidity']

    # Ratios
    df['humidity_pressure_ratio'] = df['humidity'] / df['pressure']

    return df

# Apply the same feature engineering to both splits (train first, then test).
train, test = add_features(train), add_features(test)

# Define features: every train column except the identifier, the day counter
# and the target, kept in the frame's column order.
_non_feature_cols = ('id', 'day', 'rainfall')
FEATURES = [col for col in train.columns if col not in _non_feature_cols]
print(f"Features ({len(FEATURES)}): {FEATURES}")

# Plain arrays for the CV routine; train and test share the same column order.
X, X_test = train[FEATURES].values, test[FEATURES].values
y = train['rainfall'].values

In [None]:
# Cross-validation function
def train_model(name, model_fn, X, y, X_test):
    """Fit one model per stratified fold; return OOF and averaged test predictions.

    Parameters
    ----------
    name : str
        Label that selects how ``fit`` is called. A name containing ``'XGB'``,
        ``'LGB'`` or ``'Cat'`` passes the validation fold as ``eval_set`` in
        the form that branch uses; any other name gets a plain ``fit(X, y)``.
    model_fn : callable
        Zero-argument factory returning a fresh, unfitted classifier with
        ``fit`` and ``predict_proba``. It is called once per fold.
    X, y : array-like
        Training features and labels; both are indexed with the integer index
        arrays produced by the splitter.
    X_test : array-like
        Test features, scored by every fold's model.

    Returns
    -------
    tuple
        ``(oof, preds)``: ``oof`` holds one out-of-fold score per training
        row, ``preds`` the mean over folds of the test-set scores.

    Notes
    -----
    The fold count and the shuffle seed come from the module-level ``N_FOLDS``
    and ``SEED``. The validation fold handed to ``eval_set`` is the same fold
    the AUC is computed on, so when it drives early stopping the printed
    scores can be slightly optimistic.
    """
    splitter = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
    oof = np.zeros(len(X))
    preds = np.zeros(len(X_test))

    for fold, (tr_idx, va_idx) in enumerate(splitter.split(X, y)):
        X_tr, y_tr = X[tr_idx], y[tr_idx]
        X_va, y_va = X[va_idx], y[va_idx]

        model = model_fn()

        # Each library takes its validation data through slightly different
        # fit() arguments, so collect them first and make a single fit call.
        if 'XGB' in name:
            fit_kwargs = {'eval_set': [(X_va, y_va)], 'verbose': False}
        elif 'LGB' in name:
            fit_kwargs = {
                'eval_set': [(X_va, y_va)],
                'callbacks': [lgb.early_stopping(50, verbose=False), lgb.log_evaluation(0)],
            }
        elif 'Cat' in name:
            fit_kwargs = {'eval_set': (X_va, y_va), 'verbose': False}
        else:
            fit_kwargs = {}
        model.fit(X_tr, y_tr, **fit_kwargs)

        # Column 1 of predict_proba is the score for the second class.
        oof[va_idx] = model.predict_proba(X_va)[:, 1]
        # Dividing each fold's scores by N_FOLDS accumulates the fold average.
        preds += model.predict_proba(X_test)[:, 1] / N_FOLDS
        print(f"  Fold {fold+1}: {roc_auc_score(y_va, oof[va_idx]):.5f}")

    print(f"  OOF AUC: {roc_auc_score(y, oof):.5f}\n")
    return oof, preds

In [None]:
# Train XGBoost
# Hyper-parameters are kept in one dict so every fold builds an identical model.
xgb_params = dict(
    n_estimators=1000,
    learning_rate=0.01,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    early_stopping_rounds=50,
    random_state=SEED,
    verbosity=0,
)

print("XGBoost:")
xgb_oof, xgb_preds = train_model(
    'XGB',
    lambda: xgb.XGBClassifier(**xgb_params),
    X, y, X_test,
)

In [None]:
# Train LightGBM
# The factory builds a fresh classifier for every CV fold. Early stopping is
# not configured here: train_model attaches an early-stopping callback for
# any model whose name contains 'LGB'.
print("LightGBM:")
lgb_oof, lgb_preds = train_model('LGB', lambda: lgb.LGBMClassifier(
    n_estimators=1000, learning_rate=0.01, max_depth=8, num_leaves=31,
    # LightGBM ignores `subsample` (bagging_fraction) unless `subsample_freq`
    # (bagging_freq) is non-zero; its default of 0 disables bagging entirely.
    # Re-sample rows every iteration so the 0.8 fraction actually applies.
    subsample=0.8, subsample_freq=1, colsample_bytree=0.8,
    random_state=SEED, verbose=-1
), X, y, X_test)

In [None]:
# Train CatBoost
def _make_catboost():
    """Build a fresh, unfitted CatBoost classifier for one CV fold."""
    return CatBoostClassifier(
        iterations=1000,
        learning_rate=0.03,
        depth=6,
        early_stopping_rounds=50,
        random_seed=SEED,
        verbose=False,
    )


print("CatBoost:")
cat_oof, cat_preds = train_model('Cat', _make_catboost, X, y, X_test)

In [None]:
# Ensemble & Submit
# Fixed blend weights, named once so the OOF and test blends cannot drift apart.
w_xgb, w_lgb, w_cat = 0.4, 0.3, 0.3
oof_blend = w_xgb * xgb_oof + w_lgb * lgb_oof + w_cat * cat_oof
test_blend = w_xgb * xgb_preds + w_lgb * lgb_preds + w_cat * cat_preds

# Score the blend on the out-of-fold predictions.
print(f"Ensemble OOF AUC: {roc_auc_score(y, oof_blend):.5f}")

# Create submission: one row per test id with the blended score.
submission_columns = {'id': test['id'], 'rainfall': test_blend}
submission = pd.DataFrame(submission_columns)
submission.to_csv('submission.csv', index=False)
print(f"\nSubmission saved: {submission.shape}")
submission.head()