In [2]:
!pip install xgboost catboost optuna lightgbm





[notice] A new release of pip is available: 23.0.1 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import pandas as pd
import numpy as np

# Load the raw churn table.
# The path is a raw string so the Windows backslashes are never interpreted as
# escape sequences (a non-raw "\P", "\E", ... is an invalid escape: it raises a
# warning on current Python versions and is slated to become a syntax error).
# NOTE(review): the file carries an .xlsx extension but is parsed with
# read_csv — confirm it really is CSV text; a genuine workbook would need
# pd.read_excel instead.
df = pd.read_csv(r"C:\Personal\Educational\Projects\ICSSR-Tech-1st-Round-Submission\Customer_Churn.xlsx")

# Remove customerID
df = df.drop(columns=["customerID"])

# Fix TotalCharges: coerce unparseable entries to NaN, then replace them with 0.
# The filled Series is assigned back to the column; calling
# fillna(inplace=True) on the df["TotalCharges"] selection is chained
# assignment, which warns in pandas 2.x and leaves df untouched under
# Copy-on-Write.
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"], errors="coerce").fillna(0)

# Convert target: "Yes" -> 1, "No" -> 0 (any other value becomes NaN)
df["Churn"] = df["Churn"].map({"Yes": 1, "No": 0})


In [None]:
# Avg monthly charge (the +1 keeps the denominator non-zero when tenure is 0)
df["avg_monthly"] = df["TotalCharges"] / (df["tenure"] + 1)

# Tenure buckets: [0, 12], (12, 24], (24, 48], (48, 72] -> labels 0..3.
# include_lowest=True closes the first interval on the left; without it a
# tenure of exactly 0 falls outside every bin and is bucketed as NaN.
df["tenure_group"] = pd.cut(
    df["tenure"],
    bins=[0, 12, 24, 48, 72],
    labels=[0, 1, 2, 3],
    include_lowest=True,
)

# Count services
services = [
    "PhoneService", "MultipleLines", "InternetService",
    "OnlineSecurity", "OnlineBackup",
    "DeviceProtection", "TechSupport",
    "StreamingTV", "StreamingMovies",
]

# Number of service columns whose value is exactly "Yes", computed column-wise
# instead of with a Python-level lambda per row (same result, no row loop).
# NOTE(review): only the literal "Yes" is counted. If InternetService holds
# plan names rather than Yes/No it never contributes here — confirm whether
# that is intended.
df["service_count"] = (df[services] == "Yes").sum(axis=1)


In [None]:
from sklearn.model_selection import train_test_split

# Target vector and raw (pre-encoding) feature frame
y = df["Churn"]
features_raw = df.drop(columns=["Churn"])

# Names of the string-typed columns, captured before one-hot encoding
categorical_cols = features_raw.select_dtypes(include="object").columns

# One-hot encode, dropping the first level of each encoded column
X = pd.get_dummies(features_raw, drop_first=True)

# 80/20 hold-out split, stratified on the target, fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [None]:
# Ratio of negative to positive training labels, later passed to XGBoost as
# scale_pos_weight.
n_negative = (y_train == 0).sum()
n_positive = (y_train == 1).sum()
scale_pos_weight = n_negative / n_positive
print("Scale Pos Weight:", scale_pos_weight)


In [None]:
import optuna
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated ROC-AUC of an XGBoost model.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial from which the hyperparameters are sampled.

    Returns
    -------
    float
        Mean ROC-AUC over the five stratified folds of (X_train, y_train).
    """
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 300, 1000),
        "max_depth": trial.suggest_int("max_depth", 3, 8),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "gamma": trial.suggest_float("gamma", 0, 5),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        "scale_pos_weight": scale_pos_weight,
        # XGBoost >= 2.0 selects the GPU with device="cuda" combined with
        # tree_method="hist"; the former "gpu_hist" / "predictor" settings
        # are deprecated.
        "tree_method": "hist",
        "device": "cuda",
        "eval_metric": "auc",
        "random_state": 42,
    }

    model = XGBClassifier(**params)

    # Fixed seed so every trial is scored on the same fold assignment.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    aucs = []

    for train_idx, val_idx in skf.split(X_train, y_train):
        X_t, X_v = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_t, y_v = y_train.iloc[train_idx], y_train.iloc[val_idx]

        # fit() retrains from scratch, so reusing one estimator across folds is safe.
        model.fit(X_t, y_t)
        preds = model.predict_proba(X_v)[:, 1]
        aucs.append(roc_auc_score(y_v, preds))

    return np.mean(aucs)

# Seed the sampler so the 40-trial search (and therefore the reported best
# parameters) is reproducible across kernel restarts.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=40)

print("Best Params:", study.best_params)


In [None]:
# Start from the tuned hyperparameters and add the fixed settings that were
# not part of the search space.
best_params = dict(study.best_params)

best_params.update({
    "scale_pos_weight": scale_pos_weight,
    # XGBoost >= 2.0 GPU selection: device="cuda" with tree_method="hist"
    # replaces the deprecated "gpu_hist" / "predictor" settings.
    "tree_method": "hist",
    "device": "cuda",
    "eval_metric": "auc",
    "random_state": 42
})

# Refit on the full training split and score the held-out test split.
final_xgb = XGBClassifier(**best_params)
final_xgb.fit(X_train, y_train)

y_prob = final_xgb.predict_proba(X_test)[:, 1]

from sklearn.metrics import roc_auc_score
print("Final Test AUC:", roc_auc_score(y_test, y_prob))


In [None]:
from sklearn.metrics import f1_score

# Pick the decision threshold on out-of-fold TRAINING predictions rather than
# on the test set: tuning it against y_test would leak test labels into the
# model and make the test metrics reported afterwards optimistically biased.
oof_prob = np.zeros(len(y_train))
threshold_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for fit_idx, holdout_idx in threshold_cv.split(X_train, y_train):
    # Fresh model per fold, same hyperparameters as the final model.
    fold_model = XGBClassifier(**best_params)
    fold_model.fit(X_train.iloc[fit_idx], y_train.iloc[fit_idx])
    oof_prob[holdout_idx] = fold_model.predict_proba(X_train.iloc[holdout_idx])[:, 1]

thresholds = np.arange(0.2, 0.7, 0.01)
best_f1 = 0
best_threshold = 0.5  # fallback if no candidate yields a positive F1

for t in thresholds:
    preds = (oof_prob > t).astype(int)
    f1 = f1_score(y_train, preds)
    if f1 > best_f1:
        best_f1 = f1
        best_threshold = t

print("Best Threshold:", best_threshold)


In [None]:
from sklearn.metrics import classification_report

# Hard class labels: 1 where the predicted probability exceeds the chosen threshold
y_pred_final = (y_prob > best_threshold).astype(int)

# Per-class precision / recall / F1 on the test split
report = classification_report(y_test, y_pred_final)
print(report)

# Threshold-independent ranking quality on the same split
test_auc = roc_auc_score(y_test, y_prob)
print("Final AUC:", test_auc)


In [None]:
from catboost import CatBoostClassifier

# CatBoost settings: 800 iterations, depth-6 trees, AUC as the tracked metric,
# training log silenced.
cat_params = dict(
    iterations=800,
    depth=6,
    learning_rate=0.05,
    eval_metric='AUC',
    verbose=0,
)
cat_model = CatBoostClassifier(**cat_params)
cat_model.fit(X_train, y_train)

# Positive-class probability on the test split
cat_prob = cat_model.predict_proba(X_test)[:, 1]

# Blend: fixed 60/40 weighted average of the XGBoost and CatBoost probabilities
XGB_WEIGHT, CAT_WEIGHT = 0.6, 0.4
final_prob = XGB_WEIGHT * y_prob + CAT_WEIGHT * cat_prob

blended_auc = roc_auc_score(y_test, final_prob)
print("Blended AUC:", blended_auc)


In [1]:
!pip install xgboost catboost optuna lightgbm


Collecting xgboost
  Downloading xgboost-3.1.3-py3-none-win_amd64.whl (72.0 MB)
     ---------------------------------------- 0.0/72.0 MB ? eta -:--:--
     ---------------------------------------- 0.0/72.0 MB 1.3 MB/s eta 0:00:57
     --------------------------------------- 0.0/72.0 MB 393.8 kB/s eta 0:03:03
     --------------------------------------- 0.1/72.0 MB 409.6 kB/s eta 0:02:56
     --------------------------------------- 0.1/72.0 MB 409.6 kB/s eta 0:02:56
     --------------------------------------- 0.1/72.0 MB 504.4 kB/s eta 0:02:23
     --------------------------------------- 0.1/72.0 MB 500.5 kB/s eta 0:02:24
     --------------------------------------- 0.1/72.0 MB 500.5 kB/s eta 0:02:24
     --------------------------------------- 0.2/72.0 MB 476.3 kB/s eta 0:02:31
     --------------------------------------- 0.2/72.0 MB 479.2 kB/s eta 0:02:30
     --------------------------------------- 0.2/72.0 MB 479.2 kB/s eta 0:02:30
     --------------------------------------- 0.3/


[notice] A new release of pip is available: 23.0.1 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


     ---------------- ---------------------- 31.2/72.0 MB 78.2 kB/s eta 0:08:43
     ---------------- ---------------------- 31.2/72.0 MB 78.2 kB/s eta 0:08:43
     ---------------- ---------------------- 31.2/72.0 MB 78.2 kB/s eta 0:08:43
     ---------------- ---------------------- 31.2/72.0 MB 78.2 kB/s eta 0:08:43
     ---------------- ---------------------- 31.2/72.0 MB 78.7 kB/s eta 0:08:39
     ---------------- ---------------------- 31.2/72.0 MB 78.7 kB/s eta 0:08:39
     ---------------- ---------------------- 31.2/72.0 MB 78.7 kB/s eta 0:08:39
     ---------------- ---------------------- 31.2/72.0 MB 78.7 kB/s eta 0:08:39
     ---------------- ---------------------- 31.2/72.0 MB 78.7 kB/s eta 0:08:39
     ---------------- ---------------------- 31.2/72.0 MB 79.0 kB/s eta 0:08:37
     ---------------- ---------------------- 31.2/72.0 MB 79.0 kB/s eta 0:08:37
     ---------------- ---------------------- 31.2/72.0 MB 79.0 kB/s eta 0:08:37
     ---------------- ------------------