In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Models
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

# =====================
# 1. Load Data
# =====================
# Read the raw CSV and clean it in a single chain, so `df` is only ever bound
# to the cleaned frame.
# NOTE(review): the median used for imputation is taken over every row, i.e.
# before the train/test split performed further down.
df = (
    pd.read_csv("Telco-Customer-Churn.csv")
    # The identifier column is discarded before any further processing.
    .drop(columns="customerID")
    # Entries that cannot be parsed as numbers become NaN (errors="coerce") ...
    .assign(
        TotalCharges=lambda frame: pd.to_numeric(frame["TotalCharges"], errors="coerce")
    )
    # ... and those NaNs are then replaced with the column median.
    .assign(
        TotalCharges=lambda frame: frame["TotalCharges"].fillna(
            frame["TotalCharges"].median()
        )
    )
)

# Encode categorical variables
# Every object-dtype column of `df` (the loop does not exclude the target) is
# replaced by integer codes. Each column gets its OWN fitted LabelEncoder,
# stored in `encoders` under the column name, so the code <-> category mapping
# stays available afterwards via `encoders[col].classes_`,
# `encoders[col].transform(...)` and `encoders[col].inverse_transform(...)`.
# Refitting one shared encoder would keep only the last column's mapping.
le = LabelEncoder()  # keeps `le` bound even if there is no object column
encoders = {}
for col in df.select_dtypes(include="object").columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# Separate the target column from the predictors
y = df["Churn"]
X = df.drop(columns=["Churn"])

# Hold out 20% of the rows for testing; stratify=y is passed so the split is
# stratified on the target, and random_state=42 fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# =====================
# 2. Models
# =====================
# Candidate classifiers keyed by the display name used in the printed reports.
# Every estimator is constructed with random_state=42.
models = {
    "Decision Tree": DecisionTreeClassifier(max_depth=6, random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "AdaBoost": AdaBoostClassifier(n_estimators=100, random_state=42),
    # `use_label_encoder` is intentionally not passed: the installed XGBoost
    # ignores it and prints 'Parameters: { "use_label_encoder" } are not used.'
    # on every fit.
    "XGBoost": XGBClassifier(n_estimators=100, eval_metric="logloss", random_state=42),
    "CatBoost": CatBoostClassifier(iterations=100, verbose=0, random_state=42)
}

# =====================
# 3. Train & Evaluate
# =====================
# Accuracy on the test split, keyed by each model's display name.
results = {}

for name, model in models.items():
    print(f"\n================= {name} =================")

    # Fit on the training split and predict the test split in one expression
    # (fit returns the fitted estimator itself).
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Record the score for the summary printed after the loop
    acc = accuracy_score(y_test, y_pred)
    results[name] = acc

    # Headline accuracy followed by the per-class precision/recall/F1 table
    print(f"✅ Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))

# =====================
# 4. Compare
# =====================
# Print one line per model with its test accuracy.
# The loop variables are deliberately NOT called `model` / `acc`: reusing those
# names would rebind the notebook-level `model` (an estimator) to a plain
# string, breaking any later cell that calls methods on it.
print("\n🔎 Model Performance Summary:")
for model_name, model_acc in results.items():
    print(f"{model_name:15s} -> Accuracy: {model_acc:.4f}")


✅ Accuracy: 0.7842
              precision    recall  f1-score   support

           0       0.84      0.87      0.86      1035
           1       0.60      0.55      0.57       374

    accuracy                           0.78      1409
   macro avg       0.72      0.71      0.71      1409
weighted avg       0.78      0.78      0.78      1409


✅ Accuracy: 0.7921
              precision    recall  f1-score   support

           0       0.83      0.90      0.86      1035
           1       0.64      0.50      0.56       374

    accuracy                           0.79      1409
   macro avg       0.74      0.70      0.71      1409
weighted avg       0.78      0.79      0.78      1409


✅ Accuracy: 0.7977
              precision    recall  f1-score   support

           0       0.84      0.89      0.87      1035
           1       0.64      0.55      0.59       374

    accuracy                           0.80      1409
   macro avg       0.74      0.72      0.73      1409
weighted avg  

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


              precision    recall  f1-score   support

           0       0.83      0.87      0.85      1035
           1       0.60      0.52      0.55       374

    accuracy                           0.78      1409
   macro avg       0.71      0.69      0.70      1409
weighted avg       0.77      0.78      0.77      1409


✅ Accuracy: 0.8041
              precision    recall  f1-score   support

           0       0.84      0.91      0.87      1035
           1       0.67      0.51      0.58       374

    accuracy                           0.80      1409
   macro avg       0.75      0.71      0.73      1409
weighted avg       0.79      0.80      0.80      1409


🔎 Model Performance Summary:
Decision Tree   -> Accuracy: 0.7842
Random Forest   -> Accuracy: 0.7921
AdaBoost        -> Accuracy: 0.7977
XGBoost         -> Accuracy: 0.7786
CatBoost        -> Accuracy: 0.8041


In [4]:
import pickle
from sklearn.metrics import accuracy_score

# Assume you already have X_train, X_test, y_train, y_test and models dict as before
#
# Refit every candidate, keep the one with the highest accuracy on the test
# split (the first model wins ties because the comparison is strict), and
# write it to best_model.pkl.
# NOTE(review): the winner is picked on the same test split whose accuracy is
# printed, so that number is an optimistic estimate of real-world performance.
# NOTE(review): only the estimator is pickled; the label encoding applied to
# the feature columns before training is not stored in this file.

# Fail fast rather than silently pickling None when there is nothing to compare.
if not models:
    raise ValueError("models is empty; there is no model to select or save")

best_model_name = None
best_model = None
best_acc = float("-inf")  # any real accuracy, including 0.0, beats this

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    if acc > best_acc:
        best_acc = acc
        best_model = model
        best_model_name = name

print(f"🏆 Best Model: {best_model_name} with Accuracy: {best_acc:.4f}")

# Save best model as pickle file
with open("best_model.pkl", "wb") as f:
    pickle.dump(best_model, f)

print("✅ Best model saved as best_model.pkl")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🏆 Best Model: CatBoost with Accuracy: 0.8041
✅ Best model saved as best_model.pkl
