In [8]:
!pip install catboost


Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [9]:
import pandas as pd

df = pd.read_csv("/content/churn.csv")
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df = df.dropna(subset=['TotalCharges']).reset_index(drop=True)

if 'customerID' in df.columns:
    df = df.drop(columns=['customerID'])

df['Churn'] = df['Churn'].map({'Yes':1, 'No':0})

df.head()


Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,0
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,0
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,1
3,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,0
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,1


In [10]:
cat_cols = df.select_dtypes(include=["object"]).columns.tolist()
num_cols = df.select_dtypes(include=['int64','float64']).columns.tolist()

X = df.drop("Churn", axis=1)
y = df["Churn"]

cat_cols, num_cols


(['gender',
  'Partner',
  'Dependents',
  'PhoneService',
  'MultipleLines',
  'InternetService',
  'OnlineSecurity',
  'OnlineBackup',
  'DeviceProtection',
  'TechSupport',
  'StreamingTV',
  'StreamingMovies',
  'Contract',
  'PaperlessBilling',
  'PaymentMethod'],
 ['SeniorCitizen', 'tenure', 'MonthlyCharges', 'TotalCharges', 'Churn'])

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [12]:
from catboost import CatBoostClassifier

model = CatBoostClassifier(
    loss_function="Logloss",
    eval_metric="AUC",
    depth=8,
    learning_rate=0.05,
    iterations=1000,
    l2_leaf_reg=5,
    random_seed=42,
    verbose=100,
    class_weights=[1, 3]   # handle imbalance strongly
)

model.fit(
    X_train, y_train,
    cat_features=cat_cols,
    eval_set=(X_test, y_test),
    use_best_model=True
)


0:	test: 0.8120098	best: 0.8120098 (0)	total: 151ms	remaining: 2m 31s
100:	test: 0.8354191	best: 0.8354813 (95)	total: 5.51s	remaining: 49s
200:	test: 0.8355977	best: 0.8359096 (107)	total: 8.61s	remaining: 34.2s
300:	test: 0.8353143	best: 0.8364079 (249)	total: 19.2s	remaining: 44.7s
400:	test: 0.8317281	best: 0.8364079 (249)	total: 38.3s	remaining: 57.2s
500:	test: 0.8279542	best: 0.8364079 (249)	total: 48.6s	remaining: 48.5s
600:	test: 0.8255134	best: 0.8364079 (249)	total: 52.2s	remaining: 34.6s
700:	test: 0.8252132	best: 0.8364079 (249)	total: 55.6s	remaining: 23.7s
800:	test: 0.8225730	best: 0.8364079 (249)	total: 1m	remaining: 15s
900:	test: 0.8205463	best: 0.8364079 (249)	total: 1m 4s	remaining: 7.07s
999:	test: 0.8193194	best: 0.8364079 (249)	total: 1m 7s	remaining: 0us

bestTest = 0.8364078977
bestIteration = 249

Shrink model to first 250 iterations.


<catboost.core.CatBoostClassifier at 0x7afd81f1fb00>

In [13]:
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix

y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:,1]

print("Accuracy:", accuracy_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_proba))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.736318407960199
AUC: 0.8364078976657987

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.71      0.80      1033
           1       0.50      0.80      0.62       374

    accuracy                           0.74      1407
   macro avg       0.71      0.76      0.71      1407
weighted avg       0.80      0.74      0.75      1407


Confusion Matrix:
 [[737 296]
 [ 75 299]]


In [14]:
import joblib
joblib.dump(model, "/content/catboost_churn.pkl")
print("Saved model at /content/catboost_churn.pkl")


Saved model at /content/catboost_churn.pkl
