In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split


In [39]:
# Read the customer table used for modelling. The file name suggests it has
# already been preprocessed upstream -- TODO confirm which step produced it.
# NOTE(review): "/content/..." is an absolute, environment-specific path.
csv_path = "/content/preprocessed_customers.csv"
df = pd.read_csv(csv_path)

In [None]:
# Separate the label column ('churn') from the predictor columns
y = df['churn']
X = df.drop(columns=['churn'])

# 80/20 hold-out split. stratify=y asks for similar class proportions in both
# partitions, and the fixed random_state makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts

In [40]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

def evaluate_all_models(X_train, X_test, y_train, y_test):
    """Fit a fixed set of classifiers and print hold-out metrics for each one.

    Every candidate is trained on ``(X_train, y_train)`` and scored on
    ``(X_test, y_test)``. For each model the function prints accuracy,
    precision, recall and F1 (computed from ``predict``) and ROC-AUC
    (computed from the second column of ``predict_proba``), followed by a
    separator line.

    Parameters
    ----------
    X_train, X_test : feature matrices for fitting and for evaluation.
    y_train, y_test : matching target vectors.
        The metrics are called with their defaults and only column 1 of
        ``predict_proba`` is used, so a binary target is assumed.

    Returns
    -------
    None. Results are written to stdout only.
    """

    # All candidates share seed 42; only the logistic regression is
    # configured with class_weight='balanced'.
    candidates = {
        "Logistic Regression": LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42),
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Random Forest": RandomForestClassifier(random_state=42),
        "Gradient Boosting": GradientBoostingClassifier(random_state=42),
        "XGBoost": XGBClassifier(eval_metric='logloss', random_state=42),
        "CatBoost": CatBoostClassifier(verbose=0, random_state=42)
    }

    for label, clf in candidates.items():
        clf.fit(X_train, y_train)
        hard_labels = clf.predict(X_test)
        # Column 1 of predict_proba is the score fed to ROC-AUC
        class1_scores = clf.predict_proba(X_test)[:, 1]

        # (printed tag, metric function, predictions it is computed from);
        # each metric is evaluated right before it is printed.
        report_rows = (
            ("Accuracy :", accuracy_score, hard_labels),
            ("Precision:", precision_score, hard_labels),
            ("Recall   :", recall_score, hard_labels),
            ("F1 Score :", f1_score, hard_labels),
            ("ROC-AUC  :", roc_auc_score, class1_scores),
        )

        print(f"Model: {label}")
        for tag, metric_fn, predictions in report_rows:
            print(tag, metric_fn(y_test, predictions))
        print("-" * 40)



In [41]:
# Fit and score every candidate model on the hold-out split created above
evaluate_all_models(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

Model: Logistic Regression
Accuracy : 0.7444996451383961
Precision: 0.5127737226277372
Recall   : 0.7513368983957219
F1 Score : 0.6095444685466378
ROC-AUC  : 0.8367033506419697
----------------------------------------
Model: Decision Tree
Accuracy : 0.7139815471965933
Precision: 0.46194225721784776
Recall   : 0.47058823529411764
F1 Score : 0.46622516556291393
ROC-AUC  : 0.6360045467462347
----------------------------------------
Model: Random Forest
Accuracy : 0.7856635911994322
Precision: 0.6224489795918368
Recall   : 0.4893048128342246
F1 Score : 0.5479041916167665
ROC-AUC  : 0.8189568317445556
----------------------------------------
Model: Gradient Boosting
Accuracy : 0.7977288857345636
Precision: 0.6606498194945848
Recall   : 0.4893048128342246
F1 Score : 0.5622119815668203
ROC-AUC  : 0.8423751582319358
----------------------------------------
Model: XGBoost
Accuracy : 0.7849538679914834
Precision: 0.6141479099678456
Recall   : 0.5106951871657754
F1 Score : 0.5576642335766423
ROC-

**Best Model: Logistic Regression**

In [42]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Search space: 4 values of C x 2 penalties x 2 solvers = 16 combinations
param_grid = dict(
    C=[0.01, 0.1, 1, 10],
    penalty=['l1', 'l2'],
    solver=['liblinear', 'saga'],
)

# Estimator to tune: class-weighted logistic regression with a fixed seed
base_estimator = LogisticRegression(class_weight='balanced', max_iter=3000, random_state=42)

# 5-fold cross-validated grid search that picks the combination with the
# best recall, using all available cores
grid = GridSearchCV(
    estimator=base_estimator,
    param_grid=param_grid,
    scoring='recall',
    cv=5,
    n_jobs=-1,
)
grid.fit(X_train, y_train)

# Estimator exposed by the fitted search as its best configuration
best_model = grid.best_estimator_

# Score the test set, then turn the column-1 probabilities into 0/1 labels
# with an explicit 0.5 cut-off
y_proba = best_model.predict_proba(X_test)[:, 1]
meets_threshold = y_proba >= 0.5
y_pred = meets_threshold.astype(int)

# Report the chosen hyperparameters, then each test-set metric. Every
# metric is computed right before it is printed.
print("Best hyperparameters:", grid.best_params_)
for metric_label, metric_fn, metric_input in (
    ("Accuracy :", accuracy_score, y_pred),
    ("Precision:", precision_score, y_pred),
    ("Recall   :", recall_score, y_pred),
    ("F1 Score :", f1_score, y_pred),
    ("ROC-AUC  :", roc_auc_score, y_proba),
):
    print(metric_label, metric_fn(y_test, metric_input))


Best hyperparameters: {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}
Accuracy : 0.7444996451383961
Precision: 0.5127272727272727
Recall   : 0.7540106951871658
F1 Score : 0.6103896103896104
ROC-AUC  : 0.83668268361363
