In [2]:
import pandas as pd

df = pd.read_csv("../data/customer_churn_preprocessed.csv")

# detect target column robustly
target_col = "Churn" if "Churn" in df.columns else "churn"

X = df.drop(columns=[target_col])
y = df[target_col]

print("Target column:", target_col)
print(X.shape, y.shape)
print(df.columns[:10])

Target column: Churn
(10348, 20) (10348,)
Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity'],
      dtype='object')


In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)


In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)

y_pred_lr = lr.predict(X_test)

print("Logistic Regression")
print(classification_report(y_test, y_pred_lr))
print(confusion_matrix(y_test, y_pred_lr))


Logistic Regression
              precision    recall  f1-score   support

           0       0.86      0.77      0.81      1035
           1       0.79      0.88      0.83      1035

    accuracy                           0.82      2070
   macro avg       0.82      0.82      0.82      2070
weighted avg       0.82      0.82      0.82      2070

[[792 243]
 [128 907]]


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=1000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [5]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    class_weight="balanced"
)
rf.fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)

print("Random Forest")
print(classification_report(y_test, y_pred_rf))
print(confusion_matrix(y_test, y_pred_rf))


Random Forest
              precision    recall  f1-score   support

           0       0.85      0.84      0.85      1035
           1       0.85      0.85      0.85      1035

    accuracy                           0.85      2070
   macro avg       0.85      0.85      0.85      2070
weighted avg       0.85      0.85      0.85      2070

[[874 161]
 [153 882]]


In [6]:
from sklearn.metrics import f1_score, precision_score, recall_score

models = {
    "Logistic Regression": y_pred_lr,
    "Random Forest": y_pred_rf
}

for name, preds in models.items():
    print(f"\n{name}")
    print("Precision:", precision_score(y_test, preds))
    print("Recall   :", recall_score(y_test, preds))
    print("F1-score :", f1_score(y_test, preds))



Logistic Regression
Precision: 0.788695652173913
Recall   : 0.8763285024154589
F1-score : 0.8302059496567505

Random Forest
Precision: 0.8456375838926175
Recall   : 0.8521739130434782
F1-score : 0.848893166506256


In [7]:
import pandas as pd

importances = pd.Series(
    rf.feature_importances_,
    index=X.columns
).sort_values(ascending=False)

importances.head(10)


Contract           0.137275
MonthlyCharges     0.112714
tenure             0.110175
TotalCharges       0.091461
customerID         0.084653
TechSupport        0.081349
OnlineSecurity     0.077160
OnlineBackup       0.043734
InternetService    0.037551
PaymentMethod      0.036011
dtype: float64

In [12]:
import os
import joblib

# absolute-safe path
base_dir = os.path.dirname(os.path.abspath(__file__)) if "__file__" in globals() else os.getcwd()

model_dir = os.path.join(base_dir, "models")
os.makedirs(model_dir, exist_ok=True)

model_path = os.path.join(model_dir, "customer_churn_rf.joblib")
joblib.dump(rf, model_path)

print("Model saved to:", model_path)


Model saved to: c:\Users\ASUS\ML_Engg_\ml_projects\customer_churn\models\customer_churn_rf.joblib
