## Import Libraries

In [42]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

## Load Data

In [14]:
df = pd.read_csv("customer_churn.csv")
df.head()

Unnamed: 0,CustomerID,Tenure,MonthlyCharges,TotalCharges,Contract,PaymentMethod,PaperlessBilling,SeniorCitizen,Churn
0,C00001,6,64,1540,One year,Credit Card,No,1,0
1,C00002,21,113,1753,Month-to-month,Electronic Check,Yes,1,0
2,C00003,27,31,1455,Two year,Credit Card,No,1,0
3,C00004,53,29,7150,Month-to-month,Electronic Check,No,1,0
4,C00005,16,185,1023,One year,Electronic Check,No,1,0


In [15]:
df.shape

(500, 9)

In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   CustomerID        500 non-null    object
 1   Tenure            500 non-null    int64 
 2   MonthlyCharges    500 non-null    int64 
 3   TotalCharges      500 non-null    int64 
 4   Contract          500 non-null    object
 5   PaymentMethod     500 non-null    object
 6   PaperlessBilling  500 non-null    object
 7   SeniorCitizen     500 non-null    int64 
 8   Churn             500 non-null    int64 
dtypes: int64(5), object(4)
memory usage: 35.3+ KB


In [17]:
df.isnull().sum()

CustomerID          0
Tenure              0
MonthlyCharges      0
TotalCharges        0
Contract            0
PaymentMethod       0
PaperlessBilling    0
SeniorCitizen       0
Churn               0
dtype: int64

In [18]:
df.describe()

Unnamed: 0,Tenure,MonthlyCharges,TotalCharges,SeniorCitizen,Churn
count,500.0,500.0,500.0,500.0,500.0
mean,36.532,113.636,4237.882,0.498,0.106
std,20.667057,51.799903,2260.619837,0.500497,0.308146
min,1.0,20.0,159.0,0.0,0.0
25%,19.0,67.0,2237.25,0.0,0.0
50%,37.0,115.0,4182.5,0.0,0.0
75%,54.0,158.0,6266.75,1.0,0.0
max,71.0,199.0,7992.0,1.0,1.0


## Separate Features & Target

In [19]:
df = df.drop("CustomerID", axis=1)

In [20]:
X = df.drop("Churn", axis=1)
y = df["Churn"]

## One Hot Encoding 

In [21]:
X = pd.get_dummies(
    X,
    columns=["Contract", "PaymentMethod", "PaperlessBilling"],
    drop_first=True
)

## Train-Test Split

In [22]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [23]:
print(X_train.dtypes)

Tenure                            int64
MonthlyCharges                    int64
TotalCharges                      int64
SeniorCitizen                     int64
Contract_One year                  bool
Contract_Two year                  bool
PaymentMethod_Credit Card          bool
PaymentMethod_Electronic Check     bool
PaperlessBilling_Yes               bool
dtype: object


## Feature Scaling (Important for Logistic Regression)

In [24]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Train Models

In [43]:
#Logistic Regression
log_model = LogisticRegression(class_weight="balanced")
log_model.fit(X_train, y_train)

log_pred = log_model.predict(X_test)

In [44]:
#Decision Tree
tree_model = DecisionTreeClassifier(
    max_depth=5,
    random_state=42
)

tree_model.fit(X_train, y_train)

tree_pred = tree_model.predict(X_test)

In [45]:
#Random Forest
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42
)

rf_model.fit(X_train, y_train)

rf_pred = rf_model.predict(X_test)

## Compare Performance

In [46]:
from sklearn.metrics import classification_report

print("===== Logistic Regression =====")
print(classification_report(y_test, log_pred))

print("===== Decision Tree =====")
print(classification_report(y_test, tree_pred))

print("===== Random Forest =====")
print(classification_report(y_test, rf_pred))

===== Logistic Regression =====
              precision    recall  f1-score   support

           0       1.00      0.92      0.96        84
           1       0.70      1.00      0.82        16

    accuracy                           0.93       100
   macro avg       0.85      0.96      0.89       100
weighted avg       0.95      0.93      0.93       100

===== Decision Tree =====
              precision    recall  f1-score   support

           0       0.98      0.96      0.97        84
           1       0.82      0.88      0.85        16

    accuracy                           0.95       100
   macro avg       0.90      0.92      0.91       100
weighted avg       0.95      0.95      0.95       100

===== Random Forest =====
              precision    recall  f1-score   support

           0       0.97      0.99      0.98        84
           1       0.93      0.81      0.87        16

    accuracy                           0.96       100
   macro avg       0.95      0.90      0.92 