# **CUSTOMER CHURN PREDICTION**

### After descriptive churn analysis, I built a machine learning model to predict whether a customer is likely to churn, so that the business can take proactive retention actions.

### IMPORTING LIBRARIES

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

### LOADING DATASET

In [2]:
df = pd.read_csv("D:\customer churn\customer_churn_dataset-testing-master.csv")

  df = pd.read_csv("D:\customer churn\customer_churn_dataset-testing-master.csv")


### DATA PREPROCESSING

In [3]:
df.head()

Unnamed: 0,CustomerID,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Subscription Type,Contract Length,Total Spend,Last Interaction,Churn,Age Group,High Support Falge,Payment delay flag
0,1,22,Female,25,14,4,27,Basic,Monthly,598,9,1,<25,High,Delayed
1,2,41,Female,28,28,7,13,Standard,Monthly,584,20,0,35-44,High,Delayed
2,3,47,Male,27,10,2,29,Premium,Annual,757,21,0,45-54,Low,Delayed
3,4,35,Male,9,12,5,17,Premium,Quarterly,232,18,0,35-44,High,Delayed
4,5,53,Female,58,24,9,2,Standard,Annual,533,18,0,45-54,High,Delayed


In [4]:
df.shape

(64374, 15)

In [5]:
df.columns

Index(['CustomerID', 'Age', 'Gender', 'Tenure', 'Usage Frequency',
       'Support Calls', 'Payment Delay', 'Subscription Type',
       'Contract Length', 'Total Spend', 'Last Interaction', 'Churn',
       'Age Group', 'High Support Falge', 'Payment delay flag'],
      dtype='object')

In [6]:
df.dtypes

CustomerID             int64
Age                    int64
Gender                object
Tenure                 int64
Usage Frequency        int64
Support Calls          int64
Payment Delay          int64
Subscription Type     object
Contract Length       object
Total Spend            int64
Last Interaction       int64
Churn                  int64
Age Group             object
High Support Falge    object
Payment delay flag    object
dtype: object

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64374 entries, 0 to 64373
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   CustomerID          64374 non-null  int64 
 1   Age                 64374 non-null  int64 
 2   Gender              64374 non-null  object
 3   Tenure              64374 non-null  int64 
 4   Usage Frequency     64374 non-null  int64 
 5   Support Calls       64374 non-null  int64 
 6   Payment Delay       64374 non-null  int64 
 7   Subscription Type   64374 non-null  object
 8   Contract Length     64374 non-null  object
 9   Total Spend         64374 non-null  int64 
 10  Last Interaction    64374 non-null  int64 
 11  Churn               64374 non-null  int64 
 12  Age Group           64374 non-null  object
 13  High Support Falge  64374 non-null  object
 14  Payment delay flag  64374 non-null  object
dtypes: int64(9), object(6)
memory usage: 7.4+ MB


In [8]:
df.isnull().sum()

CustomerID            0
Age                   0
Gender                0
Tenure                0
Usage Frequency       0
Support Calls         0
Payment Delay         0
Subscription Type     0
Contract Length       0
Total Spend           0
Last Interaction      0
Churn                 0
Age Group             0
High Support Falge    0
Payment delay flag    0
dtype: int64

In [9]:
df.describe()

Unnamed: 0,CustomerID,Age,Tenure,Usage Frequency,Support Calls,Payment Delay,Total Spend,Last Interaction,Churn
count,64374.0,64374.0,64374.0,64374.0,64374.0,64374.0,64374.0,64374.0,64374.0
mean,32187.5,41.970982,31.994827,15.080234,5.40069,17.133952,541.023379,15.49885,0.473685
std,18583.317451,13.924911,17.098234,8.81647,3.114005,8.852211,260.874809,8.638436,0.499311
min,1.0,18.0,1.0,1.0,0.0,0.0,100.0,1.0,0.0
25%,16094.25,30.0,18.0,7.0,3.0,10.0,313.0,8.0,0.0
50%,32187.5,42.0,33.0,15.0,6.0,19.0,534.0,15.0,0.0
75%,48280.75,54.0,47.0,23.0,8.0,25.0,768.0,23.0,1.0
max,64374.0,65.0,60.0,30.0,10.0,30.0,1000.0,30.0,1.0


In [10]:
#Droping unwanted column

df = df.drop(['CustomerID', 'Last Interaction'], axis=1)


### FEATURE ENGINEERING

In [11]:
df = pd.get_dummies(df, drop_first=True)


### TRAIN-TEST SPLIT

In [12]:
X = df.drop('Churn', axis=1)
y = df['Churn']


In [13]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


### FEATURE SCALING

In [14]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


### MODEL TRAINING

In [15]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X_train, y_train)


### MODEL EVALUATING

In [16]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.8401553398058252
[[5709 1084]
 [ 974 5108]]
              precision    recall  f1-score   support

           0       0.85      0.84      0.85      6793
           1       0.82      0.84      0.83      6082

    accuracy                           0.84     12875
   macro avg       0.84      0.84      0.84     12875
weighted avg       0.84      0.84      0.84     12875



### FEATURE IMPORTANCE

In [17]:
import numpy as np

feature_importance = pd.Series(
    model.coef_[0],
    index=df.drop('Churn', axis=1).columns
).sort_values(ascending=False)

feature_importance.head(10)


Payment Delay                 1.953065
Tenure                        0.611251
Support Calls                 0.499583
Age                           0.358247
Payment delay flag_On Time    0.212467
Contract Length_Monthly       0.165165
Age Group_<25                 0.084722
Age Group_55+                -0.031077
Subscription Type_Standard   -0.038529
Subscription Type_Premium    -0.040514
dtype: float64