### Overview
Customer churn, also called customer attrition, occurs when a customer ends their relationship with a company or service provider. In today's highly competitive business environment, retaining customers is critical to long-term success. Predicting churn lets an organization take proactive steps to retain at-risk customers and so minimize revenue loss. This project aims to build a machine learning model that predicts whether a customer will churn, based on their demographic, account, and service-related data.


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the raw customer data.
# A forward slash is used as the path separator: it is accepted on Windows,
# macOS and Linux alike, and it avoids the invalid "\d" escape sequence that
# the backslash form contained.
df = pd.read_csv('DataSets/data_file.csv')

# Preview the first ten rows.
df.head(10)

### Data Understanding

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Remove the 'TotalCharges' column from the frame.
df = df.drop(columns=['TotalCharges'])

In [None]:
# Recode SeniorCitizen as text: 0 becomes 'No', any other value becomes 'Yes'.
df['SeniorCitizen'] = df['SeniorCitizen'].map(lambda flag: 'Yes' if flag != 0 else 'No')

In [None]:
# Remove the 'customerID' column from the frame.
df = df.drop(columns=['customerID'])

In [None]:
# Preview the first rows after the column drops and the SeniorCitizen recode.
df.head()

### Exploratory Data Analysis

In [None]:
# Names of all columns whose dtype is not 'object', in frame order.
cvars = [name for name, dtype in df.dtypes.items() if dtype != 'object']

In [None]:
# For every column in `cvars`, draw the histogram of customers with
# Churn == 'No' and then the histogram of customers with Churn == 'Yes'
# on the same panel.
# The grid is sized from len(cvars) instead of a fixed two panels, so the
# cell neither fails when there are more than two such columns nor leaves an
# empty panel when there is only one. With two columns the figure is
# unchanged (15 x 5 inches, two panels).
if cvars:
    # squeeze=False always returns a 2-D axes array, even for a single panel;
    # ravel() flattens it so panels can be addressed by position.
    fig, ax = plt.subplots(1, len(cvars), figsize=(7.5 * len(cvars), 5), squeeze=False)
    ax = ax.ravel()

    stayed = df.Churn == 'No'
    left = df.Churn == 'Yes'

    for i, x in enumerate(cvars):
        ax[i].hist(df.loc[stayed, x], label='Churn = 0', bins=30)
        ax[i].hist(df.loc[left, x], label='Churn = 1', bins=30)
        ax[i].set(xlabel=x, ylabel='count')
        ax[i].legend()

In [None]:
# Print the frame summary again (column names, non-null counts, dtypes).
df.info()

In [None]:
# Categorical variables and their relationship with churn.

# All object-typed columns, in frame order.
cat_var = [column for column in df.columns if df[column].dtypes == 'object']

# Exclude the target by name rather than assuming it is the last object column.
cat_var_noChurn = [column for column in cat_var if column != 'Churn']

# Size the grid from the number of variables (four panels per row) so that no
# variable is silently dropped when there are more than sixteen of them.
n_cols = 4
n_rows = max(1, -(-len(cat_var_noChurn) // n_cols))  # ceiling division
fig, ax = plt.subplots(n_rows, n_cols, figsize=(20, 5 * n_rows), squeeze=False)

# One panel per variable: churn counts split by that variable's levels.
for axi, var in zip(ax.flat, cat_var_noChurn):
    sns.countplot(x=df.Churn, hue=df[var], ax=axi)

# Hide panels left over when the variable count is not a multiple of n_cols.
for axi in ax.ravel()[len(cat_var_noChurn):]:
    axi.set_visible(False)

### Data Preprocessing

In [None]:
le = LabelEncoder()

# Columns holding exactly two distinct values (a missing value counts as one).
binary_columns = [name for name in df.columns if df[name].nunique(dropna=False) == 2]

# Show each column's two values, then replace them with integer codes.
for x in binary_columns:
    print(x, df[x].unique())
    df[x] = le.fit_transform(df[x])

In [None]:
# Preview the first rows after the two-valued columns were label-encoded.
df.head()

In [None]:
# One-hot encode every column that is still object-typed, dropping the first
# level of each variable.
object_columns = [name for name, dtype in df.dtypes.items() if dtype == 'object']
df = pd.get_dummies(df, columns=object_columns, drop_first=True)

In [None]:
# Preview the first rows after one-hot encoding.
df.head()

In [None]:
# Separate the target column from the feature columns.

y = df['Churn']
X = df.drop(columns=['Churn'])

### Model Building

In [None]:
# Hold out a quarter of the rows for evaluation; the seed is fixed so the
# split is the same on every run.
split_options = {'test_size': 0.25, 'random_state': 0}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

#### Logistic Regression

In [None]:
# Fit a logistic-regression classifier and predict on the held-out rows.
# The iteration cap is raised above the library default: the features are
# not scaled before fitting, and this file suppresses all warnings, so a
# solver that stopped at the default cap without converging would go
# unnoticed.
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
lr_pred = log_reg.predict(X_test)

In [None]:
# Accuracy as a percentage rounded to two decimals, then the per-class report.
lr_accuracy_pct = round(accuracy_score(y_test, lr_pred) * 100, 2)

print("\nLogistic Regression Model:")
print(f"{lr_accuracy_pct} %")
print(classification_report(y_test, lr_pred))

In [None]:
# Confusion matrix for the logistic-regression predictions. It is transposed
# before plotting so that actual classes run along x and predictions along y.
conf_mat = confusion_matrix(y_test, lr_pred)
class_names = ['No', 'Yes']
sns.heatmap(
    conf_mat.T,
    annot=True,
    fmt='d',
    cbar=False,
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Actuals')
plt.ylabel('Predicted')

#### Decision Tree

In [None]:
# Fit a decision-tree classifier on the training rows.
# The seed is fixed so the fitted tree, and therefore the reported metrics,
# are reproducible between runs, consistent with the seeded train/test split
# and the seeded SVC in this file.
dtc = DecisionTreeClassifier(random_state=0)

dtc.fit(X_train, y_train)

In [None]:
# Predict the class of every held-out row with the fitted decision tree.
dtc_pred = dtc.predict(X_test)

In [None]:
# Accuracy as a percentage rounded to two decimals, then the per-class report.
dtc_accuracy_pct = round(accuracy_score(y_test, dtc_pred) * 100, 2)

print("\nDecision Tree Model:")
print(f"{dtc_accuracy_pct} %")
print(classification_report(y_test, dtc_pred))

In [None]:
# Confusion matrix for the decision-tree predictions. It is transposed before
# plotting so that actual classes run along x and predictions along y.
conf_mat = confusion_matrix(y_test, dtc_pred)
class_names = ['No', 'Yes']
sns.heatmap(
    conf_mat.T,
    annot=True,
    fmt='d',
    cbar=False,
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Actuals')
plt.ylabel('Predicted')

#### K-Nearest Neighbors

In [None]:
# Fit a 5-nearest-neighbours classifier (fit returns the estimator itself),
# then predict on the held-out rows.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn_pred = knn.predict(X_test)

In [None]:
# Accuracy as a percentage rounded to two decimals, then the per-class report.
knn_accuracy_pct = round(accuracy_score(y_test, knn_pred) * 100, 2)

print("\nK-Nearest Neighbors Model:")
print(f"{knn_accuracy_pct} %")
print(classification_report(y_test, knn_pred))

In [None]:
# Confusion matrix for the KNN predictions. It is transposed before plotting
# so that actual classes run along x and predictions along y.
conf_mat = confusion_matrix(y_test, knn_pred)
class_names = ['No', 'Yes']
sns.heatmap(
    conf_mat.T,
    annot=True,
    fmt='d',
    cbar=False,
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Actuals')
plt.ylabel('Predicted')

#### Support Vector Machines

In [None]:
# Fit a linear-kernel support vector classifier with a fixed seed (fit
# returns the estimator itself), then predict on the held-out rows.
svm = SVC(random_state=42, kernel='linear').fit(X_train, y_train)
svm_pred = svm.predict(X_test)


In [None]:
# Accuracy as a percentage rounded to two decimals, then the per-class report.
svm_accuracy_pct = round(accuracy_score(y_test, svm_pred) * 100, 2)

print("\nSupport Vector Machine Model:")
print(f"{svm_accuracy_pct} %")
print(classification_report(y_test, svm_pred))