# CUSTOMER CHURN PREDICTION #
The aim of this project is to develop a predictive model for customer churn in a subscription-based service or business. Customer churn, or customer attrition, refers to the rate at which customers stop using a service. By analyzing historical customer data and applying machine learning algorithms such as Logistic Regression, Random Forests, or Gradient Boosting, we aim to predict which customers are likely to churn in the future.

# Load the Data Set #

In [None]:
!pip install seaborn scikit-learn


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn


In [None]:
churn=pd.read_csv('/kaggle/input/bank-customer-churn-prediction/Churn_Modelling.csv')

In [None]:
churn.head()

# EDA #

In [None]:
churn.shape

In [None]:
churn.size

In [None]:
churn.columns=churn.columns.str.strip()

In [None]:
churn.columns=churn.columns.str.lower()

In [None]:
churn.columns

In [None]:
churn.isnull().sum()

In [None]:
churn[churn.duplicated(subset=['customerid'],keep=False)]

In [None]:
churn.info()

In [None]:
churn.describe()

In [None]:
plt.figure(figsize=(15,5))
sns.countplot(data=churn,x='exited')

In [None]:
churn['exited'].value_counts().to_frame()

In [None]:
from sklearn.utils import resample

In [None]:
churn_majority=churn[churn['exited']==0]
churn_minority=churn[churn['exited']==1]


In [None]:
churn_majority_downsample=resample(churn_majority,n_samples=2037,replace=False,random_state=42)

In [None]:
churn_df=pd.concat([churn_majority_downsample,churn_minority])

In [None]:
churn_df['exited'].value_counts().to_frame()

In [None]:
plt.figure(figsize=(15,5))
sns.countplot(data=churn_df,x='exited')

In [None]:
churn_df.columns

In [None]:
churn_df.drop(['rownumber', 'customerid', 'surname','geography','gender'],axis=1,inplace=True)

In [None]:
churn_df.corr()

In [None]:
plt.figure(figsize=(15,5))
sns.heatmap(churn_df.corr(),annot=True)

In [None]:
df_corr_exit=churn_df.corr()['exited'].to_frame()

In [None]:
plt.figure(figsize=(15,5))
sns.barplot(data=df_corr_exit,x=df_corr_exit.index,y='exited')

In [None]:
# Feature matrix: every column except the target.
x = churn_df.drop(columns=['exited'])
# Target vector.
y = churn_df['exited']

# Splitting the Data Set #

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=42)

In [None]:
x_train.shape,x_test.shape,y_train.shape,y_test.shape

# Modeling and Evaluation #

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
lr=LogisticRegression(max_iter=500)

In [None]:
lr.fit(x_train,y_train)

In [None]:
lr.score(x_train,y_train)

In [None]:
y_pred=lr.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix,recall_score,precision_score,accuracy_score,f1_score,ConfusionMatrixDisplay

In [None]:
precision_score(y_test,y_pred)

In [None]:
recall_score(y_test,y_pred)

In [None]:
accuracy_score(y_test,y_pred)

In [None]:
f1_score(y_test,y_pred)

In [None]:
cmd=ConfusionMatrixDisplay(confusion_matrix=confusion_matrix(y_test,y_pred,labels=lr.classes_),display_labels=lr.classes_)
cmd.plot()

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
knn=KNeighborsClassifier(n_neighbors=3)

In [None]:
knn.fit(x_train,y_train)

In [None]:
knn.score(x_train,y_train)

In [None]:
knn.score(x_test,y_test)

In [None]:
y_pred=lr.predict(x_test)

In [None]:
precision_score(y_test,y_pred)

In [None]:
recall_score(y_test,y_pred)

In [None]:
accuracy_score(y_test,y_pred)

In [None]:
f1_score(y_test,y_pred)

In [None]:
from sklearn.svm import SVC

In [None]:
svc=SVC()

In [None]:
svc.fit(x_train,y_train)

In [None]:
svc.score(x_train,y_train)

In [None]:
svc.score(x_test,y_test)

In [None]:
precision_score(y_test,y_pred)

In [None]:
recall_score(y_test,y_pred)

In [None]:
accuracy_score(y_test,y_pred)

In [None]:
f1_score(y_test,y_pred)

In [None]:
# Confusion matrix of the current y_pred, with rows/columns ordered by the
# classes the SVC model was fitted on.
cm = confusion_matrix(y_test, y_pred, labels=svc.classes_)
cmd = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=svc.classes_)
cmd.plot()