# CUSTOMER CHURN PREDICTION

IMPORTING LIBRARIES

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

READING CSV FILE

In [6]:
# Load the customer records from customer.csv (a path relative to the notebook's working directory) into df.
df=pd.read_csv('customer.csv')


DISPLAYING THE ROWS

In [None]:
df.head() #Displaying the top 5 rows


In [None]:
df.tail() #Displaying the bottom 5 rows

FINDING SHAPE,GETTING INFORMATION AND DESCRIBING THE DATASET

In [None]:
df.shape


In [None]:
# Report the dataset dimensions: df.shape is (rows, columns).
print(f"Number of rows {df.shape[0]}")
print(f"Number of columns {df.shape[1]}")

In [None]:
df.info()


In [None]:
df.describe()


 CHECKING FOR NULL VALUES


In [None]:
df.isnull()


In [None]:
df.isnull().sum()

DROPPING IRRELEVANT COLUMNS


In [None]:
df.columns

In [20]:
# Remove the identifier columns, which this notebook treats as irrelevant to churn.
# errors='ignore' keeps the cell idempotent: re-running it after the columns
# are already gone no longer raises KeyError.
df=df.drop(columns=['RowNumber', 'CustomerId', 'Surname'],errors='ignore')


In [None]:
df.head()


In [None]:
df.columns

ENCODING CATEGORICAL DATA

In [None]:
df['Geography'].unique()


In [None]:
df['Gender'].unique()


In [26]:
# One-hot encode the categorical columns. drop_first=True drops the first level of each
# feature, so k categories become k-1 indicator columns (later cells rely on the
# Geography_Germany and Geography_Spain indicators).
df=pd.get_dummies(data=df,drop_first=True)


In [None]:
df.head()


In [None]:
df['Exited'].value_counts()   #Imbalanced Data


In [None]:
df.Exited.plot.hist()


In [None]:
sns.countplot(x='Exited', data=df)


In [None]:
X=df.drop('Exited',axis=1) #Dropping the exited column
X

In [None]:
X.corrwith(df['Exited']).plot.bar(figsize=(16,9), title='Correlated with Exited Column', rot = 45,grid = True)



In [33]:
corr=df.corr()


In [None]:
plt.figure(figsize=(16,9))
sns.heatmap(corr,annot=True)



In [35]:
# Feature matrix: every column except the target.
X=df.drop('Exited',axis=1)
# Target vector: the Exited column.
y=df['Exited']

HANDLING IMBALANCED DATA WITH SMOTE

In [37]:
from imblearn.over_sampling import SMOTE   #Handling imbalanced data with smote


In [None]:
# Oversample the minority class with SMOTE. The fixed seed makes the synthetic
# samples, and therefore every number derived from them, reproducible across runs.
X_res,y_res = SMOTE(random_state=0).fit_resample(X,y)


In [None]:
y_res.value_counts()


In [None]:
X_res.value_counts()

SPLITTING THE DATASET INTO TRAINING SET AND TEST SET

In [42]:
from sklearn.model_selection import train_test_split


In [43]:
# Hold out 20% of the rows for testing. stratify=y keeps the churn / no-churn
# ratio identical in both splits, which matters because the target is imbalanced:
# an unstratified split can leave the test set with a different minority share
# and distort precision/recall.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y)
     

In [None]:
X_test.shape


In [None]:
X_train.shape

In [None]:
y_train.shape

In [None]:
y_test.shape

FEATURE SCALING

In [49]:
from sklearn.preprocessing import StandardScaler


In [50]:
scaler = StandardScaler()


In [51]:
# Fit the scaler on the training split only and replace X_train with the array returned by fit_transform.
X_train= scaler.fit_transform(X_train)


In [52]:
# Transform the test split with the already-fitted scaler (no refitting on test data).
X_test= scaler.transform(X_test)


In [None]:
X_train


In [None]:
X_test


# APPLYING ALGORITHMS FOR IMBALANCED DATA

LOGISTIC REGRESSION

In [57]:
from sklearn.linear_model import LogisticRegression


In [58]:
log=LogisticRegression()


In [None]:
log.fit(X_train,y_train)


In [60]:
clf = LogisticRegression(random_state=0).fit(X_train, y_train)


In [61]:
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

In [62]:
# Predict on the held-out split, then score those predictions with each metric in turn.
y_pred1 = clf.predict(X_test)
acc, f1, prec, rec = (
    scorer(y_test, y_pred1)
    for scorer in (accuracy_score, f1_score, precision_score, recall_score)
)


In [None]:
results=pd.DataFrame([['Logistic regression',acc,f1,prec,rec]],columns=['Model','Accuracy','F1','Precision','Recall'])
results

In [None]:
print(confusion_matrix(y_test,y_pred1))


 SUPPORT VECTOR MACHINE

In [None]:
# Import the class directly: the fitted model is still bound to `svm` (later cells
# call svm.predict), but the assignment no longer overwrites a name that referred
# to the sklearn.svm module.
from sklearn.svm import SVC
svm=SVC()
svm.fit(X_train,y_train)


In [67]:
# Predict on the held-out split, then score those predictions with each metric in turn.
y_pred2 = svm.predict(X_test)
acc, f1, prec, rec = (
    scorer(y_test, y_pred2)
    for scorer in (accuracy_score, f1_score, precision_score, recall_score)
)


In [None]:
results=pd.DataFrame([['SVM',acc,f1,prec,rec]],columns=['Model','Accuracy','F1','Precision','Recall'])
results

In [None]:
print(confusion_matrix(y_test,y_pred2))


K-NEIGHBORS CLASSIFICATION


In [None]:
from sklearn.neighbors import KNeighborsClassifier
knn=KNeighborsClassifier()
knn.fit(X_train,y_train)

In [72]:
# Predict on the held-out split, then score those predictions with each metric in turn.
y_pred3 = knn.predict(X_test)
acc, f1, prec, rec = (
    scorer(y_test, y_pred3)
    for scorer in (accuracy_score, f1_score, precision_score, recall_score)
)


In [None]:
results=pd.DataFrame([['KNN',acc,f1,prec,rec]],columns=['Model','Accuracy','F1','Precision','Recall'])
results

In [None]:
print(confusion_matrix(y_test,y_pred3))


 DECISION TREE CLASSIFIER


In [None]:
from sklearn.tree import DecisionTreeClassifier
# Seed the tree so its randomised choices are repeatable and the reported scores
# do not change between runs (same seed as the logistic-regression model above).
dt=DecisionTreeClassifier(random_state=0)
dt.fit(X_train,y_train)

In [77]:
# Predict on the held-out split, then score those predictions with each metric in turn.
y_pred4 = dt.predict(X_test)
acc, f1, prec, rec = (
    scorer(y_test, y_pred4)
    for scorer in (accuracy_score, f1_score, precision_score, recall_score)
)

In [None]:
results=pd.DataFrame([['Decision Tree Classifier',acc,f1,prec,rec]],columns=['Model','Accuracy','F1','Precision','Recall'])
results

In [None]:
print(confusion_matrix(y_test,y_pred4))


RANDOM FOREST CLASSIFIER

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Seed the forest: bootstrap sampling and per-split feature sampling are random,
# so without a seed every run reports different scores.
rf=RandomForestClassifier(random_state=0)
rf.fit(X_train,y_train)


In [82]:
# Predict on the held-out split, then score those predictions with each metric in turn.
y_pred5 = rf.predict(X_test)
acc, f1, prec, rec = (
    scorer(y_test, y_pred5)
    for scorer in (accuracy_score, f1_score, precision_score, recall_score)
)


In [None]:
results=pd.DataFrame([['Random Forest Classifier',acc,f1,prec,rec]],columns=['Model','Accuracy','F1','Precision','Recall'])
results

In [None]:
print(confusion_matrix(y_test,y_pred5))


 GRADIENT BOOSTING CLASSIFIER

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Seed the booster so the individual trees, and therefore the reported scores,
# are reproducible between runs.
gbc=GradientBoostingClassifier(random_state=0)
gbc.fit(X_train,y_train)

In [87]:
# Predict on the held-out split, then score those predictions with each metric in turn.
y_pred6 = gbc.predict(X_test)
acc, f1, prec, rec = (
    scorer(y_test, y_pred6)
    for scorer in (accuracy_score, f1_score, precision_score, recall_score)
)


In [None]:
results=pd.DataFrame([['Gradient Boosting Classifier',acc,f1,prec,rec]],columns=['Model','Accuracy','F1','Precision','Recall'])
results


In [None]:
print(confusion_matrix(y_test,y_pred6))


REPRESENTATION OF VARIOUS MODELS USING PLOTS

In [91]:
# One accuracy value per model; the prediction arrays are listed in the same
# order as the display names.
models = ['Logistic Regression', 'Support Vector', 'KNearest Neighbor', 'Decision Tree', 'Random Forest', 'Gradient Boosting']
comparison_methods = pd.DataFrame({
    'Models': models,
    'Accuracy': [
        accuracy_score(y_test, preds)
        for preds in (y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6)
    ],
})


In [None]:
comparison_methods

In [None]:
sns.barplot(x='Models', y='Accuracy', data=comparison_methods, dodge=False)
plt.xticks(rotation=65, ha='right')
plt.title('Plot of models based on Accuracy')
plt.show()


In [94]:
# One precision value per model; the prediction arrays are listed in the same
# order as the display names.
models = ['Logistic Regression', 'Support Vector', 'KNearest Neighbor', 'Decision Tree', 'Random Forest', 'Gradient Boosting']
comparison_methods = pd.DataFrame({
    'Models': models,
    'Precision': [
        precision_score(y_test, preds)
        for preds in (y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6)
    ],
})



In [None]:
comparison_methods

In [None]:
sns.barplot(x='Models', y='Precision', data=comparison_methods, dodge=False)
plt.xticks(rotation=65, ha='right')
plt.title('Plot of models based on Precision')
plt.show()


# APPLYING ALGORITHMS FOR BALANCED DATA  

In [None]:
from imblearn.over_sampling import SMOTE
# Oversample the minority class. The fixed seed makes the synthetic rows reproducible.
X_res,y_res=SMOTE(random_state=0).fit_resample(X,y)
# Class counts after resampling (the cell's displayed output).
y_res.value_counts()


In [99]:
from sklearn.model_selection import train_test_split
# Split the ORIGINAL data first so the test set contains only real customers,
# then oversample the training portion alone. Resampling before splitting puts
# synthetic points interpolated from would-be test rows into the training set,
# which leaks information and inflates every score reported in this section.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y)
X_train, y_train = SMOTE(random_state=0).fit_resample(X_train, y_train)

In [100]:
from sklearn.preprocessing import StandardScaler
# Fresh scaler for this section: fitted on the training split only,
# then applied unchanged to the test split.
scaler = StandardScaler()
X_train= scaler.fit_transform(X_train)
X_test= scaler.transform(X_test)


In [None]:
X_train


In [None]:
X_test


LOGISTIC REGRESSION

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

# Unseeded estimator, fitted but not used for the scores below.
log = LogisticRegression()
log.fit(X_train, y_train)

# Seeded estimator whose test-split predictions are scored.
clf = LogisticRegression(random_state=0).fit(X_train, y_train)
y_pred1 = clf.predict(X_test)
acc, f1, prec, rec = (
    scorer(y_test, y_pred1)
    for scorer in (accuracy_score, f1_score, precision_score, recall_score)
)

# Single-row summary table, shown as the cell output.
results = pd.DataFrame(
    [['Logistic regression', acc, f1, prec, rec]],
    columns=['Model', 'Accuracy', 'F1', 'Precision', 'Recall'],
)
results


SUPPORT VECTOR MACHINE

In [None]:
# Import the class directly: the fitted model is still bound to `svm`, but the
# assignment no longer overwrites a name that referred to the sklearn.svm module.
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
svm=SVC()
svm.fit(X_train,y_train)
# Score the test-split predictions.
y_pred2= svm.predict(X_test)
acc=accuracy_score(y_test,y_pred2)
f1=f1_score(y_test,y_pred2)
prec=precision_score(y_test,y_pred2)
rec=recall_score(y_test,y_pred2)
# Single-row summary table, shown as the cell output.
results=pd.DataFrame([['SVM',acc,f1,prec,rec]],columns=['Model','Accuracy','F1','Precision','Recall'])
results



K-NEIGHBOR CLASSIFICATION

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

# Fit k-nearest neighbours with the library defaults on the training split.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

# Score the test-split predictions with each metric in turn.
y_pred3 = knn.predict(X_test)
acc, f1, prec, rec = (
    scorer(y_test, y_pred3)
    for scorer in (accuracy_score, f1_score, precision_score, recall_score)
)

# Single-row summary table, shown as the cell output.
results = pd.DataFrame(
    [['KNN', acc, f1, prec, rec]],
    columns=['Model', 'Accuracy', 'F1', 'Precision', 'Recall'],
)
results

DECISION TREE CLASSIFICATION

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
# Seeded so the fitted tree, and the scores derived from it, are reproducible.
dt=DecisionTreeClassifier(random_state=0)
dt.fit(X_train,y_train)
# Score the test-split predictions.
y_pred4= dt.predict(X_test)
acc=accuracy_score(y_test,y_pred4)
f1=f1_score(y_test,y_pred4)
prec=precision_score(y_test,y_pred4)
rec=recall_score(y_test,y_pred4)
# Single-row summary table, shown as the cell output.
results=pd.DataFrame([['Decision Tree Classifier',acc,f1,prec,rec]],columns=['Model','Accuracy','F1','Precision','Recall'])
results

RANDOM FOREST CLASSIFICATION

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
# Seeded so bootstrap and feature sampling, and the scores derived from them, are reproducible.
rf=RandomForestClassifier(random_state=0)
rf.fit(X_train,y_train)
# Score the test-split predictions.
y_pred5= rf.predict(X_test)
acc=accuracy_score(y_test,y_pred5)
f1=f1_score(y_test,y_pred5)
prec=precision_score(y_test,y_pred5)
rec=recall_score(y_test,y_pred5)
# Single-row summary table, shown as the cell output.
results=pd.DataFrame([['Random Forest Classifier',acc,f1,prec,rec]],columns=['Model','Accuracy','F1','Precision','Recall'])
results


GRADIENT BOOSTING CLASSIFICATION

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
# Seeded so the boosted trees, and the scores derived from them, are reproducible.
gbc=GradientBoostingClassifier(random_state=0)
gbc.fit(X_train,y_train)
# Score the test-split predictions.
y_pred6= gbc.predict(X_test)
acc=accuracy_score(y_test,y_pred6)
f1=f1_score(y_test,y_pred6)
prec=precision_score(y_test,y_pred6)
rec=recall_score(y_test,y_pred6)
# Single-row summary table, shown as the cell output.
results=pd.DataFrame([['Gradient Boosting Classifier',acc,f1,prec,rec]],columns=['Model','Accuracy','F1','Precision','Recall'])
results


REPRESENTATION OF VARIOUS MODELS USING PLOTS FOR BALANCED DATA

In [116]:
# One accuracy value per model; the prediction arrays are listed in the same
# order as the display names.
models = ['Logistic Regression', 'Support Vector', 'KNearest Neighbor', 'Decision Tree', 'Random Forest', 'Gradient Boosting']
comparison_methods = pd.DataFrame({
    'Models': models,
    'Accuracy': [
        accuracy_score(y_test, preds)
        for preds in (y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6)
    ],
})


In [None]:
comparison_methods

In [None]:
sns.barplot(x='Models', y='Accuracy', data=comparison_methods, dodge=False)
plt.xticks(rotation=65, ha='right')
plt.title('Plot of models based on Accuracy for balanced data')
plt.legend(title='Data Type', labels=['Balanced Data'])
plt.show()


In [119]:
# One precision value per model; the prediction arrays are listed in the same
# order as the display names.
models = ['Logistic Regression', 'Support Vector', 'KNearest Neighbor', 'Decision Tree', 'Random Forest', 'Gradient Boosting']
comparison_methods = pd.DataFrame({
    'Models': models,
    'Precision': [
        precision_score(y_test, preds)
        for preds in (y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6)
    ],
})


In [None]:
comparison_methods

In [None]:
sns.barplot(x='Models', y='Precision', data=comparison_methods, dodge=False)
plt.xticks(rotation=65, ha='right')
plt.title('Plot of models based on Precision for balanced data')
plt.legend(title='Data Type', labels=['Balanced Data'])
plt.show()


# SAVING THE MODEL


In [None]:
# Refit the scaler and the random forest on the full resampled dataset before saving.
# The scaled array gets its own name so X_res keeps the unscaled resampled data:
# overwriting it would make any earlier cell that reads X_res silently operate on
# scaled values when re-run.
X_res_scaled=scaler.fit_transform(X_res)
rf.fit(X_res_scaled,y_res)

In [124]:
import joblib


In [None]:
joblib.dump(rf,'churn_predict_model')


In [126]:
model=joblib.load('churn_predict_model')


In [None]:
df.columns

In [None]:
# The saved forest was fitted on standardised features, so raw values must pass
# through the same fitted scaler before prediction (as the GUI callback does).
model.predict(scaler.transform([[619,42,2,0.0,0,0,0,101348.88,0,0,0]]))


In [None]:
row_12 = df.iloc[11]  #Displaying the data of a specific row (position 11, i.e. the 12th row)
print(row_12)

In [None]:
import tkinter as tk
from tkinter import Label, Entry, Button
from sklearn.preprocessing import StandardScaler
import joblib
#scaler = StandardScaler()
def show_entry_fields():
    """Read the form fields, run the saved model and show the prediction.

    Reads the ten Entry widgets e1..e10 (CreditScore, Age, Tenure, Balance,
    NumOfProducts, HasCrCard, IsActiveMember, EstimatedSalary, Geography code,
    Gender flag), scales the resulting feature row with the module-level
    ``scaler`` and passes it to the model stored in 'churn_predict_model'.

    The Geography field takes a numeric code: 1 = Germany, 2 = Spain,
    3 = France. France is encoded with both indicator columns set to zero.

    The outcome, or a validation message when the input cannot be used, is
    written to a Label in grid row 31 of ``master``. Returns None.
    """
    # Remove the message left by a previous click so labels do not pile up
    # on top of each other in the same grid cell.
    for stale in master.grid_slaves(row=31):
        stale.destroy()

    try:
        p1=int(e1.get())
        p2=int(e2.get())
        p3=int(e3.get())
        p4=float(e4.get())
        p5=int(e5.get())
        p6=int(e6.get())
        p7=int(e7.get())
        p8=float(e8.get())
        p9=int(e9.get())
        p10=int(e10.get())
    except ValueError:
        # Empty or non-numeric field: report it in the window instead of
        # raising inside the Tk callback.
        Label(master, text="Invalid input: enter a number in every field").grid(row=31)
        return

    # Geography code -> (Geography_Germany, Geography_Spain).
    geography_dummies = {1: (1, 0), 2: (0, 1), 3: (0, 0)}
    if p9 not in geography_dummies:
        # Previously an unknown code left the indicator variables unbound and
        # the callback crashed with UnboundLocalError.
        Label(master, text="Geography must be 1 (Germany), 2 (Spain) or 3 (France)").grid(row=31)
        return
    Geography_Germany, Geography_Spain = geography_dummies[p9]

    model=joblib.load('churn_predict_model')
    features = [[p1, p2, p3, p4, p5, p6, p7, p8, Geography_Germany, Geography_Spain, p10]]
    result = model.predict(scaler.transform(features))

    # predict returns one label per input row; only one row was submitted.
    if result[0] == 0:
        Label(master, text="Did not Churn").grid(row=31)
    else:
        Label(master, text="Churned").grid(row=31)
    
    
# Main application window.
master = tk.Tk()
master.title("Bank Customers Churn Prediction Using Machine Learning")

# Banner spanning both grid columns. grid() returns None, so `label` is None.
label = Label(
    master,
    text="Customers Churn Prediction Using ML",
    bg="black",
    fg="white",
).grid(row=0, columnspan=2)

# Field captions in column 0, rows 1-10, in the order the callback reads them.
for _row, _caption in enumerate(
    (
        "CreditScore",
        "Age",
        "Tenure",
        "Balance",
        "NumOfProducts",
        "HasCrCard",
        "IsActiveMember",
        "EstimatedSalary",
        "Geography",
        "Gender",
    ),
    start=1,
):
    Label(master, text=_caption).grid(row=_row)

# One text box per field; the names e1..e10 are the ones show_entry_fields reads.
e1, e2, e3, e4, e5, e6, e7, e8, e9, e10 = (Entry(master) for _ in range(10))

# Place each text box in column 1, next to its caption.
for _row, _entry in enumerate((e1, e2, e3, e4, e5, e6, e7, e8, e9, e10), start=1):
    _entry.grid(row=_row, column=1)

# The button runs the prediction callback; grid() with no arguments puts it
# in the next free row.
Button(master, text='Predict', command=show_entry_fields).grid()

# Hand control to Tk until the window is closed.
master.mainloop()