In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ann_visualizer
from ann_visualizer.visualize import ann_viz
from IPython.display import display
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report


class Bank:

    __name = "Churn_Modelling.csv"

    def load_dataset(self):
        self.dataset = pd.read_csv(self.__name)
    
    def show_dataset_head(self):
        display(self.dataset.head())
        print("Length of dataset: ", len(self.dataset))
    
    def eda(self):
        # print(self.dataset.info())
        # print("Unique values in Surname: ", self.dataset.Surname.unique())
        # print("Unique values in Geography: ", self.dataset.Geography.unique())
        # print("Unique values in Age: ", self.dataset.Age.unique())
        # print("Unique values in No. of Products: ", self.dataset.NumOfProducts.unique())
        print("Number of Exited and Non-Exited people: ", self.dataset.Exited.value_counts())
    
    def preprocess(self):
        self.X = self.dataset[[feature for feature in self.dataset.columns 
                                if ((feature!="RowNumber") & (feature!="CustomerId") & (feature!="Exited"))]]
        self.y = self.dataset["Exited"]

        self.le = LabelEncoder()
        self.X.Geography = self.le.fit_transform(self.X["Geography"])
        self.X.Surname = self.le.fit_transform(self.X["Surname"])
        self.X.Gender = self.le.fit_transform(self.X["Gender"])

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y, random_state=100, test_size=0.2)

        self.MinMaxScaler = MinMaxScaler()
        self.X_train_transformed = self.MinMaxScaler.fit_transform(self.X_train)
        self.X_test_transformed = self.MinMaxScaler.transform(self.X_test)
        
        self.df_train = pd.DataFrame({"Index": self.y_train.keys(), "Exited": self.y_train.values})
        self.df_test = pd.DataFrame({"Index": self.y_test.keys(), "Exited": self.y_test.values})
        # print("Number of Exited and Non-Exited people in training: ", self.df_train.Exited.value_counts())
        # print("Number of Exited and Non-Exited people in testing: ", self.df_test.Exited.value_counts())
        
    
    def train(self):
        self.model = Sequential()
        self.model.add(Dense(60, input_shape=(11,), activation="relu"))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(30, activation="relu"))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(15, activation="relu"))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(1, activation="sigmoid"))

        self.model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

        self.model.fit(self.X_train_transformed, self.y_train, epochs=10)

    def predict(self):
        self.y_pred = self.model.predict(self.X_test)
        for i in range(len(self.y_pred)):
            if (self.y_pred[i] > 0.5):
                self.y_pred[i] = 1
            else:
                self.y_pred[i] = 0
    
    def evaluate(self):
        self.cm = confusion_matrix(self.y_test, self.y_pred)
        print(self.cm)
        print(classification_report(self.y_test, self.y_pred))
        _, accuracy = self.model.evaluate(self.X_test_transformed, self.y_test)
        print("Accuracy: %.2f"%(accuracy*100))
    
    def visualize_ann(self):
        ann_viz(self.model, title="Artificial Neural network - Model Visualization")



customer = Bank()
customer.load_dataset()
customer.show_dataset_head()
customer.eda()
customer.preprocess()
customer.train()
customer.predict()
customer.evaluate()
customer.visualize_ann()



Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


Length of dataset:  10000
Number of Exited and Non-Exited people:  0    7963
1    2037
Name: Exited, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.X.Geography = self.le.fit_transform(self.X["Geography"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.X.Surname = self.le.fit_transform(self.X["Surname"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.X.Gender = self.le.fit_transform(self.X["Gender"])


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[[1588    0]
 [ 412    0]]
              precision    recall  f1-score   support

           0       0.79      1.00      0.89      1588
           1       0.00      0.00      0.00       412

    accuracy                           0.79      2000
   macro avg       0.40      0.50      0.44      2000
weighted avg       0.63      0.79      0.70      2000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 79.40


ExecutableNotFound: failed to execute WindowsPath('dot'), make sure the Graphviz executables are on your systems' PATH