### Part A: Data Preprocessing

##### Step 1: Importing the libraries

In [2]:
!pip install pandas

Collecting pandas
  Downloading pandas-1.3.2-cp38-cp38-win_amd64.whl (10.2 MB)
Collecting pytz>=2017.3
  Downloading pytz-2021.1-py2.py3-none-any.whl (510 kB)
Installing collected packages: pytz, pandas
Successfully installed pandas-1.3.2 pytz-2021.1


In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf

##### Step 2: Importing dataset

In [12]:
dataset = pd.read_csv('Churn_Modelling.csv')
dataset

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


##### Step 3: Creating feature vector and DVV

In [13]:
X = dataset.iloc[:,3:-1].values
Y = dataset.iloc[:,-1].values

In [14]:
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [15]:
Y

array([1, 0, 1, ..., 1, 1, 0], dtype=int64)

##### Step 4: Encoding

In [8]:
!pip install sklearn

Collecting sklearn
  Downloading sklearn-0.0.tar.gz (1.1 kB)
Collecting scikit-learn
  Downloading scikit_learn-0.24.2-cp38-cp38-win_amd64.whl (6.9 MB)
Collecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-2.2.0-py3-none-any.whl (12 kB)
Collecting joblib>=0.11
  Downloading joblib-1.0.1-py3-none-any.whl (303 kB)
Building wheels for collected packages: sklearn
  Building wheel for sklearn (setup.py): started
  Building wheel for sklearn (setup.py): finished with status 'done'
  Created wheel for sklearn: filename=sklearn-0.0-py2.py3-none-any.whl size=1317 sha256=b0b37ddd927274db4b546d59498d4dbe65a56b34922adbff86349a92518bab6a
  Stored in directory: c:\users\dell\appdata\local\pip\cache\wheels\22\0b\40\fd3f795caaa1fb4c6cb738bc1f56100be1e57da95849bfc897
Successfully built sklearn
Installing collected packages: threadpoolctl, joblib, scikit-learn, sklearn
Successfully installed joblib-1.0.1 scikit-learn-0.24.2 sklearn-0.0 threadpoolctl-2.2.0


In [16]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:,2] = np.array(le.fit_transform(X[:,2]))

In [17]:
X

array([[619, 'France', 0, ..., 1, 1, 101348.88],
       [608, 'Spain', 0, ..., 0, 1, 112542.58],
       [502, 'France', 0, ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 0, ..., 0, 1, 42085.58],
       [772, 'Germany', 1, ..., 1, 0, 92888.52],
       [792, 'France', 0, ..., 1, 0, 38190.78]], dtype=object)

In [19]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers = [('encoder', OneHotEncoder(),[1])],remainder = 'passthrough')
X = np.array(ct.fit_transform(X))

In [20]:
X

array([[1.0, 0.0, 1.0, ..., 1, 1, 101348.88],
       [1.0, 0.0, 0.0, ..., 0, 1, 112542.58],
       [1.0, 0.0, 1.0, ..., 1, 0, 113931.57],
       ...,
       [1.0, 0.0, 1.0, ..., 0, 1, 42085.58],
       [0.0, 1.0, 0.0, ..., 1, 0, 92888.52],
       [1.0, 0.0, 1.0, ..., 1, 0, 38190.78]], dtype=object)

##### Step 5: Replacing Missing data

In [22]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')
imputer.fit(X[:,:])
X[:,:] = imputer.transform(X[:,:])

##### Step 6: Splitting of dataset into training and testing

In [23]:
from sklearn.model_selection import train_test_split
Xtrain,Xtest,Ytrain,Ytest = train_test_split(X,Y,test_size=0.2,random_state=1)

In [24]:
Xtrain

array([[0.0, 1.0, 0.0, ..., 0.0, 1.0, 124749.08],
       [1.0, 0.0, 1.0, ..., 0.0, 0.0, 41104.82],
       [0.0, 1.0, 0.0, ..., 1.0, 1.0, 45750.21],
       ...,
       [1.0, 0.0, 1.0, ..., 1.0, 1.0, 92027.69],
       [1.0, 0.0, 1.0, ..., 1.0, 1.0, 101168.9],
       [0.0, 1.0, 0.0, ..., 1.0, 0.0, 33462.94]], dtype=object)

###### Step 7: Feature Scaling

In [25]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
Xtrain = sc.fit_transform(Xtrain)
Xtest = sc.fit_transform(Xtest)

### Part B: Building Deep Learning model

##### Step 1: Building model

In [26]:
d1 = tf.keras.models.Sequential()

In [27]:
# First Hidden Layer
d1.add(tf.keras.layers.Dense(units=4,activation='relu'))
# Second Hidden Layer
d1.add(tf.keras.layers.Dense(units=4,activation='relu'))
# Output Layer
d1.add(tf.keras.layers.Dense(units=1,activation='sigmoid'))

##### Step 2: Compiling model

In [28]:
d1.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

##### Step 3: Training the model

In [29]:
d1.fit(Xtrain,Ytrain)



<tensorflow.python.keras.callbacks.History at 0x180955784c0>

##### Step 4: Testing the model

In [31]:
Yest = d1.predict(Xtest)

In [33]:
Ytest

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [36]:
Yest

array([[0.1919091 ],
       [0.2286303 ],
       [0.32684767],
       ...,
       [0.36327058],
       [0.2181969 ],
       [0.2753185 ]], dtype=float32)

In [38]:
Yest = (Yest>0.5)

In [40]:
Yest

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [42]:
# Encoding the output vector
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
Yest = le.fit_transform(Yest)

In [43]:
Yest

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

### Part C : Performance Metric

###### 1. Confusion Matrix

In [44]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Ytest,Yest)
print(cm)

[[1585    0]
 [ 415    0]]


In [45]:
from sklearn.metrics import accuracy_score
accuracy_score(Ytest,Yest)


0.7925

In [48]:
from sklearn.metrics import precision_score
precision_score(Ytest,Yest)

  _warn_prf(average, modifier, msg_start, len(result))


0.0