## 0. Dependencies

In [102]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

## 1. Import and Explore the Dataset

In [24]:
# 1.1 loading dataset into dataframe

data = pd.read_csv("Data/Churn_Modelling.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [25]:
# 1.2 Information about dataset

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [28]:
# 1.3 Looking for missing values in each column

data.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [38]:
# 1.4 Looking for Duplicated Values(dataframe level)

data.duplicated().sum()

0

In [39]:
# 1.4.1 Looking for duplicated values(Column level)

data.CustomerId.duplicated().sum()

0

In [50]:
# If looking for duplicated values in certain columns(more than one) then

#data.duplicated(subset=[col1,col2,col3]).sum()

In [47]:
# 1.4.2 To identify which rows(dataframe level) or values(column level) are duplicated

duplicated_Age = data.loc[data.Age.duplicated(keep='first'),:]
duplicated_Age

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
8,9,15792365,He,501,France,Male,44,4,142051.07,2,0,1,74940.50,0
17,18,15788218,Henderson,549,Spain,Female,24,9,0.00,2,1,1,14406.41,0
18,19,15661507,Muldrow,587,Spain,Male,45,6,0.00,1,0,0,158684.81,0
19,20,15568982,Hao,726,France,Female,24,6,0.00,2,1,1,54724.03,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [51]:
# 1.4.3 To drop duplicate rows/values

#data.drop_duplicates(keep='first',inplace=True) # dropping rows(dataframe level)
#data.drop_duplicates(subset='Surname',keep='first',inplace=True) # dropping values(column level)
#data.drop_duplicates(subset=['Surname','CustomerId'],keep='first',inplace=True) # dropping values(column level)(more than one column)


# This command drops all rows that are duplicated leaving the first rows

In [55]:
# 2. Checking if target variable is balanced
data['Exited'].value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

In [62]:
# 3. Drop not needed columns, here we're dropping CustomerID,RowNumber, Surname
data.drop(columns=['RowNumber','CustomerId','Surname'],inplace=True)


KeyError: "['RowNumber', 'CustomerId', 'Surname'] not found in axis"

In [63]:
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [75]:
# 4. Converting Categorical Varaibales into Numerical Variables

data = pd.get_dummies(data,columns=['Geography','Gender'],drop_first=True)

## 2. Split Dataset into Train, Test sets

In [65]:
# Splitting Dataset into Train, Test sets

In [76]:
X = data.drop('Exited',axis=1) # X = data.drop(columns=['Exited'])
y = data['Exited']


In [77]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=123)

In [78]:
X_train.shape

(8000, 11)

In [79]:
X_test.shape

(2000, 11)

## 3. Feature Scaling

In [80]:
# Feature Scaling -> Scaling to Normalize the independent variables


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


## 4. Building ANN

In [83]:
# Our ANN model's architecture contains 1 input layer, 1 output layer, 1 hidden layers.

In [86]:
model = Sequential()

model.add(Dense(3,activation='sigmoid',input_dim = 11)) #input_dim is 11 because we've 11 features -> Input layer
model.add(Dense(1,activation='sigmoid')) # This is output layer.


In [87]:
# to get the summary of the model

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 3)                 36        
                                                                 
 dense_5 (Dense)             (None, 1)                 4         
                                                                 
Total params: 40
Trainable params: 40
Non-trainable params: 0
_________________________________________________________________


### 4.1 Model Compilation

In [104]:
#Since we're solving a binary classification problem, loss function is binary_crossentropy

model.compile(loss='binary_crossentropy',optimizer='Adam',metrics=['accuracy']) 

### 4.2 Fitting data

In [106]:
history = model.fit(X_train_scaled,y_train,epochs=10,validation_split=0.2)

Epoch 1/10


2022-12-22 03:30:45.318210: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at xla_ops.cc:418 : NOT_FOUND: could not find registered platform with id: 0x14cbe6300
2022-12-22 03:30:45.318263: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at xla_ops.cc:418 : NOT_FOUND: could not find registered platform with id: 0x14cbe6300
2022-12-22 03:30:45.320894: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at xla_ops.cc:418 : NOT_FOUND: could not find registered platform with id: 0x14cbe6300
2022-12-22 03:30:45.320941: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at xla_ops.cc:418 : NOT_FOUND: could not find registered platform with id: 0x14cbe6300


NotFoundError: Graph execution error:

Detected at node 'StatefulPartitionedCall_2' defined at (most recent call last):
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/traitlets/config/application.py", line 1041, in launch_instance
      app.start()
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 711, in start
      self.io_loop.start()
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
      self._run_once()
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/asyncio/base_events.py", line 1899, in _run_once
      handle._run()
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 411, in do_execute
      res = shell.run_cell(
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 530, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2940, in run_cell
      result = self._run_cell(
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2995, in _run_cell
      return runner(coro)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3194, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3373, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3433, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/jn/qb0sg3lx5zl7vk985j5p8k_w0000gn/T/ipykernel_6403/2240118457.py", line 1, in <module>
      model.fit(X_train_scaled,y_train,epochs=10,validation_split=0.2)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/keras/engine/training.py", line 1650, in fit
      tmp_logs = self.train_function(iterator)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/keras/engine/training.py", line 1249, in train_function
      return step_function(self, iterator)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/keras/engine/training.py", line 1233, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/keras/engine/training.py", line 1222, in run_step
      outputs = model.train_step(data)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/keras/engine/training.py", line 1027, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 527, in minimize
      self.apply_gradients(grads_and_vars)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1140, in apply_gradients
      return super().apply_gradients(grads_and_vars, name=name)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 634, in apply_gradients
      iteration = self._internal_apply_gradients(grads_and_vars)
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1166, in _internal_apply_gradients
      return tf.__internal__.distribute.interim.maybe_merge_call(
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1216, in _distributed_apply_gradients_fn
      distribution.extended.update(
    File "/Users/geetha/DeepLearning/DLProjects/env/lib/python3.10/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1211, in apply_grad_to_update_var
      return self._update_step_xla(grad, var, id(self._var_key(var)))
Node: 'StatefulPartitionedCall_2'
could not find registered platform with id: 0x14cbe6300
	 [[{{node StatefulPartitionedCall_2}}]] [Op:__inference_train_function_3457]

In [97]:
# to get weights

model.layers[0].get_weights()

[array([[-0.36928815],
        [ 0.16463912],
        [-0.69425446]], dtype=float32),
 array([0.], dtype=float32)]

In [100]:
# to predict

y_log = model.predict(X_test_scaled)



In [101]:
# to convert predicted values to 0 or 1

y_pred = np.where(y_log > 0.5,1,0)

### 4.3 Accuracy

In [103]:
accuracy_score(y_test,y_pred)

0.793

### 4.4 Plot History

In [107]:
plt.plot(history.history['loss'])
plt.plot(history.history['val_accuracy'])


NameError: name 'history' is not defined