In [159]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
import pickle

In [160]:
# loading the Dataset
data=pd.read_csv("Bank Customer Churn Prediction.csv")
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customer_id       10000 non-null  int64  
 1   credit_score      10000 non-null  int64  
 2   country           10000 non-null  object 
 3   gender            10000 non-null  object 
 4   age               10000 non-null  int64  
 5   tenure            10000 non-null  int64  
 6   balance           10000 non-null  float64
 7   products_number   10000 non-null  int64  
 8   credit_card       10000 non-null  int64  
 9   active_member     10000 non-null  int64  
 10  estimated_salary  10000 non-null  float64
 11  churn             10000 non-null  int64  
dtypes: float64(2), int64(8), object(2)
memory usage: 937.6+ KB


In [161]:
data.head()
# customerid ,surname is not important

Unnamed: 0,customer_id,credit_score,country,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn
0,15634602,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,15647311,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,15619304,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,15701354,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,15737888,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# preprocessing the data 
# Droping the Irrelevant columns
data.drop(columns=['customer_id'],axis=1, inplace=True)
data.head()

array([1, 3, 2, 4])

In [163]:
data.info()
data_columns=data.columns
data_columns

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   credit_score      10000 non-null  int64  
 1   country           10000 non-null  object 
 2   gender            10000 non-null  object 
 3   age               10000 non-null  int64  
 4   tenure            10000 non-null  int64  
 5   balance           10000 non-null  float64
 6   products_number   10000 non-null  int64  
 7   credit_card       10000 non-null  int64  
 8   active_member     10000 non-null  int64  
 9   estimated_salary  10000 non-null  float64
 10  churn             10000 non-null  int64  
dtypes: float64(2), int64(7), object(2)
memory usage: 859.5+ KB


Index(['credit_score', 'country', 'gender', 'age', 'tenure', 'balance',
       'products_number', 'credit_card', 'active_member', 'estimated_salary',
       'churn'],
      dtype='object')

In [164]:
data['country'].dtypes

dtype('O')

In [165]:
num_features=[feature for feature in data.columns if data[feature].dtypes != 'O']
cat_features=[feature for feature in data.columns if feature not in num_features]

In [166]:
## label encoding on gender
label_encoder_gender=LabelEncoder()
data['gender']=label_encoder_gender.fit_transform(data['gender'])

In [167]:
# One Hot Encoding on country.
from sklearn.preprocessing import OneHotEncoder
oh_encoder_country=OneHotEncoder()
country_encoded=oh_encoder_country.fit_transform(data[['country']])
country_encoded

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>

In [168]:
oh_encoder_country.get_feature_names_out(['country'])
country=oh_encoder_country.categories_
country[0]

array(['France', 'Germany', 'Spain'], dtype=object)

In [169]:
country=pd.DataFrame(country_encoded.toarray(),columns=['France','Germany','Spain'])
data.drop(columns=['country'],axis=1,inplace=True)

In [170]:
data=data.join(country)
data.head()

Unnamed: 0,credit_score,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn,France,Germany,Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [171]:
data.to_csv('bank_Churn.csv',index=False)

In [172]:
## Saving The encoder.
with open('label_encoder_gender.pkl','wb') as file:
    pickle.dump(label_encoder_gender,file)
with open('oh_encoder_country.pkl', 'wb') as file:
    pickle.dump(oh_encoder_country,file)

In [173]:
# Saving the data file.
df=pd.read_csv('bank_Churn.csv')
df.head()

Unnamed: 0,credit_score,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn,France,Germany,Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [174]:
# dividing the dataset into independent and dependent features
# independent features
x=data.drop(columns=['churn'],axis=1)
# indpendent featrue
y=data['churn']

In [175]:
# Train test split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)
# standardization
scaler=StandardScaler()
x_train=scaler.fit_transform(x_train)
x_test=scaler.transform(x_test)

In [176]:
# saving scaler 
with open('scaler.pickle','wb') as file:
    pickle.dump(scaler,file)

In [177]:
# Everything is Ready for training our Deep Learning Model.

## ANN Implementation

In [178]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Input
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [179]:
## Building ANN Model.
model=Sequential([
    Input(shape=(x_train.shape[1],)),
    Dense(64,activation='relu'), # first Hidden Layer connected with input layer.
    Dense(32,activation='relu'), # Second Hidden Layer Connected to First Hidden Layer
    Dense(1,activation='sigmoid') # output layer connected to second hidder layer.
])

In [180]:
model.summary()

In [181]:
# important parameters
opt=tf.keras.optimizers.Adam(learning_rate=0.01)
loss=tf.keras.losses.BinaryCrossentropy()
accuracy=tf.keras.metrics.Accuracy()

In [182]:
# Compile the Model.
model.compile(optimizer=opt,loss=loss,metrics=['accuracy'])

In [183]:
# Set up the tensorboard
log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback=TensorBoard(log_dir=log_dir,histogram_freq=1)

In [184]:
# Set up Early Stopping
early_stopping_callback=EarlyStopping(monitor='val_loss',patience=15,restore_best_weights=True)

In [185]:
# Training The Model.
history=model.fit(x_train,y_train,validation_data=(x_test,y_test),epochs=100,callbacks=[tensorboard_callback,early_stopping_callback])

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8095 - loss: 0.4351 - val_accuracy: 0.8570 - val_loss: 0.3478
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8561 - loss: 0.3548 - val_accuracy: 0.8660 - val_loss: 0.3450
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8583 - loss: 0.3479 - val_accuracy: 0.8665 - val_loss: 0.3387
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8606 - loss: 0.3415 - val_accuracy: 0.8625 - val_loss: 0.3482
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8580 - loss: 0.3450 - val_accuracy: 0.8670 - val_loss: 0.3373
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8539 - loss: 0.3508 - val_accuracy: 0.8690 - val_loss: 0.3329
Epoch 7/100
[1m250/25

In [186]:
model.evaluate(x_test,y_test)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8774 - loss: 0.3083


[0.32722723484039307, 0.8690000176429749]

In [187]:
model.save('model.h5')



In [188]:
## Load Tensorboard Extension.
%reload_ext tensorboard