# ****Customer Churn Prediction Using Naural Networks****

## ****Importing Libraries****

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

2026-01-18 11:35:03.998714: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1768736104.206458      24 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768736104.258458      24 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1768736104.714205      24 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768736104.714246      24 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768736104.714249      24 computation_placer.cc:177] computation placer alr

In [2]:
import warnings
warnings.filterwarnings('ignore')

## ****Import Dataframe****

In [3]:
df = pd.read_csv(r'/kaggle/input/bank-customer-churn-modeling/Churn_Modelling.csv')
df = df.drop(columns = ['RowNumber', 'CustomerId', 'Surname'], axis = 1)
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
df.columns = df.columns.astype(str).str.strip().str.lower()
df.columns
df.head(3)

Unnamed: 0,creditscore,geography,gender,age,tenure,balance,numofproducts,hascrcard,isactivemember,estimatedsalary,exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1


## ****Exploratory Data Analysis****

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   creditscore      10000 non-null  int64  
 1   geography        10000 non-null  object 
 2   gender           10000 non-null  object 
 3   age              10000 non-null  int64  
 4   tenure           10000 non-null  int64  
 5   balance          10000 non-null  float64
 6   numofproducts    10000 non-null  int64  
 7   hascrcard        10000 non-null  int64  
 8   isactivemember   10000 non-null  int64  
 9   estimatedsalary  10000 non-null  float64
 10  exited           10000 non-null  int64  
dtypes: float64(2), int64(7), object(2)
memory usage: 859.5+ KB


In [6]:
df.isnull().sum()

creditscore        0
geography          0
gender             0
age                0
tenure             0
balance            0
numofproducts      0
hascrcard          0
isactivemember     0
estimatedsalary    0
exited             0
dtype: int64

In [7]:
df.describe()

Unnamed: 0,creditscore,age,tenure,balance,numofproducts,hascrcard,isactivemember,estimatedsalary,exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


In [8]:
df.describe(include = ['object', 'category'])

Unnamed: 0,geography,gender
count,10000,10000
unique,3,2
top,France,Male
freq,5014,5457


## ****Divide Data In training and Testing Data****

In [9]:
input_df = df.iloc[:, : -1]
output_df = df['exited']

## ****OneHotEncoding Using ColumnTransformer****

In [10]:
trf = ColumnTransformer([
    ('OneHotEncode_geography', OneHotEncoder(handle_unknown = 'ignore', sparse_output = False), [1]),
    ('OneHotEncode_gender', OneHotEncoder(handle_unknown = 'ignore', sparse_output = False, drop = 'first'), [2])
], remainder = 'passthrough')


input_df = pd.DataFrame(trf.fit_transform(input_df))


In [11]:
input_df.columns = trf.get_feature_names_out().tolist()
input_df.rename(columns = {'OneHotEncode_geography__geography_France' : 'from_france', 'OneHotEncode_geography__geography_Germany' : 'from_germany',
                 'OneHotEncode_geography__geography_Spain' : 'from_spain', 'OneHotEncode_gender__gender_Male' : 'male',
                 'remainder__creditscore' : 'creditscore', 'remainder__age' : 'age', 'remainder__tenure' : 'tenure', 'remainder__balance' : 'balance',
                 'remainder__numofproducts' : 'numofproducts', 'remainder__hascrcard' : 'hascrcard', 'remainder__isactivemember' : 'isactivemember', 
                 'remainder__estimatedsalary' : 'estimatedsalary'}, inplace = True)
input_df.head()

Unnamed: 0,from_france,from_germany,from_spain,male,creditscore,age,tenure,balance,numofproducts,hascrcard,isactivemember,estimatedsalary
0,1.0,0.0,0.0,0.0,619.0,42.0,2.0,0.0,1.0,1.0,1.0,101348.88
1,0.0,0.0,1.0,0.0,608.0,41.0,1.0,83807.86,1.0,0.0,1.0,112542.58
2,1.0,0.0,0.0,0.0,502.0,42.0,8.0,159660.8,3.0,1.0,0.0,113931.57
3,1.0,0.0,0.0,0.0,699.0,39.0,1.0,0.0,2.0,0.0,0.0,93826.63
4,0.0,0.0,1.0,0.0,850.0,43.0,2.0,125510.82,1.0,1.0,1.0,79084.1


## ****Differentiating into numerical and categorical columns****

In [12]:
num_col = input_df[['creditscore', 'age', 'tenure', 'balance', 'numofproducts', 'estimatedsalary']]
display(num_col.head(2))

cat_col = input_df[['from_france', 'from_germany', 'from_spain', 'male', 'hascrcard', 'isactivemember']]
display(cat_col.head(2))

Unnamed: 0,creditscore,age,tenure,balance,numofproducts,estimatedsalary
0,619.0,42.0,2.0,0.0,1.0,101348.88
1,608.0,41.0,1.0,83807.86,1.0,112542.58


Unnamed: 0,from_france,from_germany,from_spain,male,hascrcard,isactivemember
0,1.0,0.0,0.0,0.0,1.0,1.0
1,0.0,0.0,1.0,0.0,0.0,1.0


## ****Apply Standard Scalar Using Columntransformer****

In [13]:
trf1 = ColumnTransformer([
    ('StandardScalar', StandardScaler(), num_col.columns)
], remainder = 'passthrough')


num_col = pd.DataFrame(trf1.fit_transform(num_col), columns = num_col.columns)
num_col

Unnamed: 0,creditscore,age,tenure,balance,numofproducts,estimatedsalary
0,-0.326221,0.293517,-1.041760,-1.225848,-0.911583,0.021886
1,-0.440036,0.198164,-1.387538,0.117350,-0.911583,0.216534
2,-1.536794,0.293517,1.032908,1.333053,2.527057,0.240687
3,0.501521,0.007457,-1.387538,-1.225848,0.807737,-0.108918
4,2.063884,0.388871,-1.041760,0.785728,-0.911583,-0.365276
...,...,...,...,...,...,...
9995,1.246488,0.007457,-0.004426,-1.225848,0.807737,-0.066419
9996,-1.391939,-0.373958,1.724464,-0.306379,-0.911583,0.027988
9997,0.604988,-0.278604,0.687130,-1.225848,-0.911583,-1.008643
9998,1.256835,0.293517,-0.695982,-0.022608,0.807737,-0.125231


In [14]:
input_df = pd.concat([cat_col, num_col], axis = 1)

display(input_df.head())

display(input_df.shape)


Unnamed: 0,from_france,from_germany,from_spain,male,hascrcard,isactivemember,creditscore,age,tenure,balance,numofproducts,estimatedsalary
0,1.0,0.0,0.0,0.0,1.0,1.0,-0.326221,0.293517,-1.04176,-1.225848,-0.911583,0.021886
1,0.0,0.0,1.0,0.0,0.0,1.0,-0.440036,0.198164,-1.387538,0.11735,-0.911583,0.216534
2,1.0,0.0,0.0,0.0,1.0,0.0,-1.536794,0.293517,1.032908,1.333053,2.527057,0.240687
3,1.0,0.0,0.0,0.0,0.0,0.0,0.501521,0.007457,-1.387538,-1.225848,0.807737,-0.108918
4,0.0,0.0,1.0,0.0,1.0,1.0,2.063884,0.388871,-1.04176,0.785728,-0.911583,-0.365276


(10000, 12)

## ****Train Test Split****

In [15]:
X_train, X_test, y_train, y_test = train_test_split(input_df, output_df, test_size = 0.2, random_state = 42)

In [16]:
X_train.shape

(8000, 12)

## ****Applying Neural Network****

In [17]:
ann = Sequential()

In [18]:
ann.add(Dense(10, input_dim = 12, activation = 'relu'))
ann.add(Dense(6, activation = 'relu'))
ann.add(Dense(3, activation = 'relu'))
ann.add(Dense(1, activation = 'sigmoid'))

I0000 00:00:1768736116.798639      24 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1768736116.799301      24 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


In [19]:
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [20]:
ann.fit(X_train, y_train, batch_size = 100, epochs = 50)

Epoch 1/50


I0000 00:00:1768736119.198962      68 service.cc:152] XLA service 0x7a6094005900 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1768736119.198996      68 service.cc:160]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1768736119.199000      68 service.cc:160]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1768736119.526987      68 cuda_dnn.cc:529] Loaded cuDNN version 91002


[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.4375 - loss: 0.7318
Epoch 2/50


I0000 00:00:1768736120.577216      68 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7739 - loss: 0.6688
Epoch 3/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7922 - loss: 0.6478
Epoch 4/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7950 - loss: 0.6299
Epoch 5/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7992 - loss: 0.6120
Epoch 6/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7936 - loss: 0.5988
Epoch 7/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8071 - loss: 0.5799
Epoch 8/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8147 - loss: 0.5650
Epoch 9/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8194 - loss: 0.5501
Epoch 10/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x7a6160f988f0>

In [21]:
y_pred = ann.predict(X_test)
pred = []
for i in y_pred:
    if i[0] > 0.5:
        pred.append(1)
    else:
        pred.append(0)

y_pred = pd.Series(pred)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step


In [22]:
y_pred_train = ann.predict(X_train)
pred_train = []
for i in y_pred_train:
    if i[0] > 0.5:
        pred_train.append(1)
    else:
        pred_train.append(0)

y_pred_train = pd.Series(pred_train)

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


## ****Accuracy Score****

In [23]:
# Accuracy Score of Test Data

print(np.round(accuracy_score(y_test, y_pred) * 100, 2))

86.5


In [24]:
# Accuracy Of train Data
print(np.round(accuracy_score(y_train, y_pred_train) * 100, 2))

86.21
