## Customer Churn Prediction

#### mounting google drive

In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


#### Importing Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### Loading Dataset

In [3]:
df = pd.read_csv('/content/drive/MyDrive/Churn_Modelling.csv')

#### First view

In [4]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


#### Number of rows and columns

In [5]:
df.shape

(10000, 14)

#### Handling Null Values

In [6]:
df.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

#### Duplicate values

In [7]:
df.duplicated().sum()

0

#### Feature selection

In [8]:
df=df.drop(['RowNumber','Surname','Geography'],axis=1)

In [9]:
df.head()

Unnamed: 0,CustomerId,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,15634602,619,Female,42,2,0.0,1,1,1,101348.88,1
1,15647311,608,Female,41,1,83807.86,1,0,1,112542.58,0
2,15619304,502,Female,42,8,159660.8,3,1,0,113931.57,1
3,15701354,699,Female,39,1,0.0,2,0,0,93826.63,0
4,15737888,850,Female,43,2,125510.82,1,1,1,79084.1,0


#### Categorical encoding

In [10]:
df['Gender']=df['Gender'].map({'Female':0,'Male':1})

In [11]:
df.head()

Unnamed: 0,CustomerId,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,15634602,619,0,42,2,0.0,1,1,1,101348.88,1
1,15647311,608,0,41,1,83807.86,1,0,1,112542.58,0
2,15619304,502,0,42,8,159660.8,3,1,0,113931.57,1
3,15701354,699,0,39,1,0.0,2,0,0,93826.63,0
4,15737888,850,0,43,2,125510.82,1,1,1,79084.1,0


### Correlation Heatmap

In [41]:
import plotly.express as px
corr_matrix = df.corr()

# Create the correlation heatmap
fig = px.imshow(corr_matrix,
                text_auto=True,
                aspect="auto",
                labels=dict(color="Correlation"),
                x=corr_matrix.columns,
                y=corr_matrix.columns,
                color_continuous_scale='inferno',  # Set color scale to 'inferno'
                zmin=-1, zmax=1)

fig.update_layout(title='Correlation Heatmap',
                  xaxis_nticks=36)

# Show the heatmap
fig.show()

In [12]:
df['Gender'].unique()

array([0, 1])

In [13]:
df=df.drop('CustomerId',axis=1)

#### Independent and Dependent Variable

In [14]:
from sklearn.model_selection import train_test_split

In [15]:
X=df.drop('Exited',axis=1)
y=df['Exited']

In [16]:
X.shape

(10000, 9)

In [17]:
y.shape

(10000,)

#### Splitting Data into Training and Testing

In [18]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=240)

In [19]:
print('x train shape',X_train.shape)
print('x test shape',X_test.shape)
print('y train shape',y_train.shape)
print('y test shape',y_test.shape)

x train shape (7500, 9)
x test shape (2500, 9)
y train shape (7500,)
y test shape (2500,)


In [20]:
from sklearn.preprocessing import StandardScaler

#### Standardizing the data

In [21]:
scaler=StandardScaler()

In [22]:
X_train_scaled=scaler.fit_transform(X_train)
X_test_scaled=scaler.transform(X_test)

In [23]:
X_train_scaled

array([[-2.28987422, -1.0957665 , -1.1399744 , ...,  0.64759498,
         0.97056631, -1.32664916],
       [ 0.35380242, -1.0957665 ,  0.3920605 , ..., -1.54417503,
        -1.0303263 , -0.54860212],
       [ 1.23158568,  0.91260319, -0.85271786, ..., -1.54417503,
         0.97056631, -0.85963607],
       ...,
       [ 0.75655003,  0.91260319, -0.46970913, ..., -1.54417503,
        -1.0303263 ,  0.99135019],
       [-0.62724946, -1.0957665 ,  1.15807795, ...,  0.64759498,
         0.97056631,  1.57991669],
       [ 0.73589631,  0.91260319,  0.67931704, ...,  0.64759498,
         0.97056631, -0.10950967]])

#### Importing Deep learning Libraries

In [24]:
import tensorflow

In [25]:
from tensorflow import keras

In [26]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [27]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Accuracy

#### Regularization techniques to reduce overfitting

In [28]:
from tensorflow.keras.regularizers import l1

In [29]:
input_shape = (X_train_scaled.shape[1],)

#### Setting up the model

#### First hidden layer and activation function

In [30]:
model = Sequential()

# Add input layer and first hidden layer
model.add(Dense(units=128, activation='relu', input_shape=input_shape,kernel_regularizer=l1(0.001)))


#### Second Hidden layer

In [31]:
model.add(Dense(units=64, activation='relu'))

#### Output Layer

In [32]:
model.add(Dense(units=1, activation='sigmoid'))


#### Compiling the model

In [33]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               1280      
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 9601 (37.50 KB)
Trainable params: 9601 (37.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


#### Fitting training data into model

In [34]:
history = model.fit(X_train_scaled, y_train,
                    epochs=100,
                    batch_size=32,
                    validation_split=0.2)



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [36]:
y_log=model.predict(X_test_scaled)



In [37]:
y_pred=np.where(y_log>0.5,'will churn','will not churn')

In [38]:
y_pred

array([['will not churn'],
       ['will not churn'],
       ['will not churn'],
       ...,
       ['will not churn'],
       ['will not churn'],
       ['will not churn']], dtype='<U14')

In [42]:
# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f'Test accuracy: {test_accuracy}')

Test accuracy: 0.8492000102996826


## Here we are getting accuracy of 84.92%