In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# Location of the churn dataset CSV.
# NOTE(review): absolute path (looks like a Google Colab working directory) —
# adjust it when running the notebook anywhere else.
path = "/content/Churn_Modelling.csv"

# Read the CSV into a DataFrame.
df = pd.read_csv(path)
# Bare last expression: the notebook renders the frame as this cell's output.
df

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [None]:
# Feature columns kept for the exploratory-analysis frame.
# RowNumber, CustomerId, Surname and the 'Exited' target are left out.
selected_columns = [
    'CreditScore',
    'Geography',
    'Gender',
    'Age',
    'Tenure',
    'Balance',
    'NumOfProducts',
    'HasCrCard',
    'IsActiveMember',
    'EstimatedSalary',
]
# All rows, only the columns listed above.
eda_df = df.loc[:, selected_columns]

In [None]:
# Render the feature-only frame (the columns listed in selected_columns) as the cell output.
eda_df

In [None]:
# Percentage breakdown of the 'Exited' values within each gender.
exit_percentages = (
    df.groupby('Gender')['Exited']
    .value_counts(normalize=True)  # fraction of each Exited value per gender
    .unstack()                     # move the Exited values from the row index into column headers
    .mul(100)                      # fractions -> percentages
)

# Print a caption followed by the table.
print("Percentage of exits and non-exits by gender:")
print(exit_percentages)

Percentage of exits and non-exits by gender:
Exited          0          1
Gender                      
Female  74.928461  25.071539
Male    83.544072  16.455928


In [5]:
# Feature matrix: every column from position 3 up to (not including) the last one.
# NOTE(review): lowercase `x` is rebuilt as uppercase `X` in a later cell and is not
# referenced again in the visible notebook — this looks like a leftover duplicate;
# confirm and remove.
x = df.iloc[:,3:-1].values
# Target vector: the last column of the frame.
y = df.iloc[:,-1].values

In [6]:
# Categorical data - Gender - label encoding.
# Column index 2 of the feature matrix holds Gender; it is overwritten in place
# with the integer codes produced by the encoder.
# NOTE(review): this operates on lowercase `x`, which a later cell duplicates as `X`;
# the cell appears redundant — confirm and remove.

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
x[:, 2] = le.fit_transform(x[:, 2])

In [7]:
# Features: columns at positions 3 .. second-to-last, as a NumPy array.
X = df.iloc[:, 3:-1].to_numpy()
# Target: the last column, as a NumPy array.
y = df.iloc[:, -1].to_numpy()

#**Categorical Data - Gender - Label Encoder**

In [8]:
from sklearn.preprocessing import LabelEncoder

# Learn the distinct values of feature column 2 (Gender), then overwrite that
# column in place with their integer codes.
le = LabelEncoder()
le.fit(X[:, 2])
X[:, 2] = le.transform(X[:, 2])

#**Categorical Data - Geography - OneHotEncoder**

In [9]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode feature column 1 (Geography); all other columns are passed
# through untouched.
# NOTE: X is replaced by its own transform, so running this cell twice without
# rebuilding X first would encode the already-encoded matrix again.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1])],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))

#**Train and Test Data**

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing.
# stratify=y keeps the proportion of each target class identical in the train
# and test splits; without it a random split of an imbalanced target (the
# gender breakdown earlier in the notebook shows roughly 16-25% exits) can leave
# the two splits with different class ratios.
# random_state=0 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y
)

#**Feature Scaling**

In [11]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so the test data never influences the fit.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

#**Part II: Artificial Neural Network**

In [12]:
import tensorflow as tf

In [14]:
# Show the installed TensorFlow version (this run reported '2.13.0').
tf.__version__

'2.13.0'

#**Building ANN**

#**Initializing ANN**

In [15]:
# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation and batch shuffling — and therefore the training results —
# are repeatable after "Restart & Run All". The value 0 matches the
# random_state used for the train/test split.
tf.keras.utils.set_random_seed(0)

# Create an empty Sequential model. Layers are appended to it in the following
# cells, after which it is compiled and trained.
ann = tf.keras.models.Sequential()

#**Adding first hidden layer**

In [16]:
# ann.add() appends a layer to the Sequential model `ann`.
#
# tf.keras.layers.Dense(...) is a fully connected layer: every unit receives
# input from every output of the previous layer.
#
# units=6: this layer has 6 units (neurons).
#
# activation='relu': Rectified Linear Unit, relu(x) = max(0, x).
# Negative inputs become 0 and positive inputs pass through unchanged, so the
# output range is [0, +inf) — it is NOT limited to 0-1.
# ReLU is piecewise linear, but the kink at 0 makes it a non-linear function
# overall; that non-linearity is what lets the network model non-linear
# relationships.

# First hidden layer: Dense, 6 units, ReLU activation.
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

#**Adding the second hidden layer**

In [17]:
# Second hidden layer: same configuration as the first (Dense, 6 units, ReLU).
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

#**Adding the output layer**

In [18]:

# Output layer, appended to the Sequential model with ann.add().
#
# tf.keras.layers.Dense(...): a fully connected layer.
#
# units=1: a single unit, i.e. one output value per sample, which is all a
# binary classifier needs.
#
# activation='sigmoid': squashes the unit's output into the range (0, 1), so it
# can be read as the predicted probability of the positive class (Exited = 1).
# The probability of the other class is 1 minus that value.
# (Outputs that sum to 1 across several units are a property of softmax,
# not of sigmoid.)
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

#**Compile ANN**

In [19]:
# Configure the model for training.
#
#   optimizer - 'adam': an optimisation algorithm that adapts the learning rate
#               of each parameter.
#   loss      - 'binary_crossentropy': the loss for binary classification; it
#               measures the gap between the true labels and the predicted
#               probabilities.
#   metrics   - ['accuracy']: the proportion of correctly classified samples,
#               computed and displayed during training.
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

#**Training the ANN on the Training set**

In [20]:
# Train the model on the training split.
#
#   X_train / y_train - input features and their target labels.
#   batch_size=32     - number of samples per gradient update; the weights are
#                       updated after each batch of 32 samples.
#   epochs=15         - number of full passes over the training data.
#
# fit() returns a History object holding the per-epoch loss and accuracy.
# Binding it to a name keeps those curves available for later inspection or
# plotting, and stops the cell from ending in a bare
# "<keras...History at 0x...>" repr.
history_relu = ann.fit(X_train, y_train, batch_size=32, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x7ca1f8eb4eb0>

#**Part II (repeated): Artificial Neural Network with sigmoid hidden layers**

In [22]:
import tensorflow as tf

In [23]:
# Show the installed TensorFlow version (this run reported '2.13.0').
tf.__version__

'2.13.0'

#**Building ANN**

#**Initializing ANN**

In [24]:
# Seed Python's `random`, NumPy and TensorFlow so this second training run is
# repeatable after "Restart & Run All".
tf.keras.utils.set_random_seed(0)

# Start a fresh, empty Sequential model for the sigmoid-activation variant.
# NOTE: this rebinds `ann`, so the model built and trained earlier in the
# notebook is no longer reachable through that name.
ann = tf.keras.models.Sequential()

#**Adding first hidden layer**

In [25]:
# First hidden layer: Dense, 6 units, sigmoid activation
# (the earlier model used 'relu' at this position).
ann.add(tf.keras.layers.Dense(units=6, activation='sigmoid'))

#**Adding the second hidden layer**

In [26]:
# Second hidden layer: Dense, 6 units, sigmoid activation.
ann.add(tf.keras.layers.Dense(units=6, activation='sigmoid'))

#**Adding the output layer**

In [27]:
# Output layer: one sigmoid unit whose value in (0, 1) is read as the predicted
# probability of the positive class.
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

#**Compile ANN**

In [28]:
# Configure training: Adam optimiser, binary cross-entropy loss, accuracy
# reported as the metric.
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

#**Training the ANN on the Training set**

In [29]:
# Train the sigmoid-activation model: 15 passes over the training split,
# updating the weights after every batch of 32 samples.
# The returned History (per-epoch loss/accuracy) is kept under its own name so
# it can be inspected or plotted later, and so the cell does not end in a bare
# "<keras...History at 0x...>" repr.
history_sigmoid = ann.fit(X_train, y_train, batch_size=32, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x7ca1e63d4460>