In [24]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [25]:
df = pd.read_csv("../../../Data/Churn_Modelling.csv")
df.drop(columns=['RowNumber','CustomerId','Surname'],inplace=True)
df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [26]:
df.isnull().mean() * 100

CreditScore        0.0
Geography          0.0
Gender             0.0
Age                0.0
Tenure             0.0
Balance            0.0
NumOfProducts      0.0
HasCrCard          0.0
IsActiveMember     0.0
EstimatedSalary    0.0
Exited             0.0
dtype: float64

In [27]:
df['Geography'].value_counts()

Geography
France     5014
Germany    2509
Spain      2477
Name: count, dtype: int64

# 1. Data Preprocessing

In [28]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

In [29]:

# Define numerical and categorical features
numerical_features = ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'EstimatedSalary']
categorical_features = ['Geography', 'Gender']

In [30]:

# Pipelines
scaling_pipe = Pipeline([
    ("scaler", StandardScaler())
])

ohe_pipe = Pipeline([
    ("ohe", OneHotEncoder())
])

In [31]:

# Preprocessor
preprocessor = ColumnTransformer(transformers=[
    ("scaling", scaling_pipe, numerical_features),
    ("ohe", ohe_pipe, categorical_features)
], remainder='passthrough')


In [32]:
X, y = df.iloc[:,:-1],df.iloc[:,-1]
X

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.00,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.80,3,1,0,113931.57
3,699,France,Female,39,1,0.00,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10
...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77
9997,709,France,Female,36,7,0.00,1,0,1,42085.58
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52


In [33]:
X = preprocessor.fit_transform(X)
X

array([[-0.32622142,  0.29351742, -1.04175968, ...,  0.        ,
         1.        ,  1.        ],
       [-0.44003595,  0.19816383, -1.38753759, ...,  0.        ,
         0.        ,  1.        ],
       [-1.53679418,  0.29351742,  1.03290776, ...,  0.        ,
         1.        ,  0.        ],
       ...,
       [ 0.60498839, -0.27860412,  0.68712986, ...,  0.        ,
         0.        ,  1.        ],
       [ 1.25683526,  0.29351742, -0.69598177, ...,  1.        ,
         1.        ,  0.        ],
       [ 1.46377078, -1.04143285, -0.35020386, ...,  0.        ,
         1.        ,  0.        ]], shape=(10000, 13))

In [34]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)

In [35]:
X_train = np.asarray(X_train).astype(np.float32)
X_test = np.asarray(X_test).astype(np.float32)
y_train = np.asarray(y_train).astype(np.float32)
y_test = np.asarray(y_test).astype(np.float32)

In [36]:
type(X_train)

numpy.ndarray

# 2. Dataset

In [37]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self,features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.float32)
        
    def __len__(self):
        return self.features.shape[0]
    
    def __getitem__(self, index):
        return self.features[index], self.labels[index]

In [38]:
train_dataset = CustomDataset(X_train,y_train)
test_dataset = CustomDataset(X_test,y_test)

In [39]:
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)

# 3. Build Model

In [40]:
class BinaryClassifier(nn.Module):
    def __init__(self, X_train):
        super().__init__()
        
        self.network = nn.Sequential(
            nn.Linear(X_train.shape[1], 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.4),
            
            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.4),
            
            nn.Linear(32, 1),
            nn.Sigmoid()
        )
        
    def forward(self,x):
        return self.network(x)

# 4. Train Model

In [41]:
epochs = 50
learning_rate = 0.01

In [42]:
model = BinaryClassifier(X_train)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

binaryCrossEntropy = nn.BCELoss()

optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate,weight_decay=1e-4)

In [43]:
for epoch in range(epochs):
    epoch_loss = 0
    
    for batch_features, batch_labels in train_dataloader:
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
        
        y_pred = model(batch_features)
        
        loss = binaryCrossEntropy(y_pred, batch_labels.reshape(32,1))
        
        optimizer.zero_grad()
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
    print(f"Epoch {epoch+1}: Loss {epoch_loss/len(train_dataloader)}")

Epoch 1: Loss 0.4490091764330864
Epoch 2: Loss 0.40928164702653885
Epoch 3: Loss 0.393905785381794
Epoch 4: Loss 0.38843509274721144
Epoch 5: Loss 0.38554037004709246
Epoch 6: Loss 0.3900749136209488
Epoch 7: Loss 0.38406259459257125
Epoch 8: Loss 0.3865521128773689
Epoch 9: Loss 0.37991738539934156
Epoch 10: Loss 0.38038010501861574
Epoch 11: Loss 0.3782364728450775
Epoch 12: Loss 0.38140191447734834
Epoch 13: Loss 0.38003012084960935
Epoch 14: Loss 0.3796283766627312
Epoch 15: Loss 0.37858306473493575
Epoch 16: Loss 0.3790110367536545
Epoch 17: Loss 0.37679072642326356
Epoch 18: Loss 0.37814619463682175
Epoch 19: Loss 0.3778676562309265
Epoch 20: Loss 0.3767044889330864
Epoch 21: Loss 0.3793074035048485
Epoch 22: Loss 0.3761103297472
Epoch 23: Loss 0.37555671590566636
Epoch 24: Loss 0.37859697711467744
Epoch 25: Loss 0.38203589063882826
Epoch 26: Loss 0.3759334192276001
Epoch 27: Loss 0.3743606223464012
Epoch 28: Loss 0.3724740027785301
Epoch 29: Loss 0.3776696724891663
Epoch 30: Los

# 5. Evaluation

In [44]:
model.eval()

BinaryClassifier(
  (network): Sequential(
    (0): Linear(in_features=13, out_features=64, bias=True)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.4, inplace=False)
    (4): Linear(in_features=64, out_features=32, bias=True)
    (5): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.4, inplace=False)
    (8): Linear(in_features=32, out_features=1, bias=True)
    (9): Sigmoid()
  )
)

In [45]:
correct = 0
total = 0
with torch.no_grad():
    for batch_features, batch_labels in test_dataloader:
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
        
        y_pred = model(batch_features)
        _, prediction = torch.max(y_pred, 1)
        
        correct += (prediction == batch_labels).float().sum().item()
        total += batch_features.shape[0]
    print("Testing Accuracy: ",correct/total)

Testing Accuracy:  0.8065


In [46]:
correct = 0
total = 0
with torch.no_grad():
    for batch_features, batch_labels in train_dataloader:
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
        
        y_pred = model(batch_features)
        _, prediction = torch.max(y_pred, 1)
        
        correct += (prediction == batch_labels).float().sum().item()
        total += batch_features.shape[0]
    print("Training Accuracy: ",correct/total)

Training Accuracy:  0.79375
