## Model Building using Pytorch

In [75]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset

In [None]:
X_train = pd.read_csv('../data/processed/X_train_processed.csv')
X_test = pd.read_csv('../data/processed/X_test_processed.csv')
y_train = pd.read_csv('../data/processed/y_train.csv')
y_test = pd.read_csv('../data/processed/y_test.csv')

In [34]:
len(y_test[y_test['churn'] == 1])

407

In [None]:
# Convert Pandas to PyTorch Tensors
# We use .values to get the numpy array, then convert to float32
X_train_ts = torch.tensor(X_train.values.astype('float32'))
y_train_ts = torch.tensor(y_train.values.astype('float32'))
X_test_ts = torch.tensor(X_test.values.astype('float32'))
y_test_ts = torch.tensor(y_test.values.astype('float32'))

In [53]:
X_train_ts

tensor([[ 1.0586,  1.0000,  1.7151,  ...,  1.0421,  0.0000,  0.0000],
        [ 0.9136,  1.0000, -0.6599,  ..., -0.6236,  1.0000,  0.0000],
        [ 1.0793,  0.0000, -0.1849,  ...,  0.3081,  1.0000,  0.0000],
        ...,
        [ 0.1682,  0.0000, -0.1849,  ..., -0.5581,  0.0000,  0.0000],
        [ 0.3753,  1.0000, -0.3749,  ..., -1.3515,  0.0000,  1.0000],
        [ 1.5659,  1.0000,  1.1451,  ..., -1.0269,  0.0000,  1.0000]])

In [None]:
# Create DataLoaders
train_ds = TensorDataset(X_train_ts, y_train_ts)
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)

In [None]:
class ChurnANN(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        # Input Layer -> Hidden Layer 1 (64 neurons)
        self.fc1 = nn.Linear(input_size, 64)
        # Hidden Layer 1 -> Hidden Layer 2 (32 neurons)
        self.fc2 = nn.Linear(64, 32)
        # Hidden Layer 2 -> Output (1 neuron for probability)
        self.fc3 = nn.Linear(32, 1)
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2) # Prevents overfitting

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = ChurnANN(X_train.shape[1])

In [56]:
print(model.fc1.weight.requires_grad) 

True


In [None]:
# Calculate weight (approx 1593 / 407 â‰ˆ 3.9)
# This tells the model to care ~4x more about getting class 1 right.
weight = torch.tensor([4.0]) 

# Note: Using BCEWithLogitsLoss is more numerically stable than BCELoss + Sigmoid
criterion = nn.BCEWithLogitsLoss(pos_weight=weight)

optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 100
for epoch in range(epochs):
    model.train() # Tells PyTorch to "turn on" Dropout. It will randomly zero out some neurons to force the network to learn more robust patterns and prevent overfitting.
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
    
    if (epoch + 1) % 20 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

Epoch [20/100], Loss: 0.7294
Epoch [40/100], Loss: 0.6904
Epoch [60/100], Loss: 0.6371
Epoch [80/100], Loss: 0.6397
Epoch [100/100], Loss: 0.6946


In [None]:
from sklearn.metrics import classification_report

model.eval() # Tells PyTorch to "turn off" Dropout. During testing or prediction, you want the model to use 100% of its learned power, not a randomized version of it.
with torch.no_grad():
    logits = model(X_test_ts)
    # Convert probabilities to 0 or 1
    y_pred_tags = (logits > 0).float() # Since we used BCEWithLogitsLoss, the output is logits. A logit of 0 corresponds to a probability of 0.5.
    
print(classification_report(y_test, y_pred_tags))

              precision    recall  f1-score   support

           0       0.93      0.77      0.84      1593
           1       0.46      0.77      0.58       407

    accuracy                           0.77      2000
   macro avg       0.69      0.77      0.71      2000
weighted avg       0.83      0.77      0.79      2000



This NN model provided a better Recall than the SVC model.

## Test on New Data Point

In [60]:
import pickle

with open('../models/scaler.pkl', 'rb') as f:
    loaded_scaler = pickle.load(f)

In [61]:
new_customer = {
    "credit_score": 300,
    "gender": 1,     # Male
    "age": 42,
    "tenure": 5,
    "balance": 75000,
    "products_number": 2,
    "credit_card": 1,
    "active_member" : 0,
    "estimated_salary": 45000,
    "country_Germany": 1,
    "country_Spain": 0
}

In [62]:
new_df = pd.DataFrame([new_customer])

num_features = ['credit_score', 'age', 'tenure', 'balance',
                'products_number', 'estimated_salary']

In [63]:
new_df[num_features] = loaded_scaler.transform(new_df[num_features])

In [65]:
new_customer_tensor = torch.tensor(new_df.values.astype('float32'))

In [71]:
model.eval() # Set to evaluation mode
with torch.no_grad():
    # Get raw logit
    logit = model(new_customer_tensor)
    print(logit)
    # Convert logit to probability (0 to 1)
    probability = torch.sigmoid(logit).item()
    
    # Determine class based on threshold 0 (or 0.5 for probability)
    prediction = 1 if logit.item() > 0 else 0

print(f"Churn Probability: {probability:.2%}")
print(f"Prediction: {prediction} {'Churn' if prediction == 1 else 'Stay'}")

tensor([[-0.6734]])
Churn Probability: 33.77%
Prediction: 0 Stay
