In [86]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import TensorDataset, DataLoader
le = LabelEncoder()

In [87]:
bank = pd.read_csv("Bank_Personal_Loan_Modelling.csv")
bank = bank.drop(columns=['ID', 'ZIP Code', 'Online'])
bank

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,CreditCard
0,25,1,49,4,1.6,1,0,0,1,0,0
1,45,19,34,3,1.5,1,0,0,1,0,0
2,39,15,11,1,1.0,1,0,0,0,0,0
3,35,9,100,1,2.7,2,0,0,0,0,0
4,35,8,45,4,1.0,2,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
4995,29,3,40,1,1.9,3,0,0,0,0,0
4996,30,4,15,4,0.4,1,85,0,0,0,0
4997,63,39,24,2,0.3,3,0,0,0,0,0
4998,65,40,49,3,0.5,2,0,0,0,0,0


# Pytorch instead of tensorflow

I've heard much better things about pythorch and I am more familiar with it from using it at a higher level through huggingface

They also support better hardware interface for example you can choose the exact device you want

In [88]:

X = bank.drop(columns=['Income'])
y = bank['Income']


X_bank_train, X_bank_test, y_bank_train, y_bank_test = train_test_split(
    X, y, test_size=0.2
)

scaler = StandardScaler()
X_bank_train = scaler.fit_transform(X_bank_train)
X_bank_test = scaler.transform(X_bank_test)


In [89]:
X_train_tensor = torch.tensor(X_bank_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_bank_train.values, dtype=torch.float32).view(-1, 1)

X_test_tensor = torch.tensor(X_bank_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_bank_test.values, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [90]:
import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class IncomeRegressionModel(nn.Module):
    def __init__(self, input_size):
        super(IncomeRegressionModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512,512),
            nn.ReLU(),
            nn.Linear(512, 1)
        )
    
    def forward(self, x):
        return self.model(x)

input_size = X_train_tensor.shape[1]
print(f"Device: {device}")
model = IncomeRegressionModel(input_size).to(device)


Device: cuda


In [91]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [92]:
num_epochs = 100

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        
        optimizer.zero_grad()

        outputs = model(inputs)
        
        loss = criterion(outputs, targets)
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')


Epoch [1/100], Loss: 7410.1813
Epoch [2/100], Loss: 6395.3807
Epoch [3/100], Loss: 2914.5045
Epoch [4/100], Loss: 1174.8389
Epoch [5/100], Loss: 1037.8655
Epoch [6/100], Loss: 994.7631
Epoch [7/100], Loss: 973.6826
Epoch [8/100], Loss: 955.4035
Epoch [9/100], Loss: 940.2548
Epoch [10/100], Loss: 926.2599
Epoch [11/100], Loss: 914.5617
Epoch [12/100], Loss: 904.5663
Epoch [13/100], Loss: 894.3154
Epoch [14/100], Loss: 884.5390
Epoch [15/100], Loss: 878.4860
Epoch [16/100], Loss: 869.3465
Epoch [17/100], Loss: 862.9129
Epoch [18/100], Loss: 856.8071
Epoch [19/100], Loss: 851.3903
Epoch [20/100], Loss: 847.3899
Epoch [21/100], Loss: 841.9245
Epoch [22/100], Loss: 837.7283
Epoch [23/100], Loss: 834.7691
Epoch [24/100], Loss: 830.7172
Epoch [25/100], Loss: 828.7064
Epoch [26/100], Loss: 825.4128
Epoch [27/100], Loss: 823.6163
Epoch [28/100], Loss: 820.5045
Epoch [29/100], Loss: 818.8256
Epoch [30/100], Loss: 817.1426
Epoch [31/100], Loss: 814.0615
Epoch [32/100], Loss: 811.6760
Epoch [33/10

In [93]:
model.eval()
with torch.no_grad():
    test_loss = 0.0
    predictions = []
    actuals = []
    
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()
        
        predictions.append(outputs.cpu().numpy())
        actuals.append(targets.cpu().numpy())
    
    avg_test_loss = test_loss / len(test_loader)
    print(f'Test Loss (MSE): {avg_test_loss:.4f}')
    
    predictions = np.vstack(predictions)
    actuals = np.vstack(actuals)
    
    print("First 10 predicted values:")
    print(predictions[:10])

    print("\nFirst 10 actual values:")
    print(actuals[:10])


Test Loss (MSE): 818.9819
First 10 predicted values:
[[ 46.537117]
 [ 57.886566]
 [ 46.78124 ]
 [156.31871 ]
 [ 85.493965]
 [ 59.028194]
 [158.71599 ]
 [ 74.17402 ]
 [ 39.79357 ]
 [ 81.053665]]

First 10 actual values:
[[ 19.]
 [ 40.]
 [ 11.]
 [170.]
 [ 43.]
 [ 40.]
 [195.]
 [ 64.]
 [ 33.]
 [ 84.]]


In [94]:
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np

predictions = np.vstack(predictions)
actuals = np.vstack(actuals)

mae = mean_absolute_error(actuals, predictions)
r2 = r2_score(actuals, predictions)

print(f'Test MAE: {mae:.4f}')
print(f'Test R² Score: {r2:.4f}')


Test MAE: 22.3456
Test R² Score: 0.6369


In [192]:
def predict_income(age, experience, family, ccavg, education, mortgage, personal_loan, securities_account, cd_account, credit_card):

    input_data = pd.DataFrame({
        'Age': [age],
        'Experience': [experience],
        'Family': [family],
        'CCAvg': [ccavg],
        'Education': [education],
        'Mortgage': [mortgage],
        'Personal Loan': [personal_loan],
        'Securities Account': [securities_account],
        'CD Account': [cd_account],
        'CreditCard': [credit_card]
    })

    input_scaled = scaler.transform(input_data)
    

    input_tensor = torch.tensor(input_scaled, dtype=torch.float32)
    

    model.eval()
    with torch.no_grad():
        input_tensor = input_tensor.to(device)
        predicted_income = model(input_tensor)
    
    return predicted_income.item()

example_income = predict_income(
    age=59, 
    experience=1, 
    family=4, 
    ccavg=5, 
    education=3, 
    mortgage=150000, 
    personal_loan=20000, 
    securities_account=0, 
    cd_account=0,
    credit_card=1
)

print(f"Predicted Income: {example_income}")

Predicted Income: 2445347.25
