In [176]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import TensorDataset, DataLoader
le = LabelEncoder()

In [177]:
bank = pd.read_csv("Bank_Personal_Loan_Modelling.csv")
bank = bank.drop(columns=['ID', 'ZIP Code', 'Online'])
bank

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,CreditCard
0,25,1,49,4,1.6,1,0,0,1,0,0
1,45,19,34,3,1.5,1,0,0,1,0,0
2,39,15,11,1,1.0,1,0,0,0,0,0
3,35,9,100,1,2.7,2,0,0,0,0,0
4,35,8,45,4,1.0,2,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
4995,29,3,40,1,1.9,3,0,0,0,0,0
4996,30,4,15,4,0.4,1,85,0,0,0,0
4997,63,39,24,2,0.3,3,0,0,0,0,0
4998,65,40,49,3,0.5,2,0,0,0,0,0


# Pytorch instead of tensorflow

I've heard much better things about pythorch and I am more familiar with it from using it at a higher level through huggingface

They also support better hardware interface for example you can choose the exact device you want

In [178]:

X = bank.drop(columns=['Income'])
y = bank['Income']


X_bank_train, X_bank_test, y_bank_train, y_bank_test = train_test_split(
    X, y, test_size=0.2
)

scaler = StandardScaler()
X_bank_train = scaler.fit_transform(X_bank_train)
X_bank_test = scaler.transform(X_bank_test)


In [179]:
X_train_tensor = torch.tensor(X_bank_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_bank_train.values, dtype=torch.float32).view(-1, 1)

X_test_tensor = torch.tensor(X_bank_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_bank_test.values, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [180]:
import torch.nn as nn

class IncomeRegressionModel(nn.Module):
    def __init__(self, input_size):
        super(IncomeRegressionModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    
    def forward(self, x):
        return self.model(x)

input_size = X_train_tensor.shape[1]
model = IncomeRegressionModel(input_size)


In [181]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [182]:
num_epochs = 100

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, targets in train_loader:
        optimizer.zero_grad()

        outputs = model(inputs)
        
        loss = criterion(outputs, targets)
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')


Epoch [1/100], Loss: 6717.2926
Epoch [2/100], Loss: 2010.7825
Epoch [3/100], Loss: 956.1518
Epoch [4/100], Loss: 912.4029
Epoch [5/100], Loss: 885.9154
Epoch [6/100], Loss: 865.2589
Epoch [7/100], Loss: 850.5456
Epoch [8/100], Loss: 837.0414
Epoch [9/100], Loss: 826.4286
Epoch [10/100], Loss: 816.4517
Epoch [11/100], Loss: 810.4098
Epoch [12/100], Loss: 805.1911
Epoch [13/100], Loss: 798.9734
Epoch [14/100], Loss: 794.6756
Epoch [15/100], Loss: 787.7231
Epoch [16/100], Loss: 784.8422
Epoch [17/100], Loss: 781.5832
Epoch [18/100], Loss: 777.4230
Epoch [19/100], Loss: 773.1186
Epoch [20/100], Loss: 770.7693
Epoch [21/100], Loss: 766.2480
Epoch [22/100], Loss: 764.1809
Epoch [23/100], Loss: 760.4811
Epoch [24/100], Loss: 760.2668
Epoch [25/100], Loss: 755.1181
Epoch [26/100], Loss: 755.0453
Epoch [27/100], Loss: 750.4252
Epoch [28/100], Loss: 748.0126
Epoch [29/100], Loss: 747.0129
Epoch [30/100], Loss: 744.5207
Epoch [31/100], Loss: 742.9684
Epoch [32/100], Loss: 741.8725
Epoch [33/100],

In [183]:
model.eval()
with torch.no_grad():
    test_loss = 0.0
    predictions = []
    actuals = []
    
    for inputs, targets in test_loader:
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()
        
        predictions.append(outputs.numpy())
        actuals.append(targets.numpy())
    
    avg_test_loss = test_loss / len(test_loader)
    print(f'Test Loss (MSE): {avg_test_loss:.4f}')
    
    predictions = np.vstack(predictions)
    actuals = np.vstack(actuals)
    
    print("First 10 predicted values:")
    print(predictions[:10])

    print("\nFirst 10 actual values:")
    print(actuals[:10])


Test Loss (MSE): 752.8246
First 10 predicted values:
[[181.98416 ]
 [ 50.539593]
 [ 96.194466]
 [ 41.102676]
 [ 40.248135]
 [155.61789 ]
 [120.75014 ]
 [ 78.2981  ]
 [ 76.46817 ]
 [ 37.227688]]

First 10 actual values:
[[160.]
 [ 12.]
 [ 72.]
 [ 88.]
 [ 23.]
 [165.]
 [140.]
 [ 38.]
 [ 80.]
 [ 19.]]


In [184]:
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np

predictions = np.vstack(predictions)
actuals = np.vstack(actuals)

mae = mean_absolute_error(actuals, predictions)
r2 = r2_score(actuals, predictions)

print(f'Test MAE: {mae:.4f}')
print(f'Test R² Score: {r2:.4f}')


Test MAE: 21.7097
Test R² Score: 0.6325


In [189]:
def predict_income(age, experience, family, ccavg, education, mortgage, personal_loan, securities_account, cd_account, credit_card):

    input_data = pd.DataFrame({
        'Age': [age],
        'Experience': [experience],
        'Family': [family],
        'CCAvg': [ccavg],
        'Education': [education],
        'Mortgage': [mortgage],
        'Personal Loan': [personal_loan],
        'Securities Account': [securities_account],
        'CD Account': [cd_account],
        'CreditCard': [credit_card]
    })

    input_scaled = scaler.transform(input_data)
    

    input_tensor = torch.tensor(input_scaled, dtype=torch.float32)
    

    model.eval()
    with torch.no_grad():
        predicted_income = model(input_tensor)
    
    return predicted_income.item()

example_income = predict_income(
    age=40, 
    experience=10, 
    family=0, 
    ccavg=1.0, 
    education=2, 
    mortgage=0, 
    personal_loan=0, 
    securities_account=0, 
    cd_account=0,
    credit_card=0
)

print(f"Predicted Income: {example_income}")

Predicted Income: 120508.1171875
