In [4]:
import numpy as np
import pandas as pd

## Dataset preparation

In [5]:
df = pd.read_csv('Bank_Personal_Loan_Modelling.csv')

In [6]:
df.head()

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


In [7]:
df.drop(['ID'], inplace=True, axis=1)

In [8]:
df.head()

Unnamed: 0,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


In [9]:
df.columns

Index(['Age', 'Experience', 'Income', 'ZIP Code', 'Family', 'CCAvg',
       'Education', 'Mortgage', 'Personal Loan', 'Securities Account',
       'CD Account', 'Online', 'CreditCard'],
      dtype='object')

In [10]:
X = df[['Age', 'Experience', 'Income', 'Family', 'CCAvg',
       'Education', 'Mortgage', 'Securities Account',
       'CD Account', 'Online', 'CreditCard']]
y = df[['Personal Loan']]

In [11]:
X.head()

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Education,Mortgage,Securities Account,CD Account,Online,CreditCard
0,25,1,49,4,1.6,1,0,1,0,0,0
1,45,19,34,3,1.5,1,0,1,0,0,0
2,39,15,11,1,1.0,1,0,0,0,0,0
3,35,9,100,1,2.7,2,0,0,0,0,0
4,35,8,45,4,1.0,2,0,0,0,0,1


In [12]:
y.head()

Unnamed: 0,Personal Loan
0,0
1,0
2,0
3,0
4,0


In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X.values, y.values, stratify=y.values, random_state=1)

In [14]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## PyTorch

In [120]:
import torch
import torch.nn as nn

from torch.optim import SGD
from torch.utils.data import DataLoader, TensorDataset

In [13]:
X_train = torch.tensor(X_train).to(torch.float32)
X_test  = torch.tensor(X_test).to(torch.float32)
y_train = torch.tensor(y_train).to(torch.float32)
y_test  = torch.tensor(y_test).to(torch.float32)

In [14]:
X_train.shape

torch.Size([3750, 11])

In [15]:
y_train

tensor([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]])

In [16]:
dataset = TensorDataset(X_train, y_train)
data = DataLoader(dataset, batch_size=32, shuffle=True)

In [17]:
model = nn.Sequential(
    nn.Linear(11, 8),
    nn.ReLU(),
    nn.Linear(8, 4),
    nn.ReLU(),
    nn.Linear(4, 1),
    nn.Sigmoid()
)

In [18]:
model

Sequential(
  (0): Linear(in_features=11, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=4, bias=True)
  (3): ReLU()
  (4): Linear(in_features=4, out_features=1, bias=True)
  (5): Sigmoid()
)

In [19]:
loss_fn = nn.BCELoss()
optimizer = SGD(model.parameters(), lr=0.1)

In [20]:
len(data.dataset)

3750

In [21]:
def train(model, epoch, data):
    train_loss = [0] * epoch
    train_acc = [0] * epoch
    
    for i in range(epoch):
        for x_batch, y_batch in data:
            
            pred = model(x_batch)
            loss = loss_fn(pred, y_batch)
            
            
            loss.backward()
            
            optimizer.step()
            optimizer.zero_grad()
            
            train_loss[i] += loss.item() * x_batch.size(0)
            crt_cnt = (torch.where(model(x_batch)>=0.5, 1, 0) == y_batch).sum()
            train_acc[i] += crt_cnt
            
        train_loss[i] = train_loss[i]/len(data.dataset)
        train_acc[i] = train_acc[i]/len(data.dataset)
        
        print(f'Epoch: {i+1}, Loss: {train_loss[i]}, Accuracy: {train_acc[i]}')

    return train_loss, train_acc
    

In [22]:
train_loss, train_acc = train(model, 20, data)

Epoch: 1, Loss: 0.309899601517121, Accuracy: 0.9039999842643738
Epoch: 2, Loss: 0.21964769684473673, Accuracy: 0.9039999842643738
Epoch: 3, Loss: 0.17161383893489837, Accuracy: 0.9039999842643738
Epoch: 4, Loss: 0.14272996454238893, Accuracy: 0.9039999842643738
Epoch: 5, Loss: 0.12123005695740383, Accuracy: 0.9567999839782715
Epoch: 6, Loss: 0.1048418641726176, Accuracy: 0.9728000164031982
Epoch: 7, Loss: 0.09350881623203555, Accuracy: 0.9783999919891357
Epoch: 8, Loss: 0.0864280216495196, Accuracy: 0.9783999919891357
Epoch: 9, Loss: 0.08023100702390075, Accuracy: 0.9818666577339172
Epoch: 10, Loss: 0.07516056815385819, Accuracy: 0.9826666712760925
Epoch: 11, Loss: 0.0714331739783287, Accuracy: 0.9832000136375427
Epoch: 12, Loss: 0.06893293479975934, Accuracy: 0.9842666387557983
Epoch: 13, Loss: 0.06599080271919569, Accuracy: 0.9845333099365234
Epoch: 14, Loss: 0.06390585209131242, Accuracy: 0.9847999811172485
Epoch: 15, Loss: 0.06208723174049519, Accuracy: 0.9866666793823242
Epoch: 16

### Evaluation on test data

In [23]:
testdata = TensorDataset(X_test, y_test)
data_t = DataLoader(testdata, batch_size=1250)

In [24]:
with torch.no_grad():
    for x_whole, y_whole in data_t:
        prediction = model(x_whole)
        crt_cnt = (torch.where(prediction>=0.5, 1, 0) == y_whole).sum()
    print(f'Accuracy: {crt_cnt/len(data_t.dataset)}')

Accuracy: 0.9728000164031982


## Tensorflow

In [15]:
from tensorflow.keras import layers, Sequential, losses, optimizers, metrics


In [16]:
model = Sequential()
model.add(layers.Dense(units=8, input_dim=11, activation='relu'))
model.add(layers.Dense(units=4, activation='relu'))
model.add(layers.Dense(units=1, activation='sigmoid'))

loss_fn = losses.BinaryCrossentropy()
opt = optimizers.SGD(learning_rate=0.1)

model.compile(optimizer=opt, loss=loss_fn, metrics=[metrics.BinaryAccuracy()])

model.fit(X_train, y_train, batch_size=32, epochs=20)

Epoch 1/20


2023-03-24 18:08:29.645486: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-03-24 18:08:29.881137: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x28d142bb0>

In [17]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 8)                 96        
                                                                 
 dense_4 (Dense)             (None, 4)                 36        
                                                                 
 dense_5 (Dense)             (None, 1)                 5         
                                                                 
Total params: 137
Trainable params: 137
Non-trainable params: 0
_________________________________________________________________


In [18]:
prediction = model.predict(X_test)



2023-03-24 18:08:48.557232: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


In [19]:
accuracy = (np.where(prediction >= 0.5, 1, 0) == y_test).sum()/len(X_test)

In [17]:
accuracy

0.9752

## From scratch (Experimental)
- It currently highly unstable, i.e I get results sometimes, sometime not as the gradients explode(leading to sigmoid returning NaN)
    - Sometime the accuracy is 30%, 60%, 70%, even 90% for the same hyper-parameters. So I am debugging it.

In [20]:
X_train = X_train.T
X_train.shape

(11, 3750)

In [21]:
y_train = y_train.T
y_train.shape

(1, 3750)

In [22]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

def delta_sigmoid(z):
    return z*(1-z)

def relu(z):
    return np.maximum(0,z)

def delta_relu(z):
    return np.where(z > 0, 1, 0)

def loss_fn(pred, y):
    return np.mean((pred - y)**2, axis=1)

In [23]:
class Net_scratch:
    def __init__(self, in_features, out_features):
        # Hidden Linear layer with 3 neurons
        self.W1 = np.random.rand(3, in_features) * 0.3
        self.B1 = np.random.randn(3,1) 
        
        # Output layer with 1 neuron
        self.W2 = np.random.rand(out_features,3) * 0.3
        self.B2 = np.random.randn(out_features,1) 
        
    def forward(self, x):
        self.Z1 = self.W1 @ x + self.B1
        self.A1 = relu(self.Z1)
        self.Z2 = self.W2 @ self.A1 + self.B2
        self.A2 = sigmoid(self.Z2)
        return self.A2
    
    def backward(self, X, Y, lr=0.01):
        '''
        Calculate gradients and update parameters
        '''
        m = len(X)
        # Gradients calculation
        dZ2 = (self.A2 - Y) * delta_sigmoid(self.Z2)
        dW2 = (1/m)* (dZ2 @ self.A2.T)
        dB2 = (1/m)* (np.sum(dZ2, axis=1).reshape(-1,1)) # repeat
        
        dZ1 = delta_relu(self.Z1) * (self.W2.T @ dZ2) # repeat
        dW1 = (1/m)* (dZ1 @ X.T)
        dB1 = (1/m)* (np.sum(dZ1, axis=1).reshape(-1,1)) # repeat

#         print(dW1)
#         print(dW2)
        # Weights and bias updation
        self.W1 -= lr * dW1
        self.B1 -= lr * dB1
        self.W2 -= lr * dW2
        self.B2 -= lr * dB2
    
    def predict(self, X):
        return self.forward(X)
        

In [35]:
scratch = Net_scratch(11,1)

In [36]:
def train(model, epoch, X, y):
    for i in range(epoch):    
        pred = model.forward(X)
        model.backward(X,y)
        loss = loss_fn(pred, y)
        print(loss[0])


In [37]:
train(scratch, 20, X_train, y_train)

0.5350397376806574
0.7542625703312705
0.9039993742726025
0.904
0.904
0.904
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan


  return z*(1-z)
  dZ2 = (self.A2 - Y) * delta_sigmoid(self.Z2)
  dZ1 = delta_relu(self.Z1) * (self.W2.T @ dZ2) # repeat
  dZ1 = delta_relu(self.Z1) * (self.W2.T @ dZ2) # repeat
  dW1 = (1/m)* (dZ1 @ X.T)


In [38]:
(np.where(scratch.predict(X_train)>=0.5, 1,0)==y_train).mean()

0.904

### Test data

In [42]:
X_test = X_test.T
X_test.shape

(11, 1250)

In [40]:
y_test = y_test.reshape(-1,1).T
y_test.shape

(1, 1250)

In [43]:
(np.where(scratch.predict(X_test)>=0.5, 1,0)==y_test).mean()

0.904

**Weirdly** getting 90% accuracy when my model returns NaN. Debugging the reason, but clearly the gradients are exploding.