In [1]:
!pip install -r requirements.txt --quiet

In [2]:
import numpy as np
import torch
from sklearn.metrics import classification_report

## Read in datasets

In [44]:
train_dataset = np.load("data/train_binary_data.npz")
validation_dataset = np.load("data/validation_binary_data.npz")
test_dataset = np.load("data/test_binary_data.npz")

# Create Pytorch Dataset class

In [45]:
class NumpyDataset(torch.utils.data.Dataset):
    def __init__(self, X, y, transform=None, target_transform=None):
        self.X = X
        self.y = y
        self.transform = transform
        self.target_transform = target_transform

        self.X = self.X.astype("float32")
        self.y = self.y.astype("float32")
        self.y = np.reshape(self.y, (len(self.y), -1))
        
    def  __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        sample, target = self.X[idx], self.y[idx]
        if self.transform:
            self.transform(sample)
            self.target_transform(target)
        return sample, target

In [46]:
train_dataset = NumpyDataset(train_dataset['x'], train_dataset['y'])
train_dataset.__len__()

6400

In [47]:
validation_dataset = NumpyDataset(validation_dataset['x'], validation_dataset['y'])
validation_dataset.__len__()

1600

In [48]:
test_dataset = NumpyDataset(test_dataset['x'], test_dataset['y'])
test_dataset.__len__()

2000

## Model Arch Creation

In [49]:
class ANNModel(torch.nn.Module):
    def __init__(self, n_features):
        super().__init__()

        self.fc1 = torch.nn.Linear(n_features, out_features=6)
        torch.nn.init.kaiming_uniform_(self.fc1.weight, nonlinearity='relu')
        self.act1 = torch.nn.ReLU()

        self.fc2 = torch.nn.Linear(6, out_features=6)
        torch.nn.init.kaiming_uniform_(self.fc2.weight, nonlinearity='relu')
        self.act2 = torch.nn.ReLU()

        self.fc3 = torch.nn.Linear(6, out_features=1)
        torch.nn.init.xavier_normal(self.fc3.weight)
    def forward(self, x):
        x = self.fc1(x)
        x = self.act1(x)

        x = self.fc2(x)
        x = self.act2(x)

        x = self.fc3(x)

        return x

In [50]:

train_dl = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=32,
                            shuffle=True)
validation_dl = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=32,
                            shuffle=False)

In [51]:
test_dl = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

In [52]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [53]:
model = ANNModel(train_dataset.X.shape[1])
model.to(device)

  torch.nn.init.xavier_normal(self.fc3.weight)


ANNModel(
  (fc1): Linear(in_features=13, out_features=6, bias=True)
  (act1): ReLU()
  (fc2): Linear(in_features=6, out_features=6, bias=True)
  (act2): ReLU()
  (fc3): Linear(in_features=6, out_features=1, bias=True)
)

In [54]:
epochs=100
learning_rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.BCEWithLogitsLoss()

In [55]:
def binary_acc(y_pred, y_test):
    y_pred_tag = torch.round(torch.sigmoid(y_pred))

    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)
    
    return acc

model.train()
for epoch in range(epochs):
    epoch_loss = 0
    epoch_acc = 0
    # go through all the batches generated by dataloader
    for i, (inputs, targets) in enumerate(train_dl):
        inputs, targets = inputs.to(device), targets.to(device)
        # clear the gradients
        optimizer.zero_grad()
        # compute the model output
        yhat = model(inputs)
        targets = torch.reshape(targets, (-1, 1))
        # print(yhat.shape, targets.shape)
        # calculate loss
        loss = criterion(yhat, targets)
        acc = binary_acc(yhat, targets)
        # credit assignment
        loss.backward()
        # update model weights
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()

    print(f'Epoch {epoch+1:03}: | Train Loss: {epoch_loss/len(train_dl):.5f} | Train Acc: {epoch_acc/len(train_dl):.3f}', end="\n\t")
    epoch_loss = 0.0
    epoch_acc = 0
    model.eval()     # Optional when not using Model Specific layer
    for inputs, targets in validation_dl:
        if torch.cuda.is_available():
            inputs, targets = inputs.cuda(), targets.cuda()
        
        yhat = model(inputs)
        loss = criterion(yhat, targets)
        epoch_loss += loss.item()
        acc = binary_acc(yhat, targets)
        epoch_acc += acc.item()

    print(f'Epoch {epoch+1} | Validation Loss: {epoch_loss / len(validation_dl)} | Validation Acc: {epoch_acc/len(validation_dl):.3f}')
    


Epoch 001: | Train Loss: 0.46130 | Train Acc: 80.045
	Epoch 1 | Validation Loss: 0.4165504276752472 | Validation Acc: 82.220
Epoch 002: | Train Loss: 0.37647 | Train Acc: 85.180
	Epoch 2 | Validation Loss: 0.36576306104660034 | Validation Acc: 84.800
Epoch 003: | Train Loss: 0.35814 | Train Acc: 85.640
	Epoch 3 | Validation Loss: 0.36493451744318006 | Validation Acc: 85.060
Epoch 004: | Train Loss: 0.34631 | Train Acc: 86.275
	Epoch 4 | Validation Loss: 0.3589263278245926 | Validation Acc: 85.040
Epoch 005: | Train Loss: 0.34528 | Train Acc: 86.290
	Epoch 5 | Validation Loss: 0.3546120375394821 | Validation Acc: 85.200
Epoch 006: | Train Loss: 0.34076 | Train Acc: 86.240
	Epoch 6 | Validation Loss: 0.35958221077919006 | Validation Acc: 84.400
Epoch 007: | Train Loss: 0.34122 | Train Acc: 86.345
	Epoch 7 | Validation Loss: 0.35563632160425185 | Validation Acc: 84.860
Epoch 008: | Train Loss: 0.33750 | Train Acc: 86.655
	Epoch 8 | Validation Loss: 0.36434072464704514 | Validation Acc: 84

## Eval Test set

In [None]:
y_pred_list = []
model.eval()
with torch.no_grad():
    for X_batch, y_batch in test_dl:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        y_test_pred = model(X_batch)
        y_test_pred = torch.sigmoid(y_test_pred)
        y_pred_tag = torch.round(y_test_pred)
        y_pred_list.append(y_pred_tag.cpu().numpy())

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]

In [59]:
y_true = test_dataset.y

In [58]:
print(classification_report(y_true, y_pred_list))

              precision    recall  f1-score   support

         0.0       0.87      0.96      0.91      1585
         1.0       0.75      0.43      0.55       415

    accuracy                           0.85      2000
   macro avg       0.81      0.70      0.73      2000
weighted avg       0.84      0.85      0.84      2000

