### CONSTANTS

In [3]:
PATHES = ['data/X_train.csv', 'data/X_test.csv', 'data/y_train.csv', 'data/y_test.csv']

### Importing Libraries

In [50]:
import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report
from imblearn.over_sampling import RandomOverSampler

from tqdm import tqdm

### Functions

In [22]:
def get_train_test_data(pathes):
    data = []
    for path in pathes:
        df = pd.read_csv(path)
        data.append(df)
    return data

def train_step(model, data_loader, loss_fn, optimizer):
    train_loss, train_acc = 0, 0
    model.train()
    for X, y in data_loader:
        y_logit = model(X)

        loss = loss_fn(y_logit, y)
        train_loss += loss
        
        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        predicted_classes = torch.sigmoid(y_logit).round()
        train_acc += (predicted_classes == y).sum().item()/len(predicted_classes)
    
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    return train_loss, train_acc

def test_step(model, data_loader, loss_fn):
    test_loss, test_acc = 0, 0
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            y_logit = model(X)

            test_loss += loss_fn(y_logit, y)

            predicted_classes = torch.sigmoid(y_logit).round()
            test_acc += (predicted_classes == y).sum().item()/len(predicted_classes)

        test_loss /= len(data_loader)
        test_acc /= len(data_loader)
        return test_loss, test_acc
    
def train(model, train_dataloader, test_dataloader, loss_fn, optimizer, epochs):
    result = {
        'train_loss': [],
        'train_acc': [],
        'test_loss': [],
        'test_acc': []
    }
    print("=================================================")

    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model,
                                           train_dataloader,
                                           loss_fn,
                                           optimizer)
        
        test_loss, test_acc = test_step(model,
                                        test_dataloader,
                                        loss_fn)
        
        result['train_loss'].append(train_loss.item())
        result['train_acc'].append(train_acc)
        result['test_loss'].append(test_loss.item())
        result['test_acc'].append(test_acc)
        if epoch%10==1:
            print(f" Epoch: {epoch+1}")
            print(f"Train Loss = {train_loss:.4f} || Train Accuracy = {train_acc*100:.2f}%")
            print(f"Test Loss = {test_loss:.4f} || Test Accuracy = {test_acc*100:.2f}%")
            print("=================================================")
        
    return result

### Reading Data

In [51]:
X_train, X_test, y_train, y_test = get_train_test_data(PATHES)
over_sampler = RandomOverSampler()
X_train, y_train = over_sampler.fit_resample(X_train, y_train)
print(f'X_train shape: {X_train.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'y_test shape: {y_test.shape}')

X_train shape: (15458, 12)
X_test shape: (2000, 12)
y_train shape: (15458, 1)
y_test shape: (2000, 1)


### Creating Datasets & Dataloaders

In [52]:
X_train = torch.tensor(X_train.values, dtype=torch.float32)
X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.float32)

train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_data, batch_size=256, shuffle=True, drop_last=True)
test_loader = DataLoader(test_data, batch_size=256, shuffle=True, drop_last=True)

### Model V1

In [53]:
class ModelV1(nn.Module):
    def __init__(self, input_shape, hidden_units, output_shape):
        super().__init__()
        self.input_layer = nn.Sequential(
            nn.Linear(input_shape, hidden_units),
            nn.ReLU()
        )
        self.hidden_block = nn.Sequential(
            nn.Linear(hidden_units, 4*hidden_units),
            nn.ReLU(),
            nn.Linear(4*hidden_units, 2*hidden_units),
            nn.ReLU(),
            nn.Linear(2*hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, int(0.5*hidden_units)),
            nn.ReLU()
        )
        self.classifier = nn.Linear(int(0.5*hidden_units), output_shape)
    
    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_block(x)
        x = self.classifier(x)
        return x


### Initiating Model, defining loss function and optimizer

In [60]:
model = ModelV1(12, 16, 1)
model

ModelV1(
  (input_layer): Sequential(
    (0): Linear(in_features=12, out_features=16, bias=True)
    (1): ReLU()
  )
  (hidden_block): Sequential(
    (0): Linear(in_features=16, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=16, bias=True)
    (5): ReLU()
    (6): Linear(in_features=16, out_features=8, bias=True)
    (7): ReLU()
  )
  (classifier): Linear(in_features=8, out_features=1, bias=True)
)

In [61]:
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

### Building Training and Testing loops

In [62]:
train(model, train_loader, test_loader, loss_fn, optimizer, epochs=50)



  4%|▍         | 2/50 [00:02<00:51,  1.08s/it]

 Epoch: 2
Train Loss = 0.3651 || Train Accuracy = 84.40%
Test Loss = 0.3192 || Test Accuracy = 85.21%


 24%|██▍       | 12/50 [00:10<00:29,  1.28it/s]

 Epoch: 12
Train Loss = 0.1616 || Train Accuracy = 94.34%
Test Loss = 0.2154 || Test Accuracy = 90.62%


 44%|████▍     | 22/50 [00:18<00:22,  1.25it/s]

 Epoch: 22
Train Loss = 0.1174 || Train Accuracy = 95.89%
Test Loss = 0.1582 || Test Accuracy = 93.92%


 64%|██████▍   | 32/50 [00:26<00:14,  1.21it/s]

 Epoch: 32
Train Loss = 0.0920 || Train Accuracy = 96.70%
Test Loss = 0.1888 || Test Accuracy = 93.47%


 84%|████████▍ | 42/50 [00:34<00:06,  1.27it/s]

 Epoch: 42
Train Loss = 0.0745 || Train Accuracy = 97.41%
Test Loss = 0.1463 || Test Accuracy = 95.54%


100%|██████████| 50/50 [00:41<00:00,  1.20it/s]


{'train_loss': [0.6566299796104431,
  0.3651098906993866,
  0.2706199884414673,
  0.2520017921924591,
  0.2307157814502716,
  0.21749404072761536,
  0.20432783663272858,
  0.19326980412006378,
  0.18371564149856567,
  0.17640969157218933,
  0.1711719036102295,
  0.1616159826517105,
  0.1574690341949463,
  0.14979267120361328,
  0.14565584063529968,
  0.13932619988918304,
  0.13743846118450165,
  0.13103479146957397,
  0.12475506216287613,
  0.12295477837324142,
  0.1180606558918953,
  0.11743919551372528,
  0.11098087579011917,
  0.11021895706653595,
  0.10750296711921692,
  0.10732867568731308,
  0.10373719036579132,
  0.10289754718542099,
  0.09804850071668625,
  0.09647664427757263,
  0.09404963999986649,
  0.09202377498149872,
  0.08879117667675018,
  0.08683337271213531,
  0.08354757726192474,
  0.08084697276353836,
  0.08361440151929855,
  0.07916594296693802,
  0.08100221306085587,
  0.07751110196113586,
  0.07381367683410645,
  0.07453098893165588,
  0.07248295098543167,
  0.07

In [63]:
with torch.inference_mode():
        true_ys = []
        pred_ys = []
        for X, y in test_loader:
            true_ys.append(y)
            y_logit = model(X)
            predicted_classes = torch.sigmoid(y_logit).round()
            pred_ys.append(predicted_classes)

In [64]:
y_pred = torch.concat(pred_ys).squeeze().numpy()
y_true = torch.concat(true_ys).squeeze().numpy()

In [65]:
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

         0.0       0.99      0.95      0.97      1727
         1.0       0.35      0.72      0.47        65

    accuracy                           0.94      1792
   macro avg       0.67      0.84      0.72      1792
weighted avg       0.97      0.94      0.95      1792

