In [53]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [54]:
import sklearn
import torch
import random
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm

In [55]:
def set_seed(seed_value = 42):
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed_value)
set_seed()

In [56]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [57]:
X, y = make_classification(
    n_samples = 10000,
    n_features = 40,
    n_informative = 35,
    n_classes = 2    
)

In [58]:
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size = 0.3, stratify = y)

In [59]:
class customDataset(Dataset):
    def __init__(self, X_type, y_type):
        self.X_type = X_type
        self.y_type = y_type
        
    def __len__(self):
        return len(self.X_type)
    
    def __getitem__(self, index):
        return {
            "x": torch.Tensor(self.X_type[index]),
            "y": torch.Tensor([self.y_type[index]])
            }

In [60]:
train_dataset = customDataset(X_train, y_train)
test_dataset  = customDataset(X_test, y_test)

In [61]:
train_dataloader = DataLoader(
    dataset = train_dataset,
    batch_size = 1000
)
test_dataloader = DataLoader(
    dataset = test_dataset,
    batch_size = 1000
)

In [78]:
W = torch.nn.Parameter(torch.randn((X.shape[1], 1), dtype = torch.float32, device = device))
b = torch.nn.Parameter(torch.randn(1, dtype = torch.float32, device = device))

In [79]:
# with BCELogitsLoss()
model = lambda X,W, b: torch.matmul(X,W) + b
# with BCELoss
# model = lambda X,W,b : 1/(1 + torch.exp(-1*(torch.matmul(X,W) + b)))

In [80]:
num_epochs = 1000
learning_rate = 1e-2
loss_function = torch.nn.BCEWithLogitsLoss()
for epoch in tqdm(range(num_epochs)):
    epoch_loss = 0
    for batch in train_dataloader:
        batch["x"] = batch["x"].to(device)
        batch["y"] = batch["y"].to(device)
        y_pred = model(X = batch["x"], W = W, b = b)
        loss = loss_function(y_pred, batch["y"])
        
        loss.backward()
        with torch.no_grad():
            W = W - learning_rate*W.grad
            b = b - learning_rate*b.grad
        W.requires_grad_(True)
        b.requires_grad_(True)
        epoch_loss += loss.item()
    if epoch%50 == 0:
        print(f"epoch = {epoch}, loss = {epoch_loss/len(train_dataloader)}")

  0%|          | 0/1000 [00:00<?, ?it/s]

epoch = 0, loss = 7.973633084978376
epoch = 50, loss = 1.3847300836018153
epoch = 100, loss = 0.8227459532873971
epoch = 150, loss = 0.6204696042197091
epoch = 200, loss = 0.5169368812016079
epoch = 250, loss = 0.45032103998320444
epoch = 300, loss = 0.4012710962976728
epoch = 350, loss = 0.36261168122291565
epoch = 400, loss = 0.3317890592983791
epoch = 450, loss = 0.3082729365144457
epoch = 500, loss = 0.29186379058020456
epoch = 550, loss = 0.2816256156989506
epoch = 600, loss = 0.2758222967386246
epoch = 650, loss = 0.27266697798456463
epoch = 700, loss = 0.2709180691412517
epoch = 750, loss = 0.26989799312182833
epoch = 800, loss = 0.26927116938999723
epoch = 850, loss = 0.2688694213117872
epoch = 900, loss = 0.26860320355210987
epoch = 950, loss = 0.2684217883007867


#### Evaluating on Test Dataset

In [81]:
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

y_true = torch.Tensor()
y_pred = torch.Tensor()
sigmoid = torch.nn.Sigmoid()
for batch in test_dataloader:
    with torch.no_grad():
        batch["x"] = batch["x"].to(device)
        batch["y"] = batch["y"].to(device)
        predictions = model(X = batch["x"] , W = W, b = b)
        y_true = torch.cat([y_true, batch["y"].cpu()], dim =0)
        y_pred = torch.cat([y_pred, sigmoid(predictions.cpu())], dim =0)


In [82]:
def convert(array, threshold = 0.5):
    array[array >= threshold] = 1
    array[array < threshold] = 0
    return array

In [83]:
for metric in [f1_score, precision_score, recall_score, accuracy_score]:
    print(f"{metric.__name__} :- {metric(y_true.numpy(), convert(y_pred.numpy()))}")

f1_score :- 0.8857994041708044
precision_score :- 0.8808426596445029
recall_score :- 0.8908122503328895
accuracy_score :- 0.885


#### Evaluating on Train Set

In [84]:
y_true = torch.Tensor()
y_pred = torch.Tensor()
for batch in train_dataloader:
    with torch.no_grad():
        batch["x"] = batch["x"].to(device)
        batch["y"] = batch["y"].to(device)
        predictions = model(X = batch["x"] , W = W, b = b)
        y_true = torch.cat([y_true, batch["y"].cpu()], dim =0)
        y_pred = torch.cat([y_pred, sigmoid(predictions.cpu())], dim =0)

In [86]:
for metric in [f1_score, precision_score, recall_score, accuracy_score]:
    print(f"{metric.__name__} :- {metric(y_true.numpy(), convert(y_pred.numpy()))}")

f1_score :- 0.8852366109379427
precision_score :- 0.8787623066104079
recall_score :- 0.8918070225520982
accuracy_score :- 0.8842857142857142
