In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

## Classification

- binary classification : BCELoss used will provide the sigmodi value to this loss.
- multiclass classification : CrossEntropyLoss used, we will provide the raw value because internally it implemented the softmax

In [2]:
X,y = load_breast_cancer(return_X_y=True)

X_train,X_test, y_train, y_test = train_test_split(X,y, test_size = .2, random_state = 42, stratify = y)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

X_train = torch.from_numpy(X_train).type(torch.float32)
y_train = torch.from_numpy(y_train).type(torch.float32)
X_test = torch.from_numpy(X_test).type(torch.float32)
y_test = torch.from_numpy(y_test).type(torch.float32)
y_train = y_train.view(y_train.shape[0],1)
y_test = y_test.view(y_test.shape[0],1)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(455, 30) (455,)
(114, 30) (114,)
torch.Size([455, 30]) torch.Size([455, 1])
torch.Size([114, 30]) torch.Size([114, 1])


In [3]:
## Create dataset
class CustomDataset(Dataset):
    
    def __init__(self, X,y):
        self.X = X
        self.y = y
        self.samples = X.shape[0]
    
    ## mandatory
    def __getitem__(self, index):
        return self.X[index], self.y[index]
    
    ## mandatory
    def __len__(self):
        return self.samples
    
    
    ## optional
    def __str__(self):
        return f"Dataset having samples : {self.samples}"

In [4]:
train_dataset = CustomDataset(X_train,y_train)
test_dataset = CustomDataset(X_test,y_test)

In [5]:
first_sample = train_dataset[0]
features, labels = first_sample
print(features)
print(labels)

tensor([1.0320e+01, 1.6350e+01, 6.5310e+01, 3.2490e+02, 9.4340e-02, 4.9940e-02,
        1.0120e-02, 5.4950e-03, 1.8850e-01, 6.2010e-02, 2.1040e-01, 9.6700e-01,
        1.3560e+00, 1.2970e+01, 7.0860e-03, 7.2470e-03, 1.0120e-02, 5.4950e-03,
        1.5600e-02, 2.6060e-03, 1.1250e+01, 2.1770e+01, 7.1120e+01, 3.8490e+02,
        1.2850e-01, 8.8420e-02, 4.3840e-02, 2.3810e-02, 2.6810e-01, 7.3990e-02])
tensor([1.])


In [6]:
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=0)

In [7]:
examples = iter(train_dataloader)
samples = examples.next()
features, labels = samples
print(features)
print(labels)

tensor([[1.5530e+01, 3.3560e+01, 1.0370e+02, 7.4490e+02, 1.0630e-01, 1.6390e-01,
         1.7510e-01, 8.3990e-02, 2.0910e-01, 6.6500e-02, 2.4190e-01, 1.2780e+00,
         1.9030e+00, 2.3020e+01, 5.3450e-03, 2.5560e-02, 2.8890e-02, 1.0220e-02,
         9.9470e-03, 3.3590e-03, 1.8490e+01, 4.9540e+01, 1.2630e+02, 1.0350e+03,
         1.8830e-01, 5.5640e-01, 5.7030e-01, 2.0140e-01, 3.5120e-01, 1.2040e-01],
        [2.3210e+01, 2.6970e+01, 1.5350e+02, 1.6700e+03, 9.5090e-02, 1.6820e-01,
         1.9500e-01, 1.2370e-01, 1.9090e-01, 6.3090e-02, 1.0580e+00, 9.6350e-01,
         7.2470e+00, 1.5580e+02, 6.4280e-03, 2.8630e-02, 4.4970e-02, 1.7160e-02,
         1.5900e-02, 3.0530e-03, 3.1010e+01, 3.4510e+01, 2.0600e+02, 2.9440e+03,
         1.4810e-01, 4.1260e-01, 5.8200e-01, 2.5930e-01, 3.1030e-01, 8.6770e-02],
        [2.0310e+01, 2.7060e+01, 1.3290e+02, 1.2880e+03, 1.0000e-01, 1.0880e-01,
         1.5190e-01, 9.3330e-02, 1.8140e-01, 5.5720e-02, 3.9770e-01, 1.0330e+00,
         2.5870e+00, 5.234

In [8]:
## Linear Model
class RegressionNet(nn.Module):
    
    def __init__(self, input_size, output_size):
        super(RegressionNet, self).__init__()
        self.fc1 = nn.Linear(input_size, output_size)
        
        
    def forward(self, x):
        x = torch.sigmoid(self.fc1(x))
        return x

In [9]:
sampels, features = X_train.shape
input_size = features
output_size = 1
lr = 0.00001
n_iters = 100

model = RegressionNet(input_size, output_size)
criteria = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = lr)


def evaluate(model, test_dataloader):
    
    total_correct = 0
    total_samples = 0
    accuracy= 0.0
    
    model.eval()
    
    for i,(inputs, targets) in enumerate(test_dataloader):
        y_preds = model(inputs)
        y_preds_cls = y_preds.round()

        total_correct += y_preds_cls.eq(targets).sum() 
        total_samples += float(targets.shape[0])
    
    accuracy = total_correct/total_samples
    print(f"Epoch : {epoch+1}, Validation Accuracy : {accuracy:.4f}")

## Training loop
for epoch in range(n_iters):
    model.train()
    for i,(inputs, targets) in enumerate(train_dataloader):
        y_preds = model(inputs)
        loss = criteria(y_preds,targets)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
    if (epoch+1)%10==0:
        print(f"Training loss : {loss.item():.4f}")
        evaluate(model, test_dataloader)

Training loss : 0.7504
Epoch : 10, Validation Accuracy : 0.9211
Training loss : 0.0000
Epoch : 20, Validation Accuracy : 0.8860
Training loss : 0.5261
Epoch : 30, Validation Accuracy : 0.9123
Training loss : 0.0431
Epoch : 40, Validation Accuracy : 0.9298
Training loss : 0.0491
Epoch : 50, Validation Accuracy : 0.9123
Training loss : 0.0302
Epoch : 60, Validation Accuracy : 0.7982
Training loss : 1.3932
Epoch : 70, Validation Accuracy : 0.8070
Training loss : 0.0000
Epoch : 80, Validation Accuracy : 0.9123
Training loss : 0.0129
Epoch : 90, Validation Accuracy : 0.9035
Training loss : 0.0046
Epoch : 100, Validation Accuracy : 0.9386
