# About this implementation

Uses `torch.nn.BCELoss` (binary cross-entropy) on the sigmoid output of the network.

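$y_n$ is the example's label, $\hat{y}_n$ is the predicted probability of the positive class (the posterior), and $w_n$ is an optional per-example rescaling weight (equal to 1 when no weights are given).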


$$ \ell_n = -w_n \left[ y_n \cdot \log \hat{y}_n + (1-y_n) \cdot \log(1- \hat{y}_n) \right] $$

## Step 1: Import dataset and prepare data

In [15]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [16]:
# load the breast cancer dataset that ships with scikit-learn;
# the returned object exposes .data, .target and .feature_names, which the cells below use
dataset = datasets.load_breast_cancer()

In [17]:
# inspect the names of the input features
dataset.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [28]:
# hold out 20% of the examples for testing; random_state pins the shuffle so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.2,
    random_state=42,
)

In [29]:
# normalize the data myself, rather than with sklearn, for practice

# per-feature statistics, estimated on the training split only
mu = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)

# a constant feature has std == 0; divide by 1 instead so it is mapped to 0
# rather than producing NaN/inf
std = np.where(std == 0, 1.0, std)

X_train = (X_train - mu) / std
X_test = (X_test - mu) / std     # reuse the train statistics so no test information leaks into preprocessing

### Convert to PyTorch tensors (!)

In [30]:
import torch
from torch.utils.data import TensorDataset, DataLoader

In [31]:
# convert all four splits to float32 tensors in one pass;
# the labels are made float32 too, because they are later compared against float outputs in the loss
X_train, X_test, y_train, y_test = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, X_test, y_train, y_test)
)

In [89]:
# pair every feature row with its label so they can be indexed together
train_dataset, test_dataset = (
    TensorDataset(X_train, y_train),
    TensorDataset(X_test, y_test),
)

# loaders hand out mini-batches of 24 examples;
# only the training loader reshuffles the examples on each pass
train_loader = DataLoader(train_dataset, batch_size=24, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=24)

## Create the model

In [90]:
import torch

In [91]:
class MyMLP(torch.nn.Module):
    """Multilayer perceptron with one hidden layer and a sigmoid output.

    Pipeline: Linear(num_features -> num_hidden) -> ReLU
              -> Linear(num_hidden -> num_classes) -> Sigmoid.

    Args:
        num_features: width of each input example.
        num_hidden: number of units in the hidden layer.
        num_classes: number of output units (also kept as ``self.num_classes``).
    """

    def __init__(self, num_features, num_hidden, num_classes):
        # run nn.Module's own constructor before registering any sub-modules
        super().__init__()

        self.num_classes = num_classes

        # hidden block
        self.linear_combination_1 = torch.nn.Linear(num_features, num_hidden)
        self.relu = torch.nn.ReLU()

        # output block
        self.output_layer = torch.nn.Linear(num_hidden, num_classes)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the hidden block and return the sigmoid of the output layer."""
        hidden = self.relu(self.linear_combination_1(x))
        return self.sigmoid(self.output_layer(hidden))

In [92]:
# input width of the network = size of the second dimension of the training matrix
num_features = X_train.shape[1]

In [93]:
# add model properties and initialize

# seed PyTorch's global RNG so that weight initialisation (and the shuffling done by
# the training DataLoader, which draws from the same RNG) is reproducible on Restart & Run All
torch.manual_seed(42)

# for the binary classification, either cancer/not cancer. one class is being detected,
# so a single sigmoid output is enough.
# if we did 2 classes we would need a softmax for the presence of two classes, which we could also do.

model = MyMLP(num_features=num_features, num_hidden=1000, num_classes=1)

# BCELoss expects probabilities in [0, 1], which the model's final sigmoid provides.
# (torch.nn.BCEWithLogitsLoss is the numerically more stable variant, but using it
# would require removing the sigmoid from the model.)
loss_criterion = torch.nn.BCELoss()

# alternative worth trying: Adam with a larger step size
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)

## Training

In [94]:
num_epochs = 50

for epoch in range(num_epochs):

    # set the model into train mode
    model.train()

    for batch_index, (features, labels) in enumerate(train_loader):

        # forward pass: the model returns one sigmoid output per example, shape (batch_size, 1)
        predicted_probabilities = model(features)

        # drop only the trailing singleton dimension so the shape matches `labels` (batch_size,).
        # a bare .squeeze() would also collapse the batch dimension when a batch holds a single
        # example, producing a 0-d tensor that BCELoss rejects against a (1,)-shaped target.
        loss = loss_criterion(predicted_probabilities.squeeze(-1), labels)

        # backward pass and parameter update
        optimizer.zero_grad()  # clear gradients left over from the previous step (they accumulate otherwise)
        loss.backward()        # back-propagate to fill .grad on every parameter
        optimizer.step()       # apply the optimizer's update rule

        ### LOGGING
        if not batch_index % 4:
            # .item() turns the 0-d loss tensor into a plain Python float for formatting
            print('Epoch: %02d/%02d | Batch %02d/%02d | Cost: %.4f'  %(epoch+1, num_epochs, batch_index, len(train_loader), loss.item()))
    
    

Epoch: 01/50 | Batch 00/19 | Cost: 0.6571
Epoch: 01/50 | Batch 04/19 | Cost: 0.6667
Epoch: 01/50 | Batch 08/19 | Cost: 0.6266
Epoch: 01/50 | Batch 12/19 | Cost: 0.6385
Epoch: 01/50 | Batch 16/19 | Cost: 0.6832
Epoch: 02/50 | Batch 00/19 | Cost: 0.6800
Epoch: 02/50 | Batch 04/19 | Cost: 0.6543
Epoch: 02/50 | Batch 08/19 | Cost: 0.6598
Epoch: 02/50 | Batch 12/19 | Cost: 0.6561
Epoch: 02/50 | Batch 16/19 | Cost: 0.6596
Epoch: 03/50 | Batch 00/19 | Cost: 0.6358
Epoch: 03/50 | Batch 04/19 | Cost: 0.6458
Epoch: 03/50 | Batch 08/19 | Cost: 0.6119
Epoch: 03/50 | Batch 12/19 | Cost: 0.6550
Epoch: 03/50 | Batch 16/19 | Cost: 0.6286
Epoch: 04/50 | Batch 00/19 | Cost: 0.6108
Epoch: 04/50 | Batch 04/19 | Cost: 0.6223
Epoch: 04/50 | Batch 08/19 | Cost: 0.6293
Epoch: 04/50 | Batch 12/19 | Cost: 0.6133
Epoch: 04/50 | Batch 16/19 | Cost: 0.6649
Epoch: 05/50 | Batch 00/19 | Cost: 0.6382
Epoch: 05/50 | Batch 04/19 | Cost: 0.6184
Epoch: 05/50 | Batch 08/19 | Cost: 0.6308
Epoch: 05/50 | Batch 12/19 | Cost:

Epoch: 43/50 | Batch 12/19 | Cost: 0.4068
Epoch: 43/50 | Batch 16/19 | Cost: 0.4229
Epoch: 44/50 | Batch 00/19 | Cost: 0.4472
Epoch: 44/50 | Batch 04/19 | Cost: 0.4091
Epoch: 44/50 | Batch 08/19 | Cost: 0.4216
Epoch: 44/50 | Batch 12/19 | Cost: 0.4346
Epoch: 44/50 | Batch 16/19 | Cost: 0.3692
Epoch: 45/50 | Batch 00/19 | Cost: 0.4206
Epoch: 45/50 | Batch 04/19 | Cost: 0.3883
Epoch: 45/50 | Batch 08/19 | Cost: 0.4252
Epoch: 45/50 | Batch 12/19 | Cost: 0.4014
Epoch: 45/50 | Batch 16/19 | Cost: 0.3760
Epoch: 46/50 | Batch 00/19 | Cost: 0.4398
Epoch: 46/50 | Batch 04/19 | Cost: 0.3907
Epoch: 46/50 | Batch 08/19 | Cost: 0.4159
Epoch: 46/50 | Batch 12/19 | Cost: 0.4326
Epoch: 46/50 | Batch 16/19 | Cost: 0.4304
Epoch: 47/50 | Batch 00/19 | Cost: 0.3811
Epoch: 47/50 | Batch 04/19 | Cost: 0.4069
Epoch: 47/50 | Batch 08/19 | Cost: 0.4429
Epoch: 47/50 | Batch 12/19 | Cost: 0.3910
Epoch: 47/50 | Batch 16/19 | Cost: 0.4131
Epoch: 48/50 | Batch 00/19 | Cost: 0.4132
Epoch: 48/50 | Batch 04/19 | Cost:

In [83]:
# switch the model to evaluation mode before testing (done with model.eval() in the evaluation cell below)

## Evaluate the final model

In [95]:
# evaluation mode: disables training-only layer behaviour (e.g. dropout), if any
model.eval()

correct = 0
total = 0

# no gradients are needed for evaluation, so skip building the autograd graph
with torch.no_grad():

    for features, labels in test_loader:

        # the model ends in a sigmoid, so these are probabilities, not logits
        predicted_probabilities = model(features)

        # drop the trailing singleton dimension, then threshold at 0.5:
        # the comparison yields a boolean tensor, and .float() maps True/False to 1.0/0.0
        y_preds = (predicted_probabilities.squeeze(-1) > 0.5).float()

        total += labels.size(0)
        correct += (y_preds == labels).sum().item()

In [96]:
# share of correctly classified test examples, expressed as a percentage
accuracy = 100 * correct / total
accuracy

96.49122807017544