## In this notebook an implementation of a logistic regression model is presented.

In [1]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

## Step 0: Data preparation

In [2]:
# Load the breast-cancer dataset bundled with scikit-learn:
# X holds the feature matrix, y the binary class labels.
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

n_samples, n_features = X.shape
print(f"Dataset consists of {n_samples} samples and {n_features} features.")

# Hold out 20% of the samples for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

# Scale the data. The scaler is fitted on the training split ONLY, so the training
# features end up with mean=0 and variance=1, and the very same training statistics
# are then applied to the test split. Re-fitting on the test split would leak
# test-set statistics and would scale the two splits inconsistently.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# convert the data from NumPy ndarrays to float32 PyTorch tensors
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))
# make y_train and y_test column vectors of shape (n, 1) so they match the model output
y_train = y_train.view(y_train.shape[0],1)
y_test = y_test.view(y_test.shape[0],1)

Dataset consists of 569 samples and 30 features.


## Step 1: Design model
- input size
- output size
- forward pass

In logistic regression, our function looks as follows:<br>
*f = wx + b*,<br>and we apply a **sigmoid** function to it at the end, so that the output is a probability between 0 and 1.<br>
Because a plain linear layer does not apply the sigmoid, we need to define our own model.

In [3]:
class LogisticRegressionModel(nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid.

    Args:
        n_input_features: number of features in each input sample.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # one output unit -> one value per sample
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        linear_out = self.linear(x)
        return torch.sigmoid(linear_out)

## Step 2: Construct loss and optimizer

In [4]:
# Seed PyTorch's random number generator before the model is built: nn.Linear
# initialises its weights randomly, so without a seed the training log and the
# final accuracy would differ on every fresh run of the notebook.
torch.manual_seed(1234)

model = LogisticRegressionModel(n_features)
learning_rate = 0.2
# binary cross-entropy; it takes the probabilities produced by the model's sigmoid
loss_fcn = nn.BCELoss()
# plain stochastic gradient descent over all model parameters
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

## Step 3: Training loop
- forward pass: compute prediction and loss
- backward pass: compute gradients
- update weights

In [5]:
epochs = 300

# range(epochs+1) is used so that the progress log below also contains a line
# for epoch == epochs (i.e. 301 full-batch updates are performed in total).
for epoch in range(epochs+1):
    # forward pass on the whole training set and loss computation
    y_predicted = model(X_train)
    loss = loss_fcn(y_predicted, y_train)
    # backward pass: compute gradients of the loss w.r.t. the parameters
    loss.backward()
    # update the parameters
    optimizer.step()
    # zero the gradients so they do not accumulate into the next iteration
    optimizer.zero_grad()

    # the reported loss is the one computed before this iteration's update
    if (epoch % 10) == 0:
        print(f"Epoch: {epoch}, loss = {loss.item():.8f}")

# Evaluate on the held-out test set; no gradients are needed here.
with torch.no_grad():
    y_predicted = model(X_test)  # test run to evaluate the model via the accuracy criterion
    # The sigmoid returns a probability between 0 and 1, so rounding converts it to a
    # class label. In scikit-learn's breast-cancer dataset 0 = malignant and 1 = benign.
    y_predicted_cls = y_predicted.round()
    # fraction of test samples whose predicted label equals the true label
    acc = y_predicted_cls.eq(y_test).sum().item() / y_test.shape[0]
    print(f"Accuracy: {acc:.4f}")
    
    

Epoch: 0, loss = 0.78482479
Epoch: 10, loss = 0.17491864
Epoch: 20, loss = 0.12759049
Epoch: 30, loss = 0.10691942
Epoch: 40, loss = 0.09497438
Epoch: 50, loss = 0.08698919
Epoch: 60, loss = 0.08116893
Epoch: 70, loss = 0.07667901
Epoch: 80, loss = 0.07307404
Epoch: 90, loss = 0.07009244
Epoch: 100, loss = 0.06756939
Epoch: 110, loss = 0.06539532
Epoch: 120, loss = 0.06349424
Epoch: 130, loss = 0.06181150
Epoch: 140, loss = 0.06030681
Epoch: 150, loss = 0.05894960
Epoch: 160, loss = 0.05771627
Epoch: 170, loss = 0.05658824
Epoch: 180, loss = 0.05555066
Epoch: 190, loss = 0.05459152
Epoch: 200, loss = 0.05370095
Epoch: 210, loss = 0.05287078
Epoch: 220, loss = 0.05209415
Epoch: 230, loss = 0.05136529
Epoch: 240, loss = 0.05067926
Epoch: 250, loss = 0.05003184
Epoch: 260, loss = 0.04941937
Epoch: 270, loss = 0.04883870
Epoch: 280, loss = 0.04828704
Epoch: 290, loss = 0.04776196
Epoch: 300, loss = 0.04726129
Accurracy: 0.9649
