# Logistic Regression

We have spent the last couple of notebooks implementing linear regression. We will now follow the same training pipeline steps to implement logistic regression. It should be quite similar to the previous example, with only slight modifications.

In [1]:
# Imports
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets  # Binary classification dataset
from sklearn.preprocessing import StandardScaler  # Scale features
from sklearn.model_selection import train_test_split  # Split data

In [2]:
# DATASET GENERATION
# Load scikit-learn's breast cancer dataset, used here as a binary
# classification problem.
bc = datasets.load_breast_cancer()
x = bc.data    # features
y = bc.target  # labels

n_samples, n_features = x.shape
print(n_samples, n_features)

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=1234
)

# Standardize the features to zero mean and unit variance. The scaler is
# fitted on the training split only and then applied as-is to the test split.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# Cast every array to float32 and wrap it in a torch tensor.
x_train, x_test, y_train, y_test = (
    torch.from_numpy(arr.astype(np.float32))
    for arr in (x_train, x_test, y_train, y_test)
)

# Reshape the label tensors into column vectors of shape (n, 1).
y_train = y_train.view(-1, 1)
y_test = y_test.view(-1, 1)


# MODEL SETUP
class LogisticRegression(nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid.

    Args:
        n_input_features: Number of input features per sample.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # Binary classification, so a single output unit is enough.
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        linear_out = self.linear(x)
        return torch.sigmoid(linear_out)


# Seed PyTorch's RNG before building the model so the randomly initialised
# weights -- and therefore the whole training run -- are repeatable, in the
# same way the train/test split is pinned with a fixed random_state.
torch.manual_seed(1234)
model = LogisticRegression(n_features)


# LOSS AND OPTIMIZER
learning_rate = 0.01
loss = nn.BCELoss()  # Binary cross entropy loss; takes the model's sigmoid outputs
# Stochastic gradient descent
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# TRAINING LOOP
# Every epoch runs one forward/backward pass over the entire training set.
num_epochs = 500
for epoch in range(num_epochs):
    # forward pass
    y_predicted = model(x_train)
    # loss calculation
    loss_value = loss(y_predicted, y_train)
    # backward pass: compute gradients of the loss w.r.t. the model parameters
    loss_value.backward()
    # update parameters
    optimizer.step()
    # zero the gradients so they do not accumulate into the next epoch
    optimizer.zero_grad()

    # Report the loss every 50 epochs
    if (epoch + 1) % 50 == 0:
        print(f'Epoch {epoch+1}: Loss = {loss_value.item():.4f}')


# Evaluate the model
# Evaluation must not be recorded in the computational graph, so gradient
# tracking is switched off for this block.
with torch.no_grad():
    y_predicted = model(x_test)
    # The sigmoid outputs lie between 0 and 1; rounding them gives the
    # predicted class labels.
    y_predicted_cls = y_predicted.round()

    # Accuracy = correct predictions / number of test samples
    n_correct = (y_predicted_cls == y_test).sum()
    n_test = float(y_test.shape[0])
    accuracy = n_correct / n_test
    print(f'Accuracy: {accuracy * 100:.4f}%')


569 30
Epoch 50: Loss = 0.3225
Epoch 100: Loss = 0.2403
Epoch 150: Loss = 0.2001
Epoch 200: Loss = 0.1753
Epoch 250: Loss = 0.1580
Epoch 300: Loss = 0.1453
Epoch 350: Loss = 0.1354
Epoch 400: Loss = 0.1275
Epoch 450: Loss = 0.1209
Epoch 500: Loss = 0.1155
Accuracy: 92.9825%
