In [1]:
# 1) Design model (input size, output size, forward pass)
# 2) Construct the Loss and Optimizer
# 3) Training loop
#       - Forward pass: compute prediction
#       - Backward pass: compute gradients
#       - Update our weights
#       - Iterate this a couple of times (epochs)

import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler # we want to scale our features
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [3]:
# 0) Prepare the data

# Breast-cancer dataset: this is a binary classification problem
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

n_samples, n_features = X.shape
print(X.shape)  # 569 samples and 30 features [aka independent variables]

# Hold out 20% of the samples for testing; fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

# Scale the features only. The targets are left as-is: they are compared against
# the model's sigmoid output.
# The scaler is fitted on the training split and merely applied to the test split.
# fit_transform VS transform: https://towardsdatascience.com/what-and-why-behind-fit-transform-vs-transform-in-scikit-learn-78f915cf96fe
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# NumPy arrays -> float32 torch tensors, all four splits in one go
X_train, X_test, y_train, y_test = (
    torch.from_numpy(split.astype(np.float32))
    for split in (X_train, X_test, y_train, y_test)
)

# Reshape the targets from (r, ) to (r, 1) so they are column vectors
y_train = y_train.view(-1, 1)
y_test = y_test.view(-1, 1)


(569, 30)


In [4]:
# 1) Model

# f = weight*x + bias (Then apply a sigmoid function at the end)
class LogisticRegression(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    Args:
        n_input_features: number of input features fed to the linear layer.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # A single output unit: one value per sample
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        linear_out = self.linear(x)
        return torch.sigmoid(linear_out)

# Build the model with one input per dataset column (n_features was taken from X.shape)
model = LogisticRegression(n_features)

In [5]:
# 2) Loss and Optimizer

learning_rate = 0.01

# Binary cross-entropy loss, averaged over the samples (the default reduction, spelled out)
criterion = nn.BCELoss(reduction="mean")

# Plain stochastic gradient descent over every parameter of the model
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [6]:
# 3) Training Loop

num_epochs = 100

for epoch in range(num_epochs):
    # Clear the gradients first. backward() accumulates into .grad, so zeroing
    # at the start of every iteration guarantees each step uses only this
    # iteration's gradients -- even if the cell is re-run after an interrupted
    # run that left gradients behind.
    optimizer.zero_grad()

    # Forward pass and loss (full-batch: the whole training set at once)
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)

    # Backward pass (backpropagation)
    loss.backward()

    # Update the weights
    optimizer.step()

    # Report the loss every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f"epoch: {epoch+1}, loss: {loss.item():.4f}")

epoch: 10, loss: 0.6862
epoch: 20, loss: 0.5371
epoch: 30, loss: 0.4497
epoch: 40, loss: 0.3927
epoch: 50, loss: 0.3523
epoch: 60, loss: 0.3220
epoch: 70, loss: 0.2983
epoch: 80, loss: 0.2792
epoch: 90, loss: 0.2633
epoch: 100, loss: 0.2499


In [7]:
# Evaluate our model

# Evaluation should not become part of the computation graph (that is only needed
# to compute the loss and update the weights), so gradient tracking is disabled here.
with torch.no_grad():
    y_predicted = model(X_test)  # sigmoid outputs: values between 0 and 1
    y_predicted_class = torch.round(y_predicted)  # round to the class label 0 or 1
    # Fraction of test samples whose predicted class equals the true label
    accuracy = (y_predicted_class == y_test).sum() / float(len(y_test))
    print(f'accuracy = {accuracy:.4f}')

accuracy = 0.9123
