In [None]:
# Logistic regression
# PyTorch pipeline
# 1) design model (input size, output size, forward pass)
# 2) Construct loss and optimizer
# 3) Set up training loop
#     - forward pass: compute prediction and loss
#     - backward pass: gradient calculation
#     - update weights
# 4) Evaluate

import random
import torch
import torch.nn as nn
import numpy as np
import sklearn.datasets as datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# 0) prepare data

# Load the breast-cancer dataset from sklearn.datasets: X holds the features, Y the targets.
bc = datasets.load_breast_cancer()
X = bc.data
Y = bc.target
n_samples, n_features = X.shape

# Draw an integer in [0, 100) to seed the split. No RNG seed is set in the
# visible code, so the 80/20 train/test split presumably differs between runs.
random_number = int(random.random()*100)
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=random_number,
)

# -- Standardization rescales each feature so that it has mean 0 and variance 1.
# -- The goal is to bring all features onto a common scale without distorting
#    the differences in the ranges of their values.
# https://towardsdatascience.com/what-and-why-behind-fit-transform-vs-transform-in-scikit-learn-78f915cf96fe
sc = StandardScaler()

# fit_transform = fit (compute the mean and variance of every training feature)
# followed by transform (rescale the features with those statistics).
X_train = sc.fit_transform(X_train)

# The test data is only transformed: it reuses the mean and variance computed
# from the training data instead of fitting new ones.
X_test = sc.transform(X_test)

# Cast every NumPy array to float32 and wrap it in a torch tensor.
X_train, X_test, Y_train, Y_test = (
    torch.from_numpy(array.astype(np.float32))
    for array in (X_train, X_test, Y_train, Y_test)
)

# Give the targets an explicit trailing dimension of size 1: (N,) -> (N, 1).
Y_train = Y_train.view(len(Y_train), 1)
Y_test = Y_test.view(len(Y_test), 1)

# 1) design model (input size, output size, forward pass)
# f(x) = wx + b, sigmoid at the end.

class LogisticRegression(nn.Module):
    """Logistic-regression model: one linear layer followed by a sigmoid.

    Args:
        n_input_features: Number of input features fed to the linear layer.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # A single output unit: one score per input row.
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

model = LogisticRegression(n_input_features=n_features)

# 2) Construct loss and optimizer

# Binary cross-entropy is used as the loss (cross entropy works better for
# classification problems); the parameters are updated with SGD.
learning_rate = 0.01
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# 3) Set up training loop
#     Every iteration runs on the whole of X_train:
#     forward pass -> backward pass -> weight update -> gradient reset.

n_iters = 100
for epoch in range(n_iters):
    # Forward pass: predictions for the training set and their loss.
    y_predicted = model(X_train)
    loss = criterion(y_predicted, Y_train)

    # Backward pass: compute the gradients of the loss.
    loss.backward()

    # Update the weights, then clear the gradients so they do not accumulate
    # into the next iteration.
    optimizer.step()
    optimizer.zero_grad()

    # Only report the loss on every 10th epoch.
    if epoch % 10:
        continue
    print(f"epoch {epoch}, loss = {loss.item():.4f}")

# 4) Evaluate the model
# No gradients are needed for evaluation, so autograd tracking is switched off.
with torch.no_grad():
    y_predicted = model(X_test)
    # Round the sigmoid outputs to the nearest integer to get 0/1 class labels.
    y_predicted_cls = y_predicted.round()
    # Fraction (between 0 and 1) of test samples whose label matches the target.
    accuracy = y_predicted_cls.eq(Y_test).sum() / float(Y_test.shape[0])

    # `accuracy` is a fraction, so it is scaled by 100 to match the "%" label.
    # ".2f" fixes the precision; a bare "4f" would only set a minimum field
    # width of 4 and leave the default six decimals.
    print(f"Accuracy is {accuracy.item() * 100:.2f} %")
