In [1]:
# Design the model - input size, output size, forward pass
# Define the loss and the optimiser
# Training loop - forward pass, backward pass, weight update

In [4]:
import torch
from torch import nn as nn
from sklearn import datasets
from sklearn.preprocessing import StandardScaler #to scale
from sklearn.model_selection import train_test_split #to split th dataset into train and test
import numpy as np
import matplotlib.pyplot as plt

In [5]:
# Breast-cancer dataset: a binary classification problem
bc = datasets.load_breast_cancer()
x = bc.data      # feature matrix
y = bc.target    # class labels

In [9]:
# x is a 2-D feature matrix: the first dimension counts samples, the second counts features
n_sample, n_features = x.shape
print(f"Number of data samples - {n_sample}, Number of features - {n_features}")

Number of data samples - 569, Number of features - 30


In [11]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical across runs
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=47,
)

In [14]:
# Standardise every feature to zero mean and unit variance.
# The scaler's statistics are learned from the training split only and are
# then reused, unchanged, for the test split.
sc = StandardScaler().fit(x_train)

x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [15]:
# Convert every split to a float32 tensor (the default dtype of nn.Linear
# parameters; BCELoss also needs floating-point targets).
# torch.as_tensor accepts both NumPy arrays and tensors, so re-running this
# cell is harmless: when the variables already hold float32 tensors it simply
# returns them. The previous `.astype(...)` form raised AttributeError on a
# second run because tensors have no `.astype` method.
x_train = torch.as_tensor(x_train, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.float32)
x_test = torch.as_tensor(x_test, dtype=torch.float32)
y_test = torch.as_tensor(y_test, dtype=torch.float32)

In [16]:
# Turn both label vectors into (n, 1) column vectors so they line up with the
# single-output-per-sample shape produced by the model
y_train, y_test = (labels.view(-1, 1) for labels in (y_train, y_test))

In [18]:
# Sanity-check the tensor shapes after conversion and reshaping: the targets
# should now be column vectors with one row per sample in the matching x split
print(f"X train - {x_train.shape}")
print(f"Y train - {y_train.shape}")
print(f"X test - {x_test.shape}")
print(f"Y test - {y_test.shape}")

X train - torch.Size([455, 30])
Y train - torch.Size([455, 1])
X test - torch.Size([114, 30])
Y test - torch.Size([114, 1])


In [19]:
# Logistic regression function - f = wx+b, sigmoid at the end

class LogisticReg(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    Args:
        n_features_input: number of input features per sample.
    """

    def __init__(self, n_features_input):
        super().__init__()
        # Single output unit: the linear score w.x + b for each sample
        self.lin = nn.Linear(n_features_input, 1)

    def forward(self, x):
        """Return sigmoid(w.x + b) for each row of ``x``, as values in (0, 1)."""
        logits = self.lin(x)
        return torch.sigmoid(logits)



In [21]:
# Seed PyTorch's RNG before the model is built so the random initialisation of
# the linear layer -- and therefore the loss values printed below -- is the
# same on every fresh run. 47 matches the random_state of the train/test split.
torch.manual_seed(47)

model = LogisticReg(x_train.shape[1])
# Binary cross-entropy: compares the model's sigmoid probabilities with the
# binary class labels
loss_func = nn.BCELoss()
learning_rate = 0.01
optimiser = torch.optim.SGD(model.parameters(), lr=learning_rate)

epochs = 200

for epoch in range(epochs):
    # forward pass over the whole training set (full-batch gradient descent)
    y_pred = model(x_train)
    loss = loss_func(y_pred, y_train)

    # backward pass: populate .grad on every model parameter
    loss.backward()

    # weight update, then reset the gradients so they do not accumulate into
    # the next iteration
    optimiser.step()
    optimiser.zero_grad()

    # report every 10th iteration, labelled with a 1-based epoch number
    if epoch % 10 == 0:
        print(f"loss at epoch {epoch+1} is {loss.item():.2f}")


loss at epoch 1 is 0.65
loss at epoch 11 is 0.53
loss at epoch 21 is 0.45
loss at epoch 31 is 0.40
loss at epoch 41 is 0.36
loss at epoch 51 is 0.33
loss at epoch 61 is 0.31
loss at epoch 71 is 0.29
loss at epoch 81 is 0.27
loss at epoch 91 is 0.26
loss at epoch 101 is 0.25
loss at epoch 111 is 0.24
loss at epoch 121 is 0.23
loss at epoch 131 is 0.22
loss at epoch 141 is 0.22
loss at epoch 151 is 0.21
loss at epoch 161 is 0.21
loss at epoch 171 is 0.20
loss at epoch 181 is 0.20
loss at epoch 191 is 0.19


In [24]:
# Model evaluation on the held-out test split

with torch.no_grad():  # inference only, so skip building the autograd graph
    y_pred = model(x_test)

    # Round each sigmoid probability to the nearest class label (0 or 1)
    y_pred_classes = torch.round(y_pred)

    # Accuracy = number of matching labels / number of test samples
    acc = (y_pred_classes == y_test).sum() / float(len(y_test))
    print(f"Model accuracy is {acc:.4f}")

Model accuracy is 0.9561


In [None]:
# TODO: extend this to a train/validation/test split, and plot the loss history and accuracy history