In [10]:
#pytorch pipeline
# 1) Design the model(input,output size, forward prop)
# 2) loss and optimizer
# 3) Train the model
#  -> forward prop = prediction
#  -> loss and gradient
#  -> update weights

import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler   #bcz we want to scale our features
from sklearn.model_selection import train_test_split  # bcz we want to split our data for train and testing

# 0) preprocessing of the data
bc = datasets.load_breast_cancer() #breast cancer dataset is a binary classification problem
X, y= bc.data, bc.target

n_samples,n_features = X.shape
#split our data
X_train, x_test,y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=1234)
#scale our features will make our features to have zero mean and unit variance. this is always needed when we deal with logistic regression

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
x_test = sc.fit_transform(x_test)

#convert our data to torch tensor
X_train = torch.from_numpy(X_train.astype(np.float32))
x_test = torch.from_numpy(x_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))

y_train = y_train.view(y_train.shape[0],1)
y_test = y_test.view(y_test.shape[0],1)

# 1) model
# f= w*x + b, sigmoid at the end
#we need to derive our own class
class LogisticRegression(nn.Module):
  def __init__(self, n_input_features):
    super(LogisticRegression, self).__init__()
    self.linear = nn.Linear(n_input_features,1) #1=outputfeaturesno.


  def forward(self,x):
    y_predicted = torch.sigmoid(self.linear(x))
    return y_predicted

model = LogisticRegression(n_features)


# 2) loss and optimizer\
learning_rate = 0.01
criterion = nn.BCELoss()  #binary cross entropy loss
optimizer = torch.optim.SGD(params = model.parameters(),lr = learning_rate)

# 3) training loop
iterations= 100
for epoch in range(iterations):
  #forwardprop = prediction
  y_pred = model(X_train)

  #loss
  loss = criterion(y_pred,y_train)

  #gradients
  loss.backward()

  #update the weights
  optimizer.step()

  #zero grad
  optimizer.zero_grad()
  
  if((epoch+1)%10==0):
    
    print(f'epoch= {epoch+1}, loss= {loss:.3f}')
    

with torch.no_grad():
  y_predicted = model(x_test)
  y_predicted_cls = y_predicted.round()  #if we had not written touch.nograd, then this statement would have changed the gradients in this y_predicted
  #we need accuracy of this y_predicted_cls
  accuracy = y_predicted_cls.eq(y_test).sum() / float(y_test.shape[0])
  print(f'accuracy = {accuracy:.4f}')




epoch= 10, loss= 0.578
epoch= 20, loss= 0.474
epoch= 30, loss= 0.408
epoch= 40, loss= 0.362
epoch= 50, loss= 0.329
epoch= 60, loss= 0.303
epoch= 70, loss= 0.283
epoch= 80, loss= 0.266
epoch= 90, loss= 0.252
epoch= 100, loss= 0.240
accuracy = 0.8860
