Logistic Regression

In [279]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification



In [280]:
# Device used for the model and every tensor below.
# CPU is the default; flip USE_GPU to True to opt in to CUDA when a GPU is
# actually available (it falls back to CPU otherwise). A single expression is
# used so that no assignment is silently overwritten.
USE_GPU = False
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

Create Dataset

In [281]:
# Synthetic two-class dataset: 1000 rows by 17 feature columns.
# 10 informative + 7 redundant columns account for all 17 features.
# The fixed random_state makes the generated data reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=17,
    n_informative=10,
    n_redundant=7,
    n_classes=2,
    random_state=21,
)


In [282]:
# Unpack the dataset dimensions; n_features is used to size the model input.
n_samples, n_features = X.shape

# Keep the shape as the cell's final expression so it is actually displayed
# (a bare expression that is not last in a cell produces no output).
X.shape

Create Training and Test Sets

In [283]:
# Hold out 20% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42
)

# Standardise the features. The scaler's statistics are learned from the
# training rows only and then reused on the test rows, so nothing about the
# test set leaks into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Convert numpy arrays to tensors

In [284]:
# Convert the arrays to float32 tensors on the chosen device.
# torch.as_tensor accepts both ndarrays and existing tensors, so re-running
# this cell is harmless (torch.from_numpy raises TypeError once these names
# already hold tensors).
X_train = torch.as_tensor(X_train, dtype=torch.float32, device=device)
X_test = torch.as_tensor(X_test, dtype=torch.float32, device=device)

# Labels are reshaped to a single column, (N, 1), so they line up with the
# model's one-unit output when the loss is computed.
y_train = torch.as_tensor(y_train, dtype=torch.float32, device=device).view(-1, 1)
y_test = torch.as_tensor(y_test, dtype=torch.float32, device=device).view(-1, 1)

Create class for Logistic Regression

In [285]:
class LogisticRegression(nn.Module):
    """Binary classifier: Linear -> ELU -> Linear -> sigmoid.

    Despite the name, the model contains one hidden layer, so it is a small
    feed-forward network rather than a plain (single-layer) logistic
    regression.

    Args:
        n_input_features: Number of feature columns in each input row.
        n_hidden: Width of the hidden layer. Defaults to 20.
    """

    def __init__(self, n_input_features, n_hidden=20):
        super().__init__()
        self.linear1 = nn.Linear(n_input_features, n_hidden)
        # A single output unit per sample, matching targets shaped (N, 1).
        self.linear2 = nn.Linear(n_hidden, 1)
        # Non-linearity placed between the two linear layers; without it the
        # two layers would collapse into one linear map.
        self.elu = nn.ELU()

    def forward(self, x):
        """Return the sigmoid output of the network for ``x``.

        Args:
            x: Float tensor whose last dimension is ``n_input_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1, holding values in [0, 1] that are used as the
            predicted probability of class 1.
        """
        hidden = self.elu(self.linear1(x))
        y_predicted = torch.sigmoid(self.linear2(hidden))
        return y_predicted

In [286]:
# Seed PyTorch's RNG immediately before construction so the layers' random
# initial weights, and therefore the training log, are the same every time
# this cell is run.
torch.manual_seed(42)
model = LogisticRegression(n_features).to(device)

In [287]:
# Gradient descent over every trainable parameter of the model.
optimiser = torch.optim.SGD(params=model.parameters(), lr=0.1)

# Binary cross-entropy on probabilities: it expects inputs already squashed
# into [0, 1], which the model's final sigmoid provides.
criterion = nn.BCELoss()

Training Loop

In [288]:
num_epochs = 1000

# The mode does not change during training, so set it once rather than on
# every epoch.
model.train()

for epoch in range(num_epochs):
    # Start each step from clean gradients so nothing left over from an
    # earlier (e.g. interrupted) run can leak into this update.
    optimiser.zero_grad()

    # Forward pass over the whole training set (no mini-batching).
    y_predicted = model(X_train)

    # Binary cross-entropy between the predictions and the training labels.
    loss = criterion(y_predicted, y_train)

    # Backward pass, then one parameter update.
    loss.backward()
    optimiser.step()

    # Report progress every 100 epochs.
    if (epoch + 1) % 100 == 0:
        print(f'epoch {epoch+1}, loss = {loss.item(): .2f}')

epoch 100, loss =  0.31
epoch 200, loss =  0.26
epoch 300, loss =  0.22
epoch 400, loss =  0.19
epoch 500, loss =  0.17
epoch 600, loss =  0.15
epoch 700, loss =  0.14
epoch 800, loss =  0.13
epoch 900, loss =  0.12
epoch 1000, loss =  0.12


Evaluate the Model

In [289]:
# Put the module in evaluation mode before scoring the held-out data.
model.eval()

# inference_mode disables gradient tracking for everything inside the block.
with torch.inference_mode():
    y_predicted = model(X_test)

    # Round the sigmoid outputs to hard 0/1 class labels.
    y_predicted_cls = torch.round(y_predicted)

    # Element-wise equality marks each correct prediction as True; summing
    # counts them, and dividing by the number of test rows (the 20% held out
    # by train_test_split) gives the accuracy.
    n_correct = (y_predicted_cls == y_test).sum()
    acc = n_correct / float(y_test.shape[0])

    print(f'accuracy: {acc:.2f}')

accuracy: 0.89
