# Logistic Regression
---

In [1]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

  return f(*args, **kwds)


In [12]:
# Load the breast-cancer dataset and report its basic dimensions
cancer_dataset = datasets.load_breast_cancer()
X = cancer_dataset.data
y = cancer_dataset.target
n_samples, n_features = X.shape
print(f"Feature names: {cancer_dataset.feature_names}")
print(f"No. of Samples: {n_samples}\nNo. of Features: {n_features}")

# Hold out 20% of the samples for testing (fixed random_state keeps the split stable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)
print(f"Original X Shape: {X_train.shape} y Shape: {y_train.shape}")

# Standardise the features: the scaler is fitted on the training split only,
# and the same fitted scaler is then applied to the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert everything to float32 tensors; targets are reshaped into column vectors
X_train, X_test = (
    torch.from_numpy(arr.astype(np.float32)) for arr in (X_train, X_test)
)
y_train, y_test = (
    torch.from_numpy(arr.astype(np.float32)).view(-1, 1) for arr in (y_train, y_test)
)
print(f"Final X Shape: {X_train.shape} y Shape: {y_train.shape}")

Feature names: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
No. of Samples: 569
No. of Features: 30
Original X Shape: (455, 30) y Shape: (455,)
Final X Shape: torch.Size([455, 30]) y Shape: torch.Size([455, 1])


In [21]:
# Model Building
class LogisticModel(nn.Module):
    """Logistic-regression model: a single linear layer followed by a sigmoid.

    Args:
        n_input_features: size of the last dimension of the input passed to
            the linear layer.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # One output unit: the linear layer maps each sample to a single value
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        logits = self.linear(x)
        return logits.sigmoid()

In [22]:
# Hyperparameters
SEED = 1234    # fixes the random weight initialisation so re-runs give the same losses
alpha = 0.01   # learning rate
epochs = 200

# Seed PyTorch's RNG *before* the model is built: nn.Linear draws its initial
# weights at construction time, so without this every run trains a different model.
torch.manual_seed(SEED)
model = LogisticModel(n_features)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=alpha)

# Training loop: each epoch is one gradient step on the whole training tensor
for epoch in range(epochs):
    # Clear gradients left over from the previous step (backward() accumulates)
    optimizer.zero_grad()
    # Forward pass
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)
    # Backward pass
    loss.backward()
    # Update parameters
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f"Epoch: {epoch+1} Loss: {loss.item():.4f}")
    

Epoch: 10 Loss: 0.5762
Epoch: 20 Loss: 0.4713
Epoch: 30 Loss: 0.4059
Epoch: 40 Loss: 0.3612
Epoch: 50 Loss: 0.3285
Epoch: 60 Loss: 0.3033
Epoch: 70 Loss: 0.2833
Epoch: 80 Loss: 0.2668
Epoch: 90 Loss: 0.2530
Epoch: 100 Loss: 0.2412
Epoch: 110 Loss: 0.2309
Epoch: 120 Loss: 0.2219
Epoch: 130 Loss: 0.2140
Epoch: 140 Loss: 0.2069
Epoch: 150 Loss: 0.2004
Epoch: 160 Loss: 0.1946
Epoch: 170 Loss: 0.1893
Epoch: 180 Loss: 0.1844
Epoch: 190 Loss: 0.1799
Epoch: 200 Loss: 0.1757


In [23]:
# Evaluate Model
with torch.no_grad():
    # Inference only: gradient tracking is switched off inside this block.
    y_pred = model(X_test)
    # Round the sigmoid outputs to the nearest integer to get 0/1 class labels
    y_pred_class = torch.round(y_pred)
    # Fraction of test samples whose predicted label equals the true label
    n_correct = torch.eq(y_pred_class, y_test).sum()
    accuracy = n_correct / float(y_test.shape[0])
    print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.9298


## Note -
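- `fit_transform` vs. `transform` use cases: the train and test sets have different means and variances, so how does this work? `fit_transform` learns the mean and variance from the training set and scales it with them; `transform` then reuses those *training* statistics on the test set, so both sets go through the same mapping and no information from the test set leaks into preprocessing.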