## Breast cancer prediction

In [2]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

### Load and preprocess breast cancer dataset

In [3]:
"""Load and preprocess breast cancer dataset."""
# Fetch the dataset bundled with scikit-learn, then pull out the feature
# matrix and the label vector as separate names
data = load_breast_cancer()
X = data.data
y = data.target

### Understand inputs

In [4]:
# Dimensions of the raw feature matrix: rows are samples, columns are features
X.shape

(569, 30)

In [5]:
# Raw (unscaled) feature values of the first sample; note how widely the magnitudes differ
X[0, :]

array([1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,
       3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,
       8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,
       3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,
       1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01])

In [6]:
# Dimensions of the label vector: one entry per row of X
y.shape

(569,)

In [7]:
# Label of the first sample (an integer class id)
# NOTE(review): which diagnosis 0 and 1 stand for is not shown here — check data.target_names
y[0]

np.int64(0)

### Split dataset into training and testing

In [8]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split identical on every run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

In [9]:
# Size of the training split (the part not held out for testing)
X_train.shape

(455, 30)

In [10]:
# Size of the held-out test split (test_size=0.2 of the full dataset)
X_test.shape

(114, 30)

### Scale features
Scaling features, as done in the code with StandardScaler, transforms the input data so that each feature has a mean of 0 and a standard deviation of 1. This is also known as standardization. The purpose of scaling features in this context is to:

- Improve Model Convergence: Many machine learning algorithms, including neural networks optimized with gradient-based methods like SGD, converge faster when features are on a similar scale. Unscaled features with different ranges can cause gradients to vary widely, slowing down or destabilizing training.
- Ensure Fair Feature Influence: Features with larger numerical ranges could disproportionately influence the model compared to features with smaller ranges. Standardization ensures all features contribute equally to the model's predictions.
- Enhance Numerical Stability: Large or highly variable feature values can lead to numerical instability in computations, especially in deep learning frameworks like PyTorch. Scaling mitigates this risk.

In [11]:
# Standardise the features: the scaler's statistics are fitted on the training
# split only and then re-applied, unchanged, to the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Hand the NumPy arrays over to PyTorch as float32 tensors
X_train, X_test = [
    torch.from_numpy(features.astype(np.float32))
    for features in (X_train, X_test)
]
# Labels additionally become column vectors of shape (n_samples, 1)
y_train, y_test = [
    torch.from_numpy(labels.astype(np.float32)).view(-1, 1)
    for labels in (y_train, y_test)
]

In [12]:
# X_train is now a torch tensor; its dimensions are the same as before the conversion
X_train.shape

torch.Size([455, 30])

In [13]:
# Standardised feature values of the first training sample
X_train[0,:]

tensor([-0.3618, -0.2652, -0.3172, -0.4671,  1.8038,  1.1817, -0.5169,  0.1065,
        -0.3901,  1.3914,  0.1437, -0.1208,  0.1601, -0.1326, -0.5863, -0.1248,
        -0.5787,  0.1091, -0.2819, -0.1889, -0.2571, -0.2403, -0.2442, -0.3669,
         0.5449,  0.2481, -0.7109, -0.0797, -0.5280,  0.2506])

### Binary Classifier model

In [14]:
class BinaryClassifier(nn.Module):
    """Single-layer network producing a probability for binary classification.

    One linear layer maps ``input_features`` values to a single output, which
    ``forward`` squashes into the range (0, 1) with a sigmoid.
    """

    def __init__(self, input_features):
        """Create the linear layer taking ``input_features`` inputs."""
        super().__init__()
        # A single output unit: its sigmoid is the predicted probability of label 1
        self.linear = nn.Linear(input_features, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        logits = self.linear(x)
        return logits.sigmoid()

In [15]:
# The second dimension is the number of features per sample
X_train.shape

torch.Size([455, 30])

### Show the binary classification model
The printed model summary reports:
- the number of input features (`in_features`)
- the number of output features (`out_features`)

In [16]:
# Seed PyTorch's global RNG before building the model: nn.Linear draws its
# initial weights at random, so without a seed every Restart & Run All yields
# different losses and a different test accuracy. The value matches the
# random_state used for the train/test split.
torch.manual_seed(1234)

# One input per feature column of the training matrix
n_features = X_train.shape[1]
model = BinaryClassifier(n_features)

# Display the layer structure (input and output widths)
model

BinaryClassifier(
  (linear): Linear(in_features=30, out_features=1, bias=True)
)

### Train the model with given parameters.

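- forward pass: compute predictions for the training inputs
- loss: measure the error between the predictions and the true labels
- autograd (`loss.backward()`): compute the gradient of the loss with respect to each weight, i.e. the direction in which the weight should change
- stochastic gradient descent (optimizer, `optimizer.step()`): update the weights using those gradients
- `optimizer.zero_grad()`: reset the gradients before each backward pass, because PyTorch accumulates gradients by default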

In [17]:
# Hyperparameters of the optimisation run
num_epochs = 100
learning_rate = 0.01

"""Train the model with given parameters."""
# Binary cross-entropy on the model's sigmoid outputs, minimised with plain SGD
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    # Clear gradients left over from the previous step before computing new ones
    optimizer.zero_grad()

    # Forward pass over the whole training set, then the scalar loss
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # Backpropagate and apply one parameter update
    loss.backward()
    optimizer.step()

    # Report the loss once every 10 epochs
    if not (epoch + 1) % 10:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [10/100], Loss: 0.4627
Epoch [20/100], Loss: 0.4105
Epoch [30/100], Loss: 0.3721
Epoch [40/100], Loss: 0.3424
Epoch [50/100], Loss: 0.3186
Epoch [60/100], Loss: 0.2990
Epoch [70/100], Loss: 0.2825
Epoch [80/100], Loss: 0.2683
Epoch [90/100], Loss: 0.2560
Epoch [100/100], Loss: 0.2452


### Evaluate model performance on test set

In [18]:
# Inference only: disable autograd so no graph is recorded for the test-set forward pass
with torch.no_grad():
    # Sigmoid outputs of the model, one per test sample
    y_pred = model(X_test)

    # Explicit decision threshold: probability >= 0.5 -> class 1, otherwise class 0.
    # torch.round() rounds half to even, so it would map a probability of
    # exactly 0.5 to class 0 instead of 1.
    y_pred_classes = (y_pred >= 0.5).float()

    # Fraction of test samples whose predicted class equals the true label
    accuracy = y_pred_classes.eq(y_test).sum() / float(y_test.shape[0])
    print(f'\nTest Accuracy: {accuracy:.4f}')


Test Accuracy: 0.8947
