Breast cancer neural net classification

In [275]:
import torch
from torch import nn
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

Import data

In [276]:
dataset = pd.read_csv('breast-cancer-wisconsin.data', header=None, na_values='?')
dataset.dropna(inplace=True)
dataset = dataset.astype(float)



Split features and labels

In [277]:
X = dataset.iloc[:, 1:-1].values
y = dataset.iloc[:, -1].values

Convert labels from 2-4 to 0-1

In [278]:
y = (y == 4).astype(int)

Determine GPU vs CPU and select device

In [279]:
device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'
print(f"Using {device} device")

Using cpu device


Split dataset

In [280]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Normalise features 

In [281]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Convert to pytorch tensors

In [282]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)


Create dataloader

In [283]:
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

Define neural net   

In [284]:
class BreastCancerModel(nn.Module):
    def __init__(self, input_size):
        super(BreastCancerModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 16)
        self.bn1 = nn.BatchNorm1d(16)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(16, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.bn1(self.fc1(x))
        x = self.relu(x)
        x = self.dropout(x)
        x = self.sigmoid(self.fc2(x))
        return x


Model init  

In [285]:
input_size = X_train.shape[1]
model = BreastCancerModel(input_size)

Define loss and optimiser

In [286]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

Train the model

In [294]:
epochs = 2
best_loss = float('inf')
for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    
    avg_loss = epoch_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}')
    
    # Early stopping check
    if avg_loss < best_loss:
        best_loss = avg_loss
    else:
        print("Early stopping triggered.")
        break


Epoch [1/2], Loss: 0.2901
Epoch [2/2], Loss: 0.2749


Evaluate the model

In [295]:
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)
    predictions = (predictions >= 0.5).float()
    accuracy = (predictions.eq(y_test_tensor).sum().item()) / y_test_tensor.shape[0]
    print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 97.81%
