<a href="https://colab.research.google.com/github/afeef-shaikh/Cancer-Prediction/blob/main/ITM740_Assingment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import necessary libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
import torch
import torch.nn as nn
import torch.optim as optim

Load the dataset

In [None]:
# Read the breast-cancer CSV into a DataFrame. As the preview below shows, the
# file carries a leading unnamed index column ("Unnamed: 0"), the cell
# attributes, and the binary target column "Class" in the last position.
df = pd.read_csv(filepath_or_buffer='bcancer_data.csv')

# Bare last expression -> rich (truncated) display of the frame.
df

Unnamed: 0,Cl.thickness,Cell.size,Cell.shape,Marg.adhesion,Epith.c.size,Bare.nuclei,Bl.cromatin,Normal.nucleoli,Mitoses,Class
0,1,1,1,1,2,1,2,1,1,0
1,5,1,1,1,1,1,3,1,1,0
2,4,1,1,1,2,1,1,1,1,0
3,2,1,1,1,2,1,2,1,1,0
4,1,2,3,1,2,1,2,1,1,0
...,...,...,...,...,...,...,...,...,...,...
535,1,1,1,1,2,1,1,1,1,0
536,3,1,1,1,2,1,2,3,1,0
537,4,1,1,1,2,1,1,1,1,0
538,2,1,1,1,2,1,1,1,1,0


In [None]:
# Features: every column except the target "Class".
# The CSV also contains "Unnamed: 0", which is just the saved row index, not a
# measurement. Leaving it in would hand the network a 10th input that is
# merely the row number (0..538), so it is removed as well. errors='ignore'
# keeps this cell working if the file is ever loaded without that column.
X = df.drop(columns=['Class']).drop(columns=['Unnamed: 0'], errors='ignore')

# Target: the binary "Class" label.
y = df['Class']

Split the dataset into training and testing sets
- 80% as training data
- 20% as test data


In [None]:
# Hold out 20% of the rows as the test set. The fixed random_state makes the
# shuffle (and therefore the split) repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


Convert the data to PyTorch tensors


In [None]:
def _to_float_tensor(data, as_column=False):
    """Return `data` as a float32 tensor, optionally shaped as a column vector.

    The input is first passed through np.asarray, so a DataFrame/Series and an
    already-converted CPU tensor are both accepted. That makes the cell below
    safe to re-run: the original `.values` access raised AttributeError once
    the names had been rebound to tensors.

    Args:
        data: Array-like object (DataFrame, Series, ndarray or CPU tensor).
        as_column: When True, reshape the result to (-1, 1).

    Returns:
        A torch.float32 tensor.
    """
    tensor = torch.as_tensor(np.asarray(data), dtype=torch.float32)
    # reshape (unlike a second .values/.view chain) is a no-op on an (n, 1) input.
    return tensor.reshape(-1, 1) if as_column else tensor


# Convert features to float tensors and targets to float column vectors
# (BCEWithLogitsLoss needs the targets to have the same shape as the model output).
X_train = _to_float_tensor(X_train)
X_test  = _to_float_tensor(X_test)
y_train = _to_float_tensor(y_train, as_column=True)
y_test  = _to_float_tensor(y_test, as_column=True)

Define the Neural Network Model


In [None]:
# A simple feed-forward network: one ReLU hidden layer followed by a single-unit output layer.
class BreastCancerNet(nn.Module):
    """Binary classifier with one hidden layer that returns raw logits.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of neurons in the hidden layer. Defaults to 10,
            the width that was previously hard-coded.
    """

    def __init__(self, input_size, hidden_size=10):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)  # Input -> hidden layer
        self.relu = nn.ReLU()                          # Non-linearity for the hidden layer
        self.fc2 = nn.Linear(hidden_size, 1)           # Hidden -> single output unit

    def forward(self, x):
        """Map a batch of feature rows to one logit per row.

        No sigmoid is applied here; the value returned is the direct output of
        the final linear layer.
        """
        return self.fc2(self.relu(self.fc1(x)))

# Seed PyTorch's RNG before the layers are created: nn.Linear weights are
# randomly initialised, so without a fixed seed the losses and metrics below
# change on every Restart & Run All.
torch.manual_seed(42)

input_size = X_train.shape[1]  # one input unit per feature column in X_train
model = BreastCancerNet(input_size)


Define Loss Function and Optimizer


In [None]:
# Plain stochastic gradient descent over all of the model's parameters,
# with a learning rate of 0.1.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

# BCEWithLogitsLoss applies the sigmoid internally, so it is fed the model's
# raw outputs rather than probabilities.
criterion = nn.BCEWithLogitsLoss()

Train the Model

In [None]:
num_epochs = 100

# Training mode is set once up front; nothing inside the loop switches it off.
model.train()

# Full-batch training: each epoch is a single update computed on all of X_train.
for epoch in range(num_epochs):
    # Forward pass and loss over the whole training set
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Drop the previous step's gradients, backpropagate, then update the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the loss on every 10th epoch for monitoring
    is_report_epoch = (epoch + 1) % 10 == 0
    if is_report_epoch:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [10/100], Loss: 0.4577
Epoch [20/100], Loss: 0.3713
Epoch [30/100], Loss: 0.3148
Epoch [40/100], Loss: 0.2802
Epoch [50/100], Loss: 0.2572
Epoch [60/100], Loss: 0.2403
Epoch [70/100], Loss: 0.2270
Epoch [80/100], Loss: 0.2160
Epoch [90/100], Loss: 0.2066
Epoch [100/100], Loss: 0.1983


Evaluate the Model on Test Data

In [None]:
# Inference only: switch the model to eval mode and skip gradient tracking.
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    # Squash the model outputs into probabilities
    probabilities = test_outputs.sigmoid()
    # Probability >= 0.5 -> class 1.0, otherwise class 0.0
    predicted = probabilities.ge(0.5).float()

# The metric functions used next take NumPy arrays, so convert both tensors
y_pred = predicted.numpy()
y_true = y_test.numpy()

Build the Confusion Matrix and Calculate Metrics

In [None]:
# Score the thresholded predictions against the true test labels.
# zero_division=0 makes precision/recall evaluate to 0 when their denominator
# is zero (e.g. no positive predictions at all).
recall = recall_score(y_true, y_pred, zero_division=0)
precision = precision_score(y_true, y_pred, zero_division=0)
accuracy = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred)

# Report: the matrix under its heading, then each metric to four decimals
print("Confusion Matrix:", cm, sep="\n")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")

Confusion Matrix:
[[63  1]
 [ 4 40]]
Accuracy: 0.9537
Precision: 0.9756
Recall: 0.9091
