In [45]:
!pip install torch

import torch.nn as nn
import pandas as pd



In [46]:
# Load the dataset from a CSV file located next to the notebook.
df = pd.read_csv("bcancer_data.csv")

In [47]:
# Preview the first rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,Cl.thickness,Cell.size,Cell.shape,Marg.adhesion,Epith.c.size,Bare.nuclei,Bl.cromatin,Normal.nucleoli,Mitoses,Class
0,1,1,1,1,2,1,2,1,1,0
1,5,1,1,1,1,1,3,1,1,0
2,4,1,1,1,2,1,1,1,1,0
3,2,1,1,1,2,1,2,1,1,0
4,1,2,3,1,2,1,2,1,1,0


In [48]:
# (rows, columns) of the loaded frame.
df.shape

(540, 10)

In [49]:
# Class balance: number of rows per value of the `Class` column.
df.Class.value_counts()

Class
0    301
1    239
Name: count, dtype: int64

In [50]:
# Convert the DataFrame to a NumPy array so later cells can slice it positionally.
# NOTE: this rebinds `df` from a DataFrame to an ndarray. The isinstance guard
# keeps the cell safe to re-run (an ndarray has no `.to_numpy()` method).
if isinstance(df, pd.DataFrame):
    df = df.to_numpy()
df

array([[1, 1, 1, ..., 1, 1, 0],
       [5, 1, 1, ..., 1, 1, 0],
       [4, 1, 1, ..., 1, 1, 0],
       ...,
       [4, 1, 1, ..., 1, 1, 0],
       [2, 1, 1, ..., 1, 1, 0],
       [4, 8, 8, ..., 4, 1, 1]])

In [51]:
# Features: the first five columns; labels: the column at index 9.
# NOTE(review): the frame has 10 columns, so columns 5-8 are not used as
# features — presumably deliberate; confirm. The model's input width must
# match the number of feature columns selected here.
X, y = df[:, :5], df[:, 9]


In [52]:
# Show the shapes of the feature matrix and the label vector side by side.
X.shape, y.shape

((540, 5), (540,))

In [53]:
# Pull out the first example and its label to eyeball one (features, target) pair.
X_sample, y_sample = X[0], y[0]

print(X_sample, y_sample)


[1 1 1 1 2] 0


In [54]:
import torch
# Convert features and labels to float32 tensors, matching the default dtype of
# nn.Linear parameters (BCEWithLogitsLoss also needs float targets).
# torch.as_tensor accepts both ndarrays and tensors, so this cell can be re-run
# safely; torch.from_numpy(X) would raise once X is already a tensor.
X = torch.as_tensor(X, dtype=torch.float)
y = torch.as_tensor(y, dtype=torch.float)

X[:5], y[:5]

(tensor([[1., 1., 1., 1., 2.],
         [5., 1., 1., 1., 1.],
         [4., 1., 1., 1., 2.],
         [2., 1., 1., 1., 2.],
         [1., 2., 3., 1., 2.]]),
 tensor([0., 0., 0., 0., 0.]))

In [55]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Sizes of the four resulting pieces.
len(X_train), len(y_train), len(X_test), len(y_test)

(432, 432, 108, 108)

In [56]:
import torch
from torch import nn

# Seed PyTorch's global RNG so the model's random weight initialisation is repeatable.
torch.manual_seed(42)

<torch._C.Generator at 0x11fade390>

In [57]:
class bcancerclassifier(nn.Module):
    """Small feed-forward binary classifier: Linear -> ReLU -> Linear.

    The network emits one raw logit per sample (no sigmoid is applied), so it
    is meant to be paired with a logit-based loss such as
    ``nn.BCEWithLogitsLoss``.

    Args:
        in_features: Number of input features per sample. Defaults to 5.
        hidden_units: Width of the single hidden layer. Defaults to 10.
    """

    def __init__(self, in_features: int = 5, hidden_units: int = 10):
        super().__init__()

        # Layers are created in a fixed order so that seeded weight
        # initialisation stays reproducible.
        self.layer_1 = nn.Linear(in_features=in_features, out_features=hidden_units)
        self.layer_2 = nn.Linear(in_features=hidden_units, out_features=1)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a ``(..., in_features)`` tensor to ``(..., 1)`` raw logits."""
        return self.layer_2(self.relu(self.layer_1(x)))


In [58]:
# Instantiate the classifier and print its layer layout.
model = bcancerclassifier()
print(model)

bcancerclassifier(
  (layer_1): Linear(in_features=5, out_features=10, bias=True)
  (layer_2): Linear(in_features=10, out_features=1, bias=True)
  (relu): ReLU()
)


In [59]:
# Binary cross-entropy computed directly on raw logits (the sigmoid is applied
# inside the loss), optimised with plain SGD at a learning rate of 0.1.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

In [60]:
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of predictions that equal their labels.

    Both tensors are flattened before comparison, so a ``(N,)`` label vector
    and a ``(N, 1)`` prediction column are compared element by element instead
    of being broadcast into an ``N x N`` grid (which would yield a wrong count).

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels with the same number of elements.

    Returns:
        Accuracy as a float percentage; ``0.0`` when there are no predictions.

    Raises:
        ValueError: If the two tensors hold a different number of elements.
    """
    total = y_pred.numel()
    if y_true.numel() != total:
        raise ValueError(
            f"y_true has {y_true.numel()} elements but y_pred has {total}"
        )
    if total == 0:
        # Nothing to score; avoid a ZeroDivisionError.
        return 0.0

    correct = torch.eq(y_true.reshape(-1), y_pred.reshape(-1)).sum().item()
    return (correct / total) * 100

In [61]:
# Peek at the model's raw outputs (logits) for the first five test rows.
# NOTE(review): this runs outside inference_mode/no_grad, so the result carries a grad_fn.
y_logits = model(X_test)[:5]
y_logits

tensor([[-1.4880],
        [-0.0572],
        [ 0.1132],
        [-0.0221],
        [-0.6182]], grad_fn=<SliceBackward0>)

In [62]:
torch.manual_seed(42)
epochs = 100
# Print about ten progress lines per run. A fixed interval of 100 with only
# 100 epochs would report epoch 0 and nothing else.
log_every = max(1, epochs // 10)

for epoch in range(epochs):
    # --- Training step (full batch) ---
    # Switch back to training mode every epoch: the evaluation phase below
    # puts the model in eval mode, and nothing else would undo that.
    model.train()

    # Forward pass: raw logits -> probabilities -> hard 0/1 predictions.
    y_logits = model(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))

    # The loss takes logits (loss_fn applies the sigmoid itself); accuracy
    # takes the rounded labels.
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)

    # Standard update: clear stale gradients, backpropagate, step the optimizer.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # --- Evaluation on the held-out split ---
    model.eval()
    with torch.inference_mode():
        test_logits = model(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

    if epoch % log_every == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_acc:.2f}%")

Epoch: 0 | Loss: 0.94396, Accuracy: 14.12% | Test Loss: 0.88319, Test Accuracy: 40.74%


In [67]:
# Switch to evaluation mode before scoring the held-out split.
model.eval()

# inference_mode disables gradient tracking for everything computed inside the block.
with torch.inference_mode():
    # Raw logits -> sigmoid probabilities -> hard 0/1 class labels.
    test_logits = torch.squeeze(model(X_test))
    test_probs = test_logits.sigmoid()
    test_preds = test_probs.round()

    # The loss is computed from the logits; accuracy from the rounded labels.
    test_loss = loss_fn(test_logits, y_test)
    test_acc = accuracy_fn(y_true=y_test, y_pred=test_preds)

print(f"Test Loss: {test_loss:.5f}, Test Accuracy: {test_acc:.2f}%")


Test Loss: 0.19180, Test Accuracy: 95.37%


In [71]:
from sklearn.metrics import confusion_matrix

# scikit-learn works on NumPy arrays, so convert the label and prediction tensors first.
y_true, y_pred = y_test.numpy(), test_preds.numpy()

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
print("Confusion Matrix:\n", cm)


Confusion Matrix:
 [[62  2]
 [ 3 41]]
