In [1]:
import os
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [2]:
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import Dataset, DataLoader
from torch import nn

In [3]:
import kagglehub
path = kagglehub.dataset_download("uciml/breast-cancer-wisconsin-data")

Using Colab cache for faster access to the 'breast-cancer-wisconsin-data' dataset.


In [4]:
df = pd.read_csv(path + "/data.csv")

In [5]:
df.drop(columns=["Unnamed: 32", "id"], inplace=True)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, 1:], df.iloc[:,0], test_size=0.2)

In [7]:
y_test

Unnamed: 0,diagnosis
215,M
309,B
460,M
181,M
80,B
...,...
558,B
527,B
367,B
16,M


In [8]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

In [11]:
X_train_tensor = torch.from_numpy(X_train.astype(np.float32))
X_test_tensor = torch.from_numpy(X_test.astype(np.float32))
y_train_tensor = torch.from_numpy(y_train.astype(np.float32))
y_test_tensor = torch.from_numpy(y_test.astype(np.float32))

In [12]:
X_train_tensor.shape, y_test_tensor.shape

(torch.Size([455, 30]), torch.Size([114]))

In [13]:
class MySimpleNN(nn.Module):
  def __init__(self, num_features):
    super().__init__()
    self.linear = nn.Linear(num_features, 1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, features):
      x = self.linear(features)
      x = self.sigmoid(x)
      return x

  # def loss_function(self, y_pred, y):
  #   epsilon = 1e-7
  #   y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)
  #   return -(y * torch.log(y_pred) + (1 - y) * torch.log(1 - y_pred)).mean()

In [14]:
# Important Parameters
learning_rate = 0.1
epochs= 25

In [16]:
model = MySimpleNN(X_train_tensor.shape[1])

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(epochs):

    # forward pass
    y_pred = model(X_train_tensor)

    # loss
    loss = nn.BCELoss()(y_pred.squeeze(), y_train_tensor)

    # backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f"Epoch: {epoch+1}, Loss: {loss.item()}")


Epoch: 1, Loss: 0.797137439250946
Epoch: 2, Loss: 0.5638657212257385
Epoch: 3, Loss: 0.44535109400749207
Epoch: 4, Loss: 0.3781226575374603
Epoch: 5, Loss: 0.334795206785202
Epoch: 6, Loss: 0.3042648136615753
Epoch: 7, Loss: 0.2814081609249115
Epoch: 8, Loss: 0.2635405957698822
Epoch: 9, Loss: 0.24911348521709442
Epoch: 10, Loss: 0.23716744780540466
Epoch: 11, Loss: 0.22707462310791016
Epoch: 12, Loss: 0.21840593218803406
Epoch: 13, Loss: 0.2108578234910965
Epoch: 14, Loss: 0.2042090743780136
Epoch: 15, Loss: 0.19829455018043518
Epoch: 16, Loss: 0.19298823177814484
Epoch: 17, Loss: 0.18819227814674377
Epoch: 18, Loss: 0.18382951617240906
Epoch: 19, Loss: 0.17983804643154144
Epoch: 20, Loss: 0.17616768181324005
Epoch: 21, Loss: 0.17277735471725464
Epoch: 22, Loss: 0.1696329265832901
Epoch: 23, Loss: 0.1667058765888214
Epoch: 24, Loss: 0.1639721393585205
Epoch: 25, Loss: 0.16141119599342346


In [22]:
# Model Evaluation
with torch.no_grad():
    y_pred = model(X_test_tensor)
    y_pred = (y_pred > 0.6).float()
    accuracy = (y_pred == y_test_tensor).float().mean()
    print(f"Accuracy: {accuracy.item()}")

Accuracy: 0.5409356951713562
