<a href="https://colab.research.google.com/github/ajibigad/ML-Playground/blob/main/breast_cancer_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

from google.colab import files

# Open Colab's file-upload prompt; the returned value is discarded.
# The next cell reads "breast-cancer.csv" from the working directory.
_ = files.upload()

Saving breast-cancer.csv to breast-cancer.csv


In [2]:
from sklearn.preprocessing import LabelEncoder

# Read the uploaded CSV into a DataFrame.
df = pd.read_csv("breast-cancer.csv")

# Replace the diagnosis letters with integer codes (M -> 1, B -> 0).
label_encoder = LabelEncoder()
df['diagnosis'] = label_encoder.fit_transform(df['diagnosis'])

# Targets: the encoded diagnosis column.
y = df['diagnosis'].values
# Features: every remaining column except the row identifier and the target.
X = df.drop(columns=['id', 'diagnosis']).values

In [6]:
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn


# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert the data to PyTorch tensors.
# Targets are reshaped from rank 1, (n,), to rank 2, (n, 1), for both splits.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# Z-score normalization. The statistics come from the training split only and
# are then applied unchanged to the test split.
mean = X_train_tensor.mean(dim=0, keepdim=True)
std = X_train_tensor.std(dim=0, keepdim=True)
# A feature that is constant across the training rows has std == 0, which would
# turn the divisions below into NaN/inf. Use a scale of 1 for such columns so
# they are only centered.
std = torch.where(std == 0, torch.ones_like(std), std)

X_train_normalized = (X_train_tensor - mean) / std
X_test_normalized = (X_test_tensor - mean) / std

In [22]:
import pdb

def sigmoid(x):
  """Apply the logistic function 1 / (1 + e^-x) element-wise to tensor `x`.

  Delegates to torch.sigmoid instead of spelling the formula out: with float32
  inputs below roughly -88, exp(-x) overflows to inf and back-propagating
  through the explicit formula multiplies 0 by inf, giving NaN gradients.
  """
  return torch.sigmoid(x)

def abs_mean_loss(predictions, targets):
  """Mean absolute-error style loss for binary targets.

  Where the target equals 1 the per-element error is 1 - prediction; everywhere
  else the error is the prediction itself. With predictions in [0, 1] (e.g.
  sigmoid outputs) and 0/1 targets this is the absolute difference.
  """
  error_if_positive = 1 - predictions
  per_element_error = torch.where(targets == 1, error_if_positive, predictions)
  return per_element_error.mean()

def mse(preds, targets):
  """Mean squared error: the average of the element-wise squared differences."""
  residual = preds - targets
  return residual.pow(2).mean()

def batch_accuracy(predictions, targets):
  """Fraction of predictions that match their target after thresholding at 0.5.

  A prediction >= 0.5 counts as the positive class. The computation runs with
  gradient tracking disabled.
  """
  with torch.no_grad():
    predicted_positive = predictions.ge(0.5)
    hits = predicted_positive.eq(targets)
    return hits.float().mean()

def validate_epoch(model, val_data, val_target):
    """Return the model's mean validation accuracy, rounded to 4 decimals.

    `val_data` and `val_target` are iterated in lockstep and each pair is
    scored with `batch_accuracy`; the per-pair scores are then averaged.
    Iterating a 2-D tensor yields one row at a time, so passing whole tensors
    scores every sample individually.

    The forward passes run under torch.no_grad(): evaluation never
    back-propagates, so recording an autograd graph for each item is wasted
    memory and time.
    """
    with torch.no_grad():
        accs = [batch_accuracy(model.predict(xb), yb) for xb, yb in zip(val_data, val_target)]
    return round(torch.stack(accs).mean().item(), 4)

class LinearModel():
  """Single linear layer followed by a sigmoid, trained by full-batch gradient descent.

  Predictions are sigmoid(x @ weight + bias). `weight` and `bias` are updated
  in place by `step`.
  """

  def __init__(self, weight, bias, x, y, x_validate, y_validate, loss_fn, metrics):
    """Store the parameters, data and callables used by `train`.

    Args:
      weight: tensor right-multiplied onto the inputs; must track gradients.
      bias: tensor added to the linear output; must track gradients.
      x: training inputs.
      y: training targets.
      x_validate: held-out inputs handed to `validate_epoch`.
      y_validate: held-out targets handed to `validate_epoch`.
      loss_fn: callable(predictions, targets) returning a scalar loss tensor.
      metrics: callable(predictions, targets) reported on the training data each epoch.
    """
    self.weight = weight
    self.bias = bias
    self.x = x
    self.y = y
    self.x_validate = x_validate
    self.y_validate = y_validate
    self.params = self.weight, self.bias
    self.loss_fn = loss_fn
    self.metrics = metrics

  def predict(self, x):
    """Return sigmoid(x @ weight + bias) for the inputs `x`."""
    # z(x) = wx + b
    # y = g(z(x)) where g is an activation function
    z = x@self.weight + self.bias

    # apply activation function
    return sigmoid(z)

  def step(self, loss, learning_rate):
    """Back-propagate `loss` and take one gradient-descent step on every parameter.

    Each parameter becomes p - learning_rate * p.grad, after which its gradient
    is zeroed so it does not accumulate into the next backward pass. Parameters
    whose gradient is None (the loss did not depend on them) are left untouched.
    """
    loss.backward()
    # The parameter update itself must not be recorded by autograd.
    with torch.no_grad():
      for p in self.params:
        if p.grad is None:
          continue
        p.sub_(p.grad * learning_rate)
        p.grad.zero_()

  def train(self, epoch, learning_rate):
    """Run `epoch` full-batch iterations, printing metrics before each update.

    Every iteration predicts on the training data, computes the loss and the
    training metric, evaluates on the validation data, prints all three and
    only then applies the gradient step.
    """
    for i in range(epoch):
      preds = self.predict(self.x)
      try:
        loss = self.loss_fn(preds, self.y)
      except Exception:
        # Show the predictions the loss function rejected, then re-raise.
        print(preds.view(-1).tolist())
        raise
      accuracy = self.metrics(preds, self.y)
      validation = validate_epoch(self, self.x_validate, self.y_validate)
      print(f"epoch {i}: loss: {loss}, accuracy: {accuracy}, validation: {validation}")
      self.step(loss, learning_rate)

In [32]:
def init_params(size, std=1.0):
  """Create a gradient-tracking tensor of shape `size` filled with standard-normal samples scaled by `std`."""
  values = torch.randn(size) * std
  return values.requires_grad_()

# Seed PyTorch's RNG so the random initial parameters, and therefore the whole
# training run, come out the same on every Restart & Run All.
torch.manual_seed(42)

# One weight per input feature, stored as an (n_features, 1) column so that
# x @ w gives one value per row; the single bias value is broadcast across rows.
w = init_params((X_train_normalized.shape[1], 1))
b = init_params(1)

# The held-out test split is passed as the validation data.
model = LinearModel(w, b, X_train_normalized, y_train_tensor, X_test_normalized, y_test_tensor, nn.BCELoss(), batch_accuracy)
model.train(epoch=50, learning_rate=1e-1)

epoch 0: loss: 1.4885520935058594, accuracy: 0.5692307949066162, validation: 0.5614
epoch 1: loss: 1.2992156744003296, accuracy: 0.6000000238418579, validation: 0.5965
epoch 2: loss: 1.1410770416259766, accuracy: 0.6351648569107056, validation: 0.6404
epoch 3: loss: 1.009139060974121, accuracy: 0.6659340858459473, validation: 0.6491
epoch 4: loss: 0.899129331111908, accuracy: 0.692307710647583, validation: 0.6579
epoch 5: loss: 0.8081143498420715, accuracy: 0.7340659499168396, validation: 0.7018
epoch 6: loss: 0.7330877780914307, accuracy: 0.7648351788520813, validation: 0.7368
epoch 7: loss: 0.6708152890205383, accuracy: 0.797802209854126, validation: 0.7982
epoch 8: loss: 0.6185960173606873, accuracy: 0.8175824284553528, validation: 0.807
epoch 9: loss: 0.5743277668952942, accuracy: 0.8285714387893677, validation: 0.8158
epoch 10: loss: 0.5363526940345764, accuracy: 0.8483516573905945, validation: 0.8158
epoch 11: loss: 0.5033795237541199, accuracy: 0.8527472615242004, validation: 0.

In [33]:
# Display the (weight, bias) tuple held by the model after training.
model.params

(tensor([[ 0.0575],
         [ 0.6589],
         [-0.1864],
         [ 2.2274],
         [ 0.8819],
         [-0.2817],
         [ 0.1732],
         [ 0.7158],
         [ 1.7172],
         [-0.0216],
         [ 1.1601],
         [-1.0381],
         [ 1.4504],
         [-0.1992],
         [ 0.3045],
         [-1.3961],
         [ 0.4994],
         [-1.2522],
         [-1.0119],
         [-1.0024],
         [ 0.3183],
         [ 1.0799],
         [ 0.0771],
         [ 1.9786],
         [ 0.8255],
         [ 0.5805],
         [ 0.7644],
         [-0.6810],
         [-0.6051],
         [ 0.7109]], requires_grad=True),
 tensor([-0.4543], requires_grad=True))