<a href="https://colab.research.google.com/github/ajibigad/ML-Playground/blob/main/breast_cancer_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import os
import pandas as pd

from google.colab import files, drive
import shutil

# Attach Google Drive to this runtime unless it is already mounted.
if not os.path.ismount('/content/drive'):
    print("Mounting Google Drive...")
    drive.mount('/content/drive')
else:
    print("Google Drive is already mounted.")

destination = "/content/drive/MyDrive/Colab Notebooks/breast-cancer.csv"  # Where the dataset is kept in Drive

# Ask for an upload only when the dataset is not in Drive yet; later runs reuse the stored copy.
if not os.path.exists(destination):
    uploaded = files.upload()
    if not uploaded:
        # Fail with a clear message instead of an IndexError on an empty upload result.
        raise RuntimeError("No file was uploaded; the breast-cancer CSV is required to continue.")
    filename = next(iter(uploaded))  # Name of the first uploaded file
    # Make sure the target folder exists before moving the file into it.
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    shutil.move(filename, destination)
    print(f"Uploaded file '{filename}' has been moved to Google Drive at '{destination}'")
else:
    print(f"{destination} already exists")

Google Drive is already mounted.
/content/drive/MyDrive/Colab Notebooks/breast-cancer.csv already exists


In [14]:
from sklearn.preprocessing import LabelEncoder

# Read the dataset stored at `destination` into a DataFrame.
df = pd.read_csv(destination)

# Replace the diagnosis labels with integer codes (M -> 1, B -> 0).
label_encoder = LabelEncoder()
df['diagnosis'] = label_encoder.fit_transform(df['diagnosis'])

# Features: every column except the row identifier and the label.
X = df.drop(columns=['id', 'diagnosis']).to_numpy()
# Targets: the encoded diagnosis column.
y = df['diagnosis'].to_numpy()

In [15]:
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn


# Hold out 20% of the rows; the fixed random_state keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)  # Reshape targets to a rank 2 tensor, i.e. (n_train,) -> (n_train, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)  # Reshape targets to a rank 2 tensor, i.e. (n_test,) -> (n_test, 1)

# Z-score normalization. The statistics come from the training split only and are
# reused for the test split, so nothing about the held-out rows leaks into training.
mean = X_train_tensor.mean(dim=0, keepdim=True)
std = X_train_tensor.std(dim=0, keepdim=True)
# A constant feature has std == 0 and would turn the divisions below into NaN/inf;
# scale such a feature by 1 instead (it simply becomes all zeros after centering).
std[std == 0] = 1.0

X_train_normalized = (X_train_tensor - mean) / std
X_test_normalized = (X_test_tensor - mean) / std

In [16]:
import pdb
import numpy as np

def sigmoid(x):
  """Element-wise logistic function 1 / (1 + exp(-x)) of tensor `x`.

  Delegates to torch.sigmoid. The hand-written formula overflows exp(-x) to inf
  for very negative inputs; the forward value is still 0, but autograd then
  multiplies 0 by inf and yields a NaN gradient that poisons every parameter.
  """
  return torch.sigmoid(x)

def abs_mean_loss(predictions, targets):
  """Mean absolute error between predictions and binary targets.

  Where the target equals 1 the per-element error is `1 - prediction`;
  everywhere else it is the prediction itself (i.e. abs(0 - prediction) when
  predictions are sigmoid outputs in [0, 1]). Returns the mean of those errors.
  """
  target_is_one = targets == 1
  per_element_error = torch.where(target_is_one, 1 - predictions, predictions)
  return per_element_error.mean()

def mse(preds, targets):
  """Mean squared error: the average of (preds - targets) squared."""
  residual = preds - targets
  squared = residual ** 2
  return squared.mean()

def batch_accuracy(predictions, targets):
  """Fraction of predictions that match the targets after thresholding at 0.5.

  A prediction >= 0.5 counts as the positive class. The result is a scalar
  tensor computed with gradient tracking disabled.
  """
  with torch.no_grad():
    predicted_positive = predictions >= 0.5
    hits = predicted_positive == targets
    return hits.float().mean()

def validate_epoch(model, val_data, val_target):
  """Average `batch_accuracy` over paired items of `val_data` and `val_target`.

  The two arguments are iterated together with zip; each pair is passed through
  `model.predict` and scored. Returns the mean score as a Python float rounded
  to 4 decimal places.
  """
  # Validation never backpropagates, so skip building autograd graphs for these forward passes.
  with torch.no_grad():
    accs = [batch_accuracy(model.predict(xb), yb) for xb, yb in zip(val_data, val_target)]
  return round(torch.stack(accs).mean().item(), 4)

class LinearModel():
  """Single linear layer followed by a sigmoid, trained with full-batch gradient descent."""

  def __init__(self, weight, bias, x, y, x_validate, y_validate, loss_fn, metrics):
    """Store parameters, data and the callables used while training.

    Args:
      weight: tensor used as `x @ weight`; expected to require gradients.
      bias: tensor added to `x @ weight`; expected to require gradients.
      x, y: training inputs and targets (the whole set is used every epoch).
      x_validate, y_validate: held-out inputs and targets handed to validate_epoch.
      loss_fn: callable `loss_fn(preds, targets)` whose result is backpropagated.
      metrics: callable `metrics(preds, targets)` reported once per epoch.
    """
    self.weight = weight
    self.bias = bias
    self.x = x
    self.y = y
    self.x_validate = x_validate
    self.y_validate = y_validate
    self.params = self.weight, self.bias
    self.loss_fn = loss_fn
    self.metrics = metrics
    self.z = None  # pre-activation of the most recent predict() call

  def predict(self, x):
    """Return sigmoid(x @ weight + bias); the pre-activation is kept in `self.z`.

    Raises:
      FloatingPointError: if the pre-activation contains NaN.
    """
    # z(x) = xw + b
    # y = g(z(x)) where g is an activation function
    self.z = x@self.weight + self.bias

    if torch.isnan(self.z).any():
      # Stop here like the old breakpoint did, but without blocking unattended runs;
      # in a notebook, %debug afterwards gives the same post-mortem inspection.
      raise FloatingPointError("NaN encountered in pre-activation z during predict()")

    # apply activation function
    return sigmoid(self.z)

  def step(self, loss, learning_rate):
    """Backpropagate `loss` and apply one gradient-descent update to every parameter."""
    loss.backward()
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
      for p in self.params:
        if p.grad is None:
          # Parameter did not take part in this loss; nothing to update.
          continue
        p.sub_(p.grad * learning_rate)
        # Gradients accumulate across backward() calls, so reset for the next epoch.
        p.grad.zero_()

  def train(self, epoch, learning_rate):
    """Run `epoch` full-batch iterations, printing loss, metric and validation score each time."""
    for i in range(epoch):
      preds = self.predict(self.x)
      try:
        loss = self.loss_fn(preds, self.y)
      except Exception:
        # Dump the offending predictions and pre-activations before re-raising.
        print(preds.view(-1).tolist(), self.z)
        raise
      accuracy = self.metrics(preds, self.y)
      validation = validate_epoch(self, self.x_validate, self.y_validate)
      print(f"epoch {i}: loss: {loss}, accuracy: {accuracy}, validation: {validation}")
      self.step(loss, learning_rate)

def init_params(size, std=1.0):
  """Create a gradient-tracking tensor of shape `size` drawn from randn and scaled by `std`."""
  noise = torch.randn(size)
  scaled = noise * std
  return scaled.requires_grad_()

In [17]:
# Seed the RNG so the random initial parameters, and therefore the training log below,
# are the same on every Restart & Run All.
torch.manual_seed(42)

w = init_params((X_train_normalized.shape[1], 1))  # one weight per input feature, single output column
b = init_params(1)

model = LinearModel(w, b, X_train_normalized, y_train_tensor, X_test_normalized, y_test_tensor, nn.BCELoss(), batch_accuracy)
model.train(epoch=50, learning_rate=1e-1)

epoch 0: loss: 2.669750928878784, accuracy: 0.19560439884662628, validation: 0.193
epoch 1: loss: 2.1073226928710938, accuracy: 0.2549450695514679, validation: 0.2193
epoch 2: loss: 1.6575454473495483, accuracy: 0.3384615480899811, validation: 0.307
epoch 3: loss: 1.322835087776184, accuracy: 0.40219780802726746, validation: 0.4561
epoch 4: loss: 1.0862756967544556, accuracy: 0.48571428656578064, validation: 0.5351
epoch 5: loss: 0.9203118681907654, accuracy: 0.5472527742385864, validation: 0.5614
epoch 6: loss: 0.800133466720581, accuracy: 0.6065934300422668, validation: 0.6053
epoch 7: loss: 0.7099872827529907, accuracy: 0.6527472734451294, validation: 0.6404
epoch 8: loss: 0.6403664946556091, accuracy: 0.6835165023803711, validation: 0.6842
epoch 9: loss: 0.5852901935577393, accuracy: 0.7098901271820068, validation: 0.7018
epoch 10: loss: 0.5408181548118591, accuracy: 0.7274725437164307, validation: 0.7193
epoch 11: loss: 0.5042439103126526, accuracy: 0.7450549602508545, validation:

In [18]:
def random_test(model, start=35, stop=67):
  """Print thresholded predictions next to the true labels for rows X[start:stop].

  Reads the module-level X, y, mean and std. The rows are sliced from the full
  dataset X (taken before the train/test split), so they can include rows the
  model was trained on; treat the output as a spot check, not an evaluation.

  Args:
    model: object whose `predict(tensor)` returns one probability per row.
    start, stop: slice bounds into X and y; the defaults keep the original 35:67 window.
  """
  test_X = X[start:stop]
  test_y = y[start:stop]

  # Apply the same normalization that was used for the training data.
  test_X_tensor = torch.tensor(test_X, dtype=torch.float32)
  test_X_tensor_normalized = (test_X_tensor - mean) / std

  # Inference only: no gradients are needed for these predictions.
  with torch.no_grad():
    preds = (model.predict(test_X_tensor_normalized).view(-1) >= 0.5).tolist()
  targets = test_y >= 0.5
  for pred, target in zip(preds, targets):
    print(f"pred: {pred:>1}, target {target:>1},   match: {pred==target:>1}")

In [19]:
# Spot-check the trained linear model on a fixed slice of rows,
# then display its (weight, bias) parameters as the cell output.
random_test(model)
model.params

pred: 1, target 1,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1
pred: 0, target 1,   match: 0
pred: 1, target 1,   match: 1
pred: 0, target 1,   match: 0
pred: 0, target 1,   match: 0
pred: 1, target 1,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 1,   match: 0
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1
pred: 0, target 0,   match: 1
pred: 0, target 0,   match: 1
pred: 0, target 0,   match: 1
pred: 0, target 0,   match: 1
pred: 1, target 1,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1
pred: 1, target 1,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1
pred: 0, target 0,   match: 1
pred: 1, target 0,   match: 0
pred: 0, target 0,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1
pred: 1, target 1,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1


(tensor([[-0.9323],
         [ 0.9114],
         [ 0.0852],
         [ 2.1837],
         [ 2.0660],
         [ 0.1539],
         [-0.9970],
         [ 0.2827],
         [ 0.6096],
         [-1.0862],
         [-1.2712],
         [-0.6088],
         [ 0.6329],
         [ 1.1695],
         [ 0.7023],
         [-1.1133],
         [ 0.7984],
         [-0.4331],
         [ 0.8844],
         [ 0.6218],
         [ 0.6715],
         [ 0.3528],
         [ 0.7818],
         [ 0.5043],
         [-1.1342],
         [ 0.6044],
         [ 0.9468],
         [ 0.2875],
         [ 0.2243],
         [-0.2090]], requires_grad=True),
 tensor([0.2283], requires_grad=True))

In [30]:
class NeuralNet(LinearModel):
  """Feed-forward network: ReLU hidden layers followed by a sigmoid output layer.

  `layers` is a sequence of [weight, bias] pairs applied in order; the last pair
  is the output layer. The training loop (`train`) is inherited from LinearModel.
  """

  def __init__(self, layers, x, y, x_validate, y_validate, loss_fn, metrics):
    """Store the layers, the data and the callables used while training.

    Args:
      layers: sequence of [weight, bias] pairs; each tensor is expected to require gradients.
      x, y: training inputs and targets.
      x_validate, y_validate: held-out inputs and targets used for validation.
      loss_fn: callable `loss_fn(preds, targets)` whose result is backpropagated.
      metrics: callable `metrics(preds, targets)` reported once per epoch.
    """
    self.layers = layers
    self.x = x
    self.y = y
    self.x_validate = x_validate
    self.y_validate = y_validate
    self.loss_fn = loss_fn
    self.metrics = metrics
    self.z = None  # pre-activation of the output layer from the most recent predict()

  def predict(self, x):
    """Forward pass: hidden layers with ReLU, then the output layer with sigmoid."""
    res = x
    for layer in self.layers[:-1]:
      # Feed the previous layer's activations forward. Using the raw input `x` here
      # would make every hidden layer except the last one a no-op.
      res = res@layer[0] + layer[1]
      res = res.max(torch.tensor(0.0))  # ReLU: element-wise max(res, 0)
      if torch.isnan(res).any():
        print("nan after during prediction in inner layers")

    # Output layer; with no hidden layers `res` is still the input itself.
    res = res@self.layers[-1][0] + self.layers[-1][1]
    self.z = res
    if torch.isnan(self.z).any():
      print("nan after during prediction")
    return sigmoid(res)

  def step(self, loss, learning_rate):
    """Backpropagate `loss` and apply one gradient-descent update to every layer parameter."""
    loss.backward()
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
      for layer in self.layers:
        for p in layer:
          if p.grad is None:
            # Parameter did not take part in this loss; nothing to update.
            continue
          p.sub_(p.grad * learning_rate)
          if torch.isnan(p).any():
            print("nan after backpropagation")
          # Gradients accumulate across backward() calls, so reset for the next epoch.
          p.grad.zero_()


In [40]:
# Seed the RNG so the random initial parameters, and therefore the training log below,
# are the same on every Restart & Run All.
torch.manual_seed(42)

neurons = 3  # width of the single hidden layer
# Take the input width from the data instead of hard-coding the feature count.
w1 = init_params((X_train_normalized.shape[1], neurons))
b1 = init_params(neurons)
w2 = init_params((neurons, 1))
b2 = init_params(1)

# Each layer is a [weight, bias] pair; the last one is the output layer.
layer1 = [w1, b1]
layer2 = [w2, b2]
layers = [layer1, layer2]

model2 = NeuralNet(layers, X_train_normalized, y_train_tensor, X_test_normalized, y_test_tensor, nn.BCELoss(), batch_accuracy)
model2.train(epoch=50, learning_rate=1e-1)

epoch 0: loss: 2.6648316383361816, accuracy: 0.4285714328289032, validation: 0.4035
epoch 1: loss: 2.1337993144989014, accuracy: 0.4307692348957062, validation: 0.4386
epoch 2: loss: 1.732007384300232, accuracy: 0.4571428596973419, validation: 0.4649
epoch 3: loss: 1.4322041273117065, accuracy: 0.5010989308357239, validation: 0.4912
epoch 4: loss: 1.21170175075531, accuracy: 0.5362637639045715, validation: 0.5175
epoch 5: loss: 1.0547499656677246, accuracy: 0.5714285969734192, validation: 0.5263
epoch 6: loss: 0.9423450231552124, accuracy: 0.5978022217750549, validation: 0.5526
epoch 7: loss: 0.8547509908676147, accuracy: 0.6241758465766907, validation: 0.5614
epoch 8: loss: 0.7872779965400696, accuracy: 0.6593406796455383, validation: 0.5789
epoch 9: loss: 0.7337020039558411, accuracy: 0.6813187003135681, validation: 0.6053
epoch 10: loss: 0.689014196395874, accuracy: 0.6989011168479919, validation: 0.6053
epoch 11: loss: 0.6511649489402771, accuracy: 0.7098901271820068, validation: 0

In [41]:
# Spot-check the trained network on the same fixed slice of rows,
# then display its list of [weight, bias] layers as the cell output.
random_test(model2)
model2.layers

pred: 1, target 1,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1
pred: 0, target 1,   match: 0
pred: 1, target 1,   match: 1
pred: 0, target 1,   match: 0
pred: 0, target 1,   match: 0
pred: 1, target 1,   match: 1
pred: 1, target 1,   match: 1
pred: 1, target 1,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1
pred: 1, target 0,   match: 0
pred: 0, target 0,   match: 1
pred: 0, target 0,   match: 1
pred: 0, target 0,   match: 1
pred: 1, target 1,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1
pred: 1, target 1,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1
pred: 0, target 0,   match: 1
pred: 0, target 0,   match: 1
pred: 0, target 0,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1
pred: 1, target 1,   match: 1
pred: 1, target 1,   match: 1
pred: 0, target 0,   match: 1


[[tensor([[ 1.2383,  1.1942, -0.9469],
          [ 0.0942, -0.1442, -2.2273],
          [-1.7200, -0.0260,  0.3555],
          [ 0.4452, -1.0220, -0.4364],
          [-0.9116, -0.1421, -1.0726],
          [-0.1580,  0.4530,  0.4340],
          [ 1.0321, -1.2220, -2.1283],
          [ 0.0423, -0.8246, -0.6428],
          [-2.5702,  0.0575,  0.7201],
          [ 1.1303, -0.4688, -0.6655],
          [-0.1507,  0.2804,  0.4506],
          [-1.4209, -0.1024,  0.3018],
          [-1.0430, -0.7053, -0.5171],
          [-0.3081,  0.2862,  0.4239],
          [-1.0914,  0.0615, -1.5576],
          [-0.9529,  0.2569, -0.7648],
          [ 0.4419,  0.1963, -0.5129],
          [ 2.6655,  0.5138,  1.9001],
          [-1.1843,  0.6703, -1.0193],
          [-0.7176,  1.4107, -0.5194],
          [-1.5368, -0.5789, -1.0575],
          [ 0.9231,  0.7710, -0.7630],
          [ 1.2569, -0.7771,  1.3468],
          [ 0.1088, -1.5200,  0.9969],
          [-0.2547,  0.5956,  0.2270],
          [ 1.2881,  0.35