<a href="https://colab.research.google.com/github/ajibigad/ML-Playground/blob/main/breast_cancer_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd

from google.colab import files

uploaded_data = files.upload()["breast-cancer.csv"]

Saving breast-cancer.csv to breast-cancer.csv


In [3]:
from sklearn.preprocessing import LabelEncoder

# Load CSV file into a pandas DataFrame
df = pd.read_csv("breast-cancer.csv")

# Convert M -> 1 and B -> 0
label_encoder = LabelEncoder()
df['diagnosis'] = label_encoder.fit_transform(df['diagnosis'])

X = df.drop(['id', 'diagnosis'], axis=1).values
y = df['diagnosis'].values

In [24]:
from sklearn.model_selection import train_test_split
import torch


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Z-score normalization
mean = X_train_tensor.mean(dim=0, keepdim=True)
std = X_train_tensor.std(dim=0, keepdim=True)

X_train_normalized = (X_train_tensor - mean) / std
X_test_normalized = (X_test_tensor - mean) / std

In [28]:
import pdb

def sigmoid(x): return 1/(1+torch.exp(-x))

def abs_mean_loss(predictions, targets):
  # // abs mean or mse but we did abs mean loss here
  # // for every prediction, if the target is 1, then difference is 1-pred, else difference is abs(0-pred)
  # // predictions will always be between 0 and 1 due to sigmond function
  return torch.where(targets==1, 1-predictions, predictions).mean()

def mse(preds, targets): return ((preds-targets)**2).mean()

def batch_accuracy(predictions, targets):
  with torch.no_grad():
    correct = (predictions>=0.5) == targets
    return correct.float().mean()

def validate_epoch(model, val_data, val_target):
    accs = [batch_accuracy(model.predict(xb), yb) for xb,yb in zip(val_data, val_target)]
    return round(torch.stack(accs).mean().item(), 4)

class LinearModel():
  def __init__(self, weight, bias, x, y, x_validate, y_validate, loss_fn, metrics):
    self.weight = weight
    self.bias = bias
    self.x = x
    self.y = y
    self.x_validate = x_validate
    self.y_validate = y_validate
    self.params = self.weight, self.bias
    self.loss_fn = loss_fn
    self.metrics = metrics

  def predict(self, x):
    # z(x) = wx + b
    # y = g(z(x)) where g is an activation function
    z = x@self.weight + self.bias
    # print(f"predictions before activation function \n\t {z[5:10]}")
    predictions = sigmoid(z)
    # print(f"predictions after activation function \n\t {predictions[5:10]}")

    # apply activation function
    return predictions

  def step(self, loss, learning_rate):
  	# get loss
  	# calculate gradient, loss.backward()
  	# update parameters using learning rate
    loss.backward()
    for p in self.params:
      # print(f"params before {p.data[:5]}")
      p.data -= p.grad.data * learning_rate
      # print(f"params grads {p.grad.data[:5]}")
      # print(f"params after {p.data[:5]}")
      p.grad.zero_()

  def train(self, epoch, learning_rate):
    for i in range(epoch):
      preds = self.predict(self.x)
      loss = self.loss_fn(preds, self.y)
      accuracy = self.metrics(preds, self.y)
      validation = validate_epoch(self, self.x_validate, self.y_validate)
      print(f"epoch {i}: loss: {loss}, accuracy: {accuracy}, validation: {validation}")
      self.step(loss, learning_rate)

In [29]:
def init_params(size, std=1.0): return (torch.randn(size)*std).requires_grad_()

w = init_params((X_train_normalized.shape[1], 1)) # size number of
b = init_params(1)

model = LinearModel(w, b, X_train_normalized, y_train_tensor, X_test_normalized, y_test_tensor, mse, batch_accuracy)
model.train(epoch=50, learning_rate=2)

epoch 0: loss: 0.3968909978866577, accuracy: 0.4759811758995056, validation: 0.2105
epoch 1: loss: 0.3789397180080414, accuracy: 0.4867189824581146, validation: 0.2368
epoch 2: loss: 0.3655160963535309, accuracy: 0.49406594038009644, validation: 0.2807
epoch 3: loss: 0.35531577467918396, accuracy: 0.5002825856208801, validation: 0.3246
epoch 4: loss: 0.34690678119659424, accuracy: 0.5070643424987793, validation: 0.3596
epoch 5: loss: 0.3395509719848633, accuracy: 0.5121507048606873, validation: 0.386
epoch 6: loss: 0.3327654004096985, accuracy: 0.516671895980835, validation: 0.4211
epoch 7: loss: 0.3262692093849182, accuracy: 0.5194976329803467, validation: 0.4211
epoch 8: loss: 0.3200600743293762, accuracy: 0.5257142782211304, validation: 0.4298
epoch 9: loss: 0.31432661414146423, accuracy: 0.5285400152206421, validation: 0.4649
epoch 10: loss: 0.3092736005783081, accuracy: 0.53362637758255, validation: 0.4561
epoch 11: loss: 0.3049805164337158, accuracy: 0.5392778515815735, validatio