<a href="https://colab.research.google.com/github/ajibigad/ML-Playground/blob/main/breast_cancer_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [40]:
import pandas as pd

from google.colab import files

_ = files.upload()

Saving breast-cancer.csv to breast-cancer (1).csv


In [36]:
from sklearn.preprocessing import LabelEncoder

# Load CSV file into a pandas DataFrame
df = pd.read_csv("breast-cancer.csv")

# Convert M -> 1 and B -> 0
label_encoder = LabelEncoder()
df['diagnosis'] = label_encoder.fit_transform(df['diagnosis'])

X = df.drop(['id', 'diagnosis'], axis=1).values
y = df['diagnosis'].values

In [37]:
from sklearn.model_selection import train_test_split
import torch


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Z-score normalization
mean = X_train_tensor.mean(dim=0, keepdim=True)
std = X_train_tensor.std(dim=0, keepdim=True)

X_train_normalized = (X_train_tensor - mean) / std
X_test_normalized = (X_test_tensor - mean) / std

In [38]:
import pdb

def sigmoid(x): return 1/(1+torch.exp(-x))

def abs_mean_loss(predictions, targets):
  # // abs mean or mse but we did abs mean loss here
  # // for every prediction, if the target is 1, then difference is 1-pred, else difference is abs(0-pred)
  # // predictions will always be between 0 and 1 due to sigmond function
  return torch.where(targets==1, 1-predictions, predictions).mean()

def mse(preds, targets): return ((preds-targets)**2).mean()

def batch_accuracy(predictions, targets):
  with torch.no_grad():
    correct = (predictions>=0.5) == targets
    return correct.float().mean()

def validate_epoch(model, val_data, val_target):
    accs = [batch_accuracy(model.predict(xb), yb) for xb,yb in zip(val_data, val_target)]
    return round(torch.stack(accs).mean().item(), 4)

class LinearModel():
  def __init__(self, weight, bias, x, y, x_validate, y_validate, loss_fn, metrics):
    self.weight = weight
    self.bias = bias
    self.x = x
    self.y = y
    self.x_validate = x_validate
    self.y_validate = y_validate
    self.params = self.weight, self.bias
    self.loss_fn = loss_fn
    self.metrics = metrics

  def predict(self, x):
    # z(x) = wx + b
    # y = g(z(x)) where g is an activation function
    z = x@self.weight + self.bias

    # apply activation function
    return sigmoid(z)

  def step(self, loss, learning_rate):
    # get loss
    # calculate gradient, loss.backward()
    # update parameters using learning rate
    loss.backward()
    for p in self.params:
      p.data -= p.grad.data * learning_rate
      p.grad.zero_()

  def train(self, epoch, learning_rate):
    for i in range(epoch):
      preds = self.predict(self.x)
      loss = self.loss_fn(preds, self.y)
      accuracy = self.metrics(preds, self.y)
      validation = validate_epoch(self, self.x_validate, self.y_validate)
      print(f"epoch {i}: loss: {loss}, accuracy: {accuracy}, validation: {validation}")
      self.step(loss, learning_rate)

In [39]:
def init_params(size, std=1.0): return (torch.randn(size)*std).requires_grad_()

w = init_params((X_train_normalized.shape[1], 1)) # size number of
b = init_params(1)

model = LinearModel(w, b, X_train_normalized, y_train_tensor, X_test_normalized, y_test_tensor, mse, batch_accuracy)
model.train(epoch=50, learning_rate=2)

epoch 0: loss: 0.3704274594783783, accuracy: 0.49519622325897217, validation: 0.5439
epoch 1: loss: 0.36136573553085327, accuracy: 0.49689167737960815, validation: 0.5263
epoch 2: loss: 0.35322287678718567, accuracy: 0.49858713150024414, validation: 0.5263
epoch 3: loss: 0.34630343317985535, accuracy: 0.4997174143791199, validation: 0.5088
epoch 4: loss: 0.3402440845966339, accuracy: 0.49858713150024414, validation: 0.5
epoch 5: loss: 0.3345414102077484, accuracy: 0.5036734938621521, validation: 0.4825
epoch 6: loss: 0.328931599855423, accuracy: 0.5053688883781433, validation: 0.5088
epoch 7: loss: 0.3233233690261841, accuracy: 0.513280987739563, validation: 0.5175
epoch 8: loss: 0.31771668791770935, accuracy: 0.5144113302230835, validation: 0.5263
epoch 9: loss: 0.31216731667518616, accuracy: 0.5211930871009827, validation: 0.5351
epoch 10: loss: 0.3067368268966675, accuracy: 0.5240188241004944, validation: 0.5439
epoch 11: loss: 0.30146729946136475, accuracy: 0.5274097323417664, vali