<a href="https://colab.research.google.com/github/dhruv20047228/Pytorch-Notebooks/blob/main/breast_cancer_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [None]:
#location of the raw CSV file that holds the dataset
DATA_URL = 'https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv'

#download the CSV into a dataframe and preview its first rows
df = pd.read_csv(DATA_URL)
df.head()

In [None]:
#show the (rows, columns) dimensions of the freshly loaded dataframe
df.shape

In [None]:
#columns that must not be fed to the model as features
unused_columns = ['id', 'Unnamed: 32']

#remove them in place so `df` keeps only the label column and the features
df.drop(columns=unused_columns, inplace=True)

In [None]:
#preview the first rows again to confirm the two columns were dropped
df.head()

In [None]:
#train test split
#the first column holds the labels; every remaining column is a feature
features = df.iloc[:, 1:]
labels = df.iloc[:, 0]

#fix the seed so the split (and every result computed from it) is reproducible,
#and stratify so both splits keep the same class proportions
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42, stratify=labels
)

In [None]:
#scaling: a neural network trains best when every feature is on the same scale
#learn the scaling statistics from the training split only, then apply the
#same fitted scaler to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
#inspect the training features after scaling
X_train

In [None]:
#inspect the training labels before they are encoded
y_train

In [None]:
#the diagnosis column holds two classes, M and B; both are letters, which a
#neural network cannot consume directly, so they are mapped to integers
#the mapping is learned from the training labels and reused for the test labels
encoder = LabelEncoder().fit(y_train)
y_train = encoder.transform(y_train)
y_test = encoder.transform(y_test)

In [None]:
#inspect the training labels after label encoding
y_train

In [None]:
#wrap each NumPy array in a PyTorch tensor, keeping the feature/label and
#train/test naming of the arrays
X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor = map(
    torch.from_numpy, (X_train, X_test, y_train, y_test)
)

In [None]:
#check the shape of the training feature tensor
X_train_tensor.shape

In [None]:
#check the shape of the training label tensor
y_train_tensor.shape

In [None]:
#Neural Network
class MySimpleNN():
  """Single-layer binary classifier (logistic regression) with hand-managed parameters.

  The model owns a weight column vector and a scalar bias, both float64
  tensors that track gradients, so they can be updated manually after
  calling ``backward()`` on the loss.
  """

  def __init__(self, X):
    """Create the parameters for inputs shaped like ``X``.

    Args:
      X: 2-D feature tensor/array; only ``X.shape[1]`` (the number of
        features) is read.
    """
    #one weight per feature, drawn uniformly from [0, 1)
    self.weights = torch.rand(X.shape[1], 1, dtype=torch.float64, requires_grad=True)
    self.bias = torch.zeros(1, dtype=torch.float64, requires_grad=True)

  def forward(self, X):
    """Return the predicted probability of class 1 for every row of ``X``.

    Args:
      X: float64 tensor of shape (n_samples, n_features).

    Returns:
      Tensor of shape (n_samples, 1) with values in (0, 1).
    """
    z = torch.matmul(X, self.weights) + self.bias
    y_pred = torch.sigmoid(z)
    return y_pred

  def loss_function(self, y_pred, y):
    """Return the mean binary cross-entropy between ``y_pred`` and ``y``.

    Args:
      y_pred: predicted probabilities, e.g. the (n_samples, 1) output of
        ``forward``.
      y: ground-truth 0/1 labels with the same number of elements as
        ``y_pred``; a 1-D (n_samples,) tensor or array is accepted.

    Returns:
      Scalar tensor holding the loss.

    Raises:
      RuntimeError: if ``y`` cannot be reshaped to the shape of ``y_pred``.
    """
    #clamp predictions to avoid log(0)
    epsilon = 1e-7
    y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)

    #use the labels that were passed in (not a global) and give them the shape
    #and dtype of the predictions; otherwise (n,) labels would broadcast
    #against (n, 1) predictions into an (n, n) matrix
    y = torch.as_tensor(y, dtype=y_pred.dtype, device=y_pred.device).reshape(y_pred.shape)

    #Calculate loss
    loss = -(y * torch.log(y_pred) + (1 - y) * torch.log(1 - y_pred)).mean()
    return loss

In [None]:
#hyperparameters read by the training loop
epochs = 25          #number of passes of the training loop
learning_rate = 0.1  #step size applied to every gradient update

In [None]:
#training pipeline

#build the model; its parameter shapes are derived from the training features
model = MySimpleNN(X_train_tensor)

#full-batch gradient descent: every epoch uses the whole training set
for epoch in range(epochs):

  #forward pass
  y_pred = model.forward(X_train_tensor)

  #loss on the training labels
  loss = model.loss_function(y_pred, y_train_tensor)

  #backward pass: fills .grad on the weights and the bias
  loss.backward()

  #manual parameter update, performed outside of autograd tracking
  with torch.no_grad():
    for param in (model.weights, model.bias):
      param -= learning_rate * param.grad
      #reset the gradient because backward() accumulates into .grad
      param.grad.zero_()

  #report progress
  print(f'Epoch: {epoch + 1}, Loss: {loss.item()}')

In [None]:
#inspect the weights after training
model.weights

In [None]:
#inspect the bias after training
model.bias

In [None]:
#Evaluation
with torch.no_grad():
  #forward() returns probabilities of shape (n_samples, 1); flatten them so the
  #comparison with the 1-D labels is element-wise instead of broadcasting to
  #an (n_samples, n_samples) matrix
  y_prob = model.forward(X_test_tensor).reshape(-1)

  #predict class 1 only when the probability exceeds the cut-off
  #NOTE(review): 0.9 is stricter than the usual 0.5 threshold - confirm it is intended
  y_pred = (y_prob > 0.9).float()

  #accuracy: fraction of test samples whose predicted class equals the label
  accuracy = (y_pred == y_test_tensor.reshape(-1)).float().mean()
  print(f'Accuracy: {accuracy.item()}')
