**PyTorch Training Pipeline**

* Data preparation 

In [1]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [None]:
# This dataset has 33 features
df.shape

(569, 33)

In [5]:
df.drop(columns=['id', 'Unnamed: 32'], inplace=True)

In [6]:
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [None]:
# iloc is a pandas indexer
df.iloc[1,2]

np.float64(17.77)

* Train, Test, Split

In [19]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,1:], df.iloc[:,0], test_size=0.2)

In [20]:
print(X_test)

     radius_mean  texture_mean  perimeter_mean  area_mean  smoothness_mean  \
261        17.35         23.06          111.00      933.1          0.08662   
355        12.56         19.07           81.92      485.8          0.08760   
126        13.61         24.69           87.76      572.6          0.09258   
517        19.89         20.26          130.50     1214.0          0.10370   
389        19.55         23.21          128.90     1174.0          0.10100   
..           ...           ...             ...        ...              ...   
100        13.61         24.98           88.05      582.7          0.09488   
160        11.75         20.18           76.10      419.8          0.10890   
212        28.11         18.47          188.50     2499.0          0.11420   
274        17.93         24.48          115.20      998.9          0.08855   
476        14.20         20.53           92.41      618.4          0.08931   

     compactness_mean  concavity_mean  concave points_mean  sym

In [21]:
print(y_train)

375    B
303    B
150    B
295    B
425    B
      ..
250    M
293    B
316    B
529    B
278    B
Name: diagnosis, Length: 455, dtype: object


* Scaling data

In [22]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [23]:
X_train.shape

(455, 30)

* Label Encoding

In [24]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

In [25]:
# Classified as binary
print(y_train)

[0 0 0 0 0 0 0 1 0 1 0 1 0 1 1 0 0 1 0 0 0 1 1 0 1 0 0 1 0 0 1 0 1 1 0 0 0
 1 1 0 0 0 1 0 1 0 0 0 0 0 1 1 1 0 1 1 0 0 1 1 0 0 1 1 0 1 0 1 0 1 1 1 0 0
 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0
 1 0 0 1 1 0 0 1 0 1 0 1 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 1 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 0 0 0 1 0 1 0
 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 1 0 0 1 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0
 1 1 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 1 1 0 0 0 1 0 0 0 0 1 1 0
 1 0 0 1 0 0 0 0 0 0 1 1 0 1 0 1 1 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0
 1 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 1 1 0 1
 1 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 1 1 1 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0
 1 1 0 0 1 0 1 0 1 0 0 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 1 0 0 0 1 1 0 1 1
 1 1 0 1 1 0 1 1 0 1 1 0 0 0 1 0 0 0 1 0 1 1 0 1 1 1 0 0 0 0 0 1 1 1 0 0 1
 1 0 1 0 1 0 1 0 0 0 0]


* Numpy arrays to PyTorch tensors

In [28]:
type(X_train)

numpy.ndarray

In [26]:
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [27]:
type(X_train_tensor)

torch.Tensor

* Building the Model

In [34]:
class MySimpleNN():

    def __init__(self, X):
        self.weights = torch.rand(X.shape[1], 1, dtype=torch.float64, requires_grad=True)
        self.bias = torch.zeros(1, dtype=torch.float64, requires_grad=True)

    def forward(self, X):
        z = torch.matmul(X, self.weights) + self.bias
        y_pred = torch.sigmoid(z)
        return y_pred

    def loss_function(self, y_pred, y):
    # Clamp predictions to avoid log(0)
        epsilon = 1e-7
        y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)

        # Calculate loss
        loss = -(y * torch.log(y_pred) + (1 - y) * torch.log(1 - y_pred)).mean()
        return loss

* Hyperparameters

In [35]:
learning_rate = 0.1
epochs = 25

* Training Pipeline

In [None]:
# create model
model = MySimpleNN(X_train_tensor)

for epoch in range(epochs):

    # forward pass on train data
    y_pred = model.forward(X_train_tensor)

    # calculate the loss
    loss = model.loss_function(y_pred, y_train_tensor)

    # backward pass
    loss.backward()

    # parameter updates
    with torch.no_grad():
        model.weights -= learning_rate * model.weights.grad
        model.bias -= learning_rate * model.bias.grad

    # Print loss in each epoch
    print(f"Epoch: {epoch + 1}, Loss: {loss.item()}")


Epoch: 1, Loss: 3.721819615737283
Epoch: 2, Loss: 3.588673737472849
Epoch: 3, Loss: 3.318495588992075
Epoch: 4, Loss: 2.901158012533813
Epoch: 5, Loss: 2.3177170948608214
Epoch: 6, Loss: 1.58980205742846
Epoch: 7, Loss: 1.0111841734538654
Epoch: 8, Loss: 1.2822843425922092
Epoch: 9, Loss: 1.9862816498364098
Epoch: 10, Loss: 2.622729177412035
Epoch: 11, Loss: 3.0762927315419364
Epoch: 12, Loss: 3.347897530644397
Epoch: 13, Loss: 3.441541377304051
Epoch: 14, Loss: 3.3679753726664043
Epoch: 15, Loss: 3.132055709826168
Epoch: 16, Loss: 2.741397262431301
Epoch: 17, Loss: 2.21825401663787
Epoch: 18, Loss: 1.6432689115004409
Epoch: 19, Loss: 1.2579558155199546
Epoch: 20, Loss: 1.4119218130800983
Epoch: 21, Loss: 1.9589492584869215
Epoch: 22, Loss: 2.5274511004660423
Epoch: 23, Loss: 2.9675922865841784
Epoch: 24, Loss: 3.2487178702908412
Epoch: 25, Loss: 3.3732303055720614


In [37]:
model.bias

tensor([-3.0057], dtype=torch.float64, requires_grad=True)

In [38]:
model.weights

tensor([[ 0.7821],
        [ 0.3538],
        [ 0.6657],
        [ 0.5489],
        [ 0.0724],
        [ 0.2655],
        [ 0.8250],
        [ 0.8483],
        [ 0.0626],
        [-0.0973],
        [-0.1059],
        [-0.3734],
        [ 0.2584],
        [ 0.5044],
        [-0.5314],
        [ 0.0944],
        [ 0.2451],
        [ 0.3891],
        [-0.2592],
        [ 0.1572],
        [ 0.3180],
        [-0.4720],
        [ 0.2641],
        [ 1.0247],
        [ 0.7332],
        [ 0.4034],
        [ 0.3533],
        [ 0.7692],
        [ 0.3548],
        [ 0.6388]], dtype=torch.float64, requires_grad=True)

* Model Evaluation

In [46]:
with torch.no_grad():
    y_pred = model.forward(X_test_tensor)
    y_pred = (y_pred > 0.5).float()
    accuracy = (y_pred == y_test_tensor).float().mean()
    print(f'Accuracy: {accuracy.item()}')

Accuracy: 0.5495536923408508


In [44]:
x = torch.tensor(0.3)
x = (x > 0.5).float()
print(x)

tensor(0.)


In [45]:
x = torch.tensor(0.8)
x = (x > 0.5).float()
print(x)

tensor(1.)
