In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler , LabelEncoder
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [3]:
df.shape

(569, 33)

In [4]:
for feature in df:
    print(feature)

id
diagnosis
radius_mean
texture_mean
perimeter_mean
area_mean
smoothness_mean
compactness_mean
concavity_mean
concave points_mean
symmetry_mean
fractal_dimension_mean
radius_se
texture_se
perimeter_se
area_se
smoothness_se
compactness_se
concavity_se
concave points_se
symmetry_se
fractal_dimension_se
radius_worst
texture_worst
perimeter_worst
area_worst
smoothness_worst
compactness_worst
concavity_worst
concave points_worst
symmetry_worst
fractal_dimension_worst
Unnamed: 32


In [5]:
df.drop(columns = ["id","Unnamed: 32"],inplace=True)  

In [6]:
X_train,X_test,y_train,y_test = train_test_split(df.iloc[:,1:],df.iloc[:,0],test_size =0.2)



# df.iloc[:, 1:]
# : → all rows (from top to bottom)
# 1: → starting from column index 1 up to the last column
# ✅ So this gives you all rows and all columns except the first one
# (because the first column is index 0)


# df.iloc[:, 0]
# : → all rows
# 0 → only column at index 0 (the first column)
# ✅ So this gives you all rows from the first column only

In [7]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
X_train

array([[ 0.90587361,  1.42719428,  0.85433731, ...,  0.9090239 ,
        -0.64244475, -0.25748864],
       [ 0.18730936,  1.08574787,  0.22620428, ...,  0.71602632,
         0.64836397,  0.25004267],
       [ 0.11516436,  0.02449551,  0.09429634, ..., -0.29466341,
        -0.6880397 , -0.50994216],
       ...,
       [-0.48219628,  1.10881858, -0.33618416, ...,  1.63623878,
         2.29135436,  6.46375837],
       [ 1.53497806,  0.02218844,  1.44478237, ...,  1.31045886,
         0.73955387,  0.27051187],
       [-0.04932625, -0.5038236 , -0.02379267, ...,  0.34547096,
         0.25058735, -0.25066557]], shape=(455, 30))

In [9]:
y_train

441    M
65     M
448    B
230    M
227    B
      ..
291    B
49     B
9      M
254    M
255    M
Name: diagnosis, Length: 455, dtype: object

# since the above col is in char form we need to encode it 

In [10]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

In [11]:
y_train 

array([1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,

**convert numpy array to tensor of tensorFlow**

In [12]:
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [13]:
X_train_tensor.shape

torch.Size([455, 30])

In [14]:
X_train_tensor

tensor([[ 0.9059,  1.4272,  0.8543,  ...,  0.9090, -0.6424, -0.2575],
        [ 0.1873,  1.0857,  0.2262,  ...,  0.7160,  0.6484,  0.2500],
        [ 0.1152,  0.0245,  0.0943,  ..., -0.2947, -0.6880, -0.5099],
        ...,
        [-0.4822,  1.1088, -0.3362,  ...,  1.6362,  2.2914,  6.4638],
        [ 1.5350,  0.0222,  1.4448,  ...,  1.3105,  0.7396,  0.2705],
        [-0.0493, -0.5038, -0.0238,  ...,  0.3455,  0.2506, -0.2507]],
       dtype=torch.float64)

# model Defining

In [15]:
# here we use OOP concept ... we create the class and then write the functions within it 
class MySimpleNN():
    def __init__(self,X):  #here X is the training dataset
        self.weights = torch.rand(X.shape[1], 1, dtype= torch.float64,requires_grad = True)
        self.bias = torch.zeros(1, dtype= torch.float64,requires_grad = True)
        
# explaination:
# X.shape => shape of training dataset i.e> [455,30]
# X.shape[1]=> i.e> 30 
# X.shape[1],1  => this crates a weight matrix of [30 x 1] which helps in calc models weights
# dtype= torch.float64  => stors datatype "float" 
# requires_grad=True → ensures parameters are learnable.


    
    # forward pass
    def forward(self,X):
        z= torch.matmul(X, self.weights) +self.bias   # this is thr formula to find Z = WiXi + W2X2 + b
        y_pred = torch.sigmoid(z)
        return y_pred

    # loss calculation 
    def loss(self,y_pred,y):
        # clamp predictions to avoid log(0)
        epsilon = 1e-7
        y_pred = torch.clamp(y_pred, epsilon, 1-epsilon)

        # calculate loss
        loss = -(y_train_tensor *torch.log(y_pred) + (1- y_train_tensor) * torch.log(1-y_pred)).mean()
        return loss
        
    

# Important Parameter

In [16]:
lr = 0.1 #learning rate 
epochs = 25

# Training Pipeline 

In [17]:
# create model
model = MySimpleNN(X_train_tensor)

# define loops 
for epoch in range(epochs):
    
    # forward pass
    y_pred = model.forward(X_train_tensor)
    # print(y_pred.shape)
    

    # loss calculation
    loss = model.loss(y_pred,y_train_tensor)
    # print(f'epoch :{epoch},loss:{loss.item()}')


    # backward pass
    loss.backward()

    # parameters update
    with torch.no_grad():
        model.weights -= lr * model.weights.grad
        model.bias -= lr * model.bias.grad

    # zero gradients
    model.weights.grad.zero_()
    model.bias.grad.zero_()

    # print loss in each epoch 
    print(f'epoch :{epoch},loss:{loss.item()}')


epoch :0,loss:3.418190127362089
epoch :1,loss:3.2906813383212494
epoch :2,loss:3.1626388320412513
epoch :3,loss:3.030794887205569
epoch :4,loss:2.8947299588038167
epoch :5,loss:2.757339154564372
epoch :6,loss:2.6225056986570445
epoch :7,loss:2.485914867822859
epoch :8,loss:2.3495500706085357
epoch :9,loss:2.2127881988871807
epoch :10,loss:2.0735323582861156
epoch :11,loss:1.940340112216605
epoch :12,loss:1.8114172598166545
epoch :13,loss:1.6887018446308562
epoch :14,loss:1.575598024171797
epoch :15,loss:1.472736057424875
epoch :16,loss:1.378377379555894
epoch :17,loss:1.2905563688128663
epoch :18,loss:1.2139369203801085
epoch :19,loss:1.1466570389328992
epoch :20,loss:1.0884539658301444
epoch :21,loss:1.040767503563271
epoch :22,loss:1.001976349924941
epoch :23,loss:0.9703691020099316
epoch :24,loss:0.9443539321326297


# Model Evaluation 

In [18]:
# model evaluation
with torch.no_grad():
  y_pred = model.forward(X_test_tensor)
  y_pred = (y_pred > 0.9).float()
  accuracy = (y_pred == y_test_tensor).float().mean()
  print(f'Accuracy: {accuracy.item()}')


Accuracy: 0.598030149936676
