In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler , LabelEncoder
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [3]:
df.shape

(569, 33)

In [4]:
for feature in df:
    print(feature)

id
diagnosis
radius_mean
texture_mean
perimeter_mean
area_mean
smoothness_mean
compactness_mean
concavity_mean
concave points_mean
symmetry_mean
fractal_dimension_mean
radius_se
texture_se
perimeter_se
area_se
smoothness_se
compactness_se
concavity_se
concave points_se
symmetry_se
fractal_dimension_se
radius_worst
texture_worst
perimeter_worst
area_worst
smoothness_worst
compactness_worst
concavity_worst
concave points_worst
symmetry_worst
fractal_dimension_worst
Unnamed: 32


In [5]:
df.drop(columns = ["id","Unnamed: 32"],inplace=True)  

In [6]:
X_train,X_test,y_train,y_test = train_test_split(df.iloc[:,1:],df.iloc[:,0],test_size =0.2)



# df.iloc[:, 1:]
# : → all rows (from top to bottom)
# 1: → starting from column index 1 up to the last column
# ✅ So this gives you all rows and all columns except the first one
# (because the first column is index 0)


# df.iloc[:, 0]
# : → all rows
# 0 → only column at index 0 (the first column)
# ✅ So this gives you all rows from the first column only

In [7]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
X_train

array([[-1.00297593, -0.12181395, -0.93902556, ..., -0.55228149,
         0.06333824, -0.46934505],
       [ 0.54570956, -0.33165247,  0.52343822, ...,  0.09528538,
        -0.17603752, -0.15653704],
       [-0.7949855 ,  0.41677158, -0.80837553, ..., -0.49844504,
        -0.11619358,  0.21269692],
       ...,
       [ 0.37144731,  0.11600304,  0.41320226, ...,  1.4794328 ,
         0.46283697,  0.9566431 ],
       [-0.45770372, -0.73500873, -0.45031279, ..., -0.47810343,
         0.41108005, -0.49618846],
       [ 0.28712686, -1.4601174 ,  0.21926862, ..., -0.51373949,
        -0.66934566, -0.91910927]], shape=(455, 30))

In [9]:
y_train

266    B
141    M
109    B
295    B
265    M
      ..
492    M
117    M
392    M
502    B
371    B
Name: diagnosis, Length: 455, dtype: object

# since the above col is in char form we need to encode it 

In [10]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

In [11]:
y_train 

array([0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,

**convert numpy array to tensor of tensorFlow**

In [12]:
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [13]:
X_train_tensor.dtype

# we need to change it to float ,cause both are not at float32 format 
X_train_tensor = X_train_tensor.float()
y_train_tensor = y_train_tensor.float()

In [14]:
X_train_tensor

tensor([[-1.0030, -0.1218, -0.9390,  ..., -0.5523,  0.0633, -0.4693],
        [ 0.5457, -0.3317,  0.5234,  ...,  0.0953, -0.1760, -0.1565],
        [-0.7950,  0.4168, -0.8084,  ..., -0.4984, -0.1162,  0.2127],
        ...,
        [ 0.3714,  0.1160,  0.4132,  ...,  1.4794,  0.4628,  0.9566],
        [-0.4577, -0.7350, -0.4503,  ..., -0.4781,  0.4111, -0.4962],
        [ 0.2871, -1.4601,  0.2193,  ..., -0.5137, -0.6693, -0.9191]])

# model Defining

In [15]:
class MySimpleNN(nn.Module):
    def __init__(self,num_of_features): 
        super().__init__()
        self.linear = nn.Linear(num_of_features,1)
        self.sigmoid = nn.Sigmoid()

    
    # forward pass
    def forward(self,features):
        out = self.linear(features)
        out = self.sigmoid(out)
        return out

        
    

# Important Parameter

In [16]:
lr = 0.1 #learning rate 
epochs = 25

# Training Pipeline 

In [17]:
# defining the loss function

loss_function = nn.BCELoss()

In [18]:
# create model
model = MySimpleNN(X_train_tensor.shape[1])

# define the optimizer funciton 
optimizer = torch.optim.SGD(model.parameters(),lr=lr) # lr = learning rate, SDG =>stochastic gradient descent

# define loops 
for epoch in range(epochs):
    
    # forward pass
    y_pred = model(X_train_tensor)
    # print(y_pred.shape)
    

    # loss calculation
    loss = loss_function(y_pred,y_train_tensor.view(-1,1))
    # print(f'epoch :{epoch},loss:{loss.item()}')



    # clear gradients
    optimizer.zero_grad()

    
    # backward pass
    loss.backward()

    # parameters update
    optimizer.step()

    # print loss in each epoch 
    print(f'epoch :{epoch},loss:{loss.item()}')


epoch :0,loss:0.6865292191505432
epoch :1,loss:0.5152156352996826
epoch :2,loss:0.42328619956970215
epoch :3,loss:0.3671382665634155
epoch :4,loss:0.32917675375938416
epoch :5,loss:0.3016105890274048
epoch :6,loss:0.2805343568325043
epoch :7,loss:0.26378750801086426
epoch :8,loss:0.250080406665802
epoch :9,loss:0.23859603703022003
epoch :10,loss:0.2287912666797638
epoch :11,loss:0.2202906608581543
epoch :12,loss:0.21282605826854706
epoch :13,loss:0.2062004953622818
epoch :14,loss:0.20026588439941406
epoch :15,loss:0.19490845501422882
epoch :16,loss:0.19003930687904358
epoch :17,loss:0.1855878233909607
epoch :18,loss:0.1814970225095749
epoch :19,loss:0.17772048711776733
epoch :20,loss:0.17421986162662506
epoch :21,loss:0.1709631234407425
epoch :22,loss:0.16792337596416473
epoch :23,loss:0.16507770121097565
epoch :24,loss:0.16240650415420532


# Model Evaluation 

In [19]:
# we need to change it to float ,cause both are not at float32 format 
X_test_tensor = X_test_tensor.float()
y_test_tensor = y_test_tensor.float()

In [20]:
# model evaluation
with torch.no_grad():
  y_pred = model.forward(X_test_tensor)
  y_pred = (y_pred > 0.9).float()
  accuracy = (y_pred == y_test_tensor).float().mean()
  print(f'Accuracy: {accuracy.item()}')


Accuracy: 0.6151123642921448
