In [34]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import requests
from io import StringIO

In [35]:
url = "https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/master/data.csv"

try:
    response = requests.get(url)
    response.raise_for_status()  # Check if the request was successful
    df = pd.read_csv(StringIO(response.text))
    print(df.head())
except requests.exceptions.RequestException as e:
    print(f"Error fetching data: {e}")



         id diagnosis  radius_mean  texture_mean  perimeter_mean  area_mean  \
0    842302         M        17.99         10.38          122.80     1001.0   
1    842517         M        20.57         17.77          132.90     1326.0   
2  84300903         M        19.69         21.25          130.00     1203.0   
3  84348301         M        11.42         20.38           77.58      386.1   
4  84358402         M        20.29         14.34          135.10     1297.0   

   smoothness_mean  compactness_mean  concavity_mean  concave points_mean  \
0          0.11840           0.27760          0.3001              0.14710   
1          0.08474           0.07864          0.0869              0.07017   
2          0.10960           0.15990          0.1974              0.12790   
3          0.14250           0.28390          0.2414              0.10520   
4          0.10030           0.13280          0.1980              0.10430   

   ...  texture_worst  perimeter_worst  area_worst  smoothness

In [36]:
df.shape

(569, 33)

In [37]:
df=df.drop(columns=['id','Unnamed: 32'])

In [38]:
X_train,X_test,y_train,y_test=train_test_split(df.iloc[:,1:],df.iloc[:,0],test_size=0.2)
X_train.head(2)

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
263,15.61,19.38,100.0,758.6,0.0784,0.05616,0.04209,0.02847,0.1547,0.05443,...,17.91,31.67,115.9,988.6,0.1084,0.1807,0.226,0.08568,0.2683,0.06829
163,12.34,22.22,79.85,464.5,0.1012,0.1015,0.0537,0.02822,0.1551,0.06761,...,13.58,28.68,87.36,553.0,0.1452,0.2338,0.1688,0.08194,0.2268,0.09082


In [39]:
y_train.head(2)

263    M
163    B
Name: diagnosis, dtype: object

In [40]:
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

X_train

array([[ 0.42015123,  0.02801066,  0.33250406, ..., -0.43373885,
        -0.34695692, -0.85242787],
       [-0.50823998,  0.6840805 , -0.49925506, ..., -0.49082321,
        -0.99786093,  0.40859421],
       [-0.3606059 , -1.37191302, -0.37665831, ...,  0.0214097 ,
        -0.81905839,  0.11250868],
       ...,
       [-0.5451485 , -1.19403493, -0.54342291, ..., -0.34643336,
        -0.35636758,  0.26418955],
       [ 0.16463072,  0.53854388,  0.14881532, ...,  1.05778083,
         1.24501314,  1.44853163],
       [ 1.09586105, -2.05108392,  1.27365086, ...,  2.30936304,
         2.66131754,  1.98025421]], shape=(455, 30))

In [41]:
y_train

263    M
163    B
322    B
531    B
259    M
      ..
157    B
318    B
386    B
57     M
0      M
Name: diagnosis, Length: 455, dtype: object

In [42]:
encoder=LabelEncoder()
y_train=encoder.fit_transform(y_train)
y_test=encoder.transform(y_test)

y_train

array([1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0,
       1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1,

In [43]:
#Numpy arrays to tensors
X_train_tensor=torch.tensor(X_train)
X_test_tensor=torch.tensor(X_test)
y_train_tensor=torch.tensor(y_train)
y_test_tensor=torch.tensor(y_test)

In [44]:
X_train_tensor

tensor([[ 0.4202,  0.0280,  0.3325,  ..., -0.4337, -0.3470, -0.8524],
        [-0.5082,  0.6841, -0.4993,  ..., -0.4908, -0.9979,  0.4086],
        [-0.3606, -1.3719, -0.3767,  ...,  0.0214, -0.8191,  0.1125],
        ...,
        [-0.5451, -1.1940, -0.5434,  ..., -0.3464, -0.3564,  0.2642],
        [ 0.1646,  0.5385,  0.1488,  ...,  1.0578,  1.2450,  1.4485],
        [ 1.0959, -2.0511,  1.2737,  ...,  2.3094,  2.6613,  1.9803]],
       dtype=torch.float64)

In [45]:
#Defining The model

class MySimpleNN():
    def __init__(self,X):
        self.weights=torch.rand(X.shape[1],1,dtype=torch.float64,requires_grad=True)
        self.bias=torch.zeros(1,dtype=torch.float64,requires_grad=True)

    def forward(self,X):
        z=torch.matmul(X,self.weights)+self.bias
        y_pred=torch.sigmoid(z)
        return y_pred
    
    def loss_function(self,y_pred,y_train_tensor):
        #Clamp prediction to avoid log(0) error
        epsilon=1e-7
        y_pred=torch.clamp(y_pred,epsilon,1-epsilon)

        #Calculate Loss
        loss=-(y_train_tensor*torch.log(y_pred)+(1-y_train_tensor)*torch.log(1-y_pred)).mean()
        return loss
    
    
        

In [46]:
#important Parameters
learning_rate=0.1
epochs=25


In [47]:
#Training Pipeline
#Create Model
model=MySimpleNN(X_train_tensor)

#Training Loop
for epoch in   range(epochs):
    #forward Pass
    y_pred=model.forward(X_train_tensor)

    #Loss Function
    loss=model.loss_function(y_pred,y_train_tensor)
    # print(f'Epoch: {epoch+1} Loss: {loss}')

    #Backward Pass
    loss.backward()

    #parameter Update
    with torch.no_grad():
        model.weights-=learning_rate*model.weights.grad
        model.bias-=learning_rate*model.bias.grad

    #Zero the gradients
    model.weights.grad.zero_()
    model.bias.grad.zero_()

    print(f'Epoch: {epoch+1} Loss: {loss.item()}')

Epoch: 1 Loss: 3.581341234720667
Epoch: 2 Loss: 3.442730638921226
Epoch: 3 Loss: 3.3013578332712283
Epoch: 4 Loss: 3.152992667135974
Epoch: 5 Loss: 3.0016883083733856
Epoch: 6 Loss: 2.848385228019615
Epoch: 7 Loss: 2.6901056357421385
Epoch: 8 Loss: 2.5330565962492613
Epoch: 9 Loss: 2.377081984104692
Epoch: 10 Loss: 2.2231348152609067
Epoch: 11 Loss: 2.069499979260195
Epoch: 12 Loss: 1.9147872802821186
Epoch: 13 Loss: 1.7603365663132433
Epoch: 14 Loss: 1.609107899895346
Epoch: 15 Loss: 1.4679826678800616
Epoch: 16 Loss: 1.3405411727412968
Epoch: 17 Loss: 1.2284160725682916
Epoch: 18 Loss: 1.1326743044040384
Epoch: 19 Loss: 1.0533681603177225
Epoch: 20 Loss: 0.9893632078425076
Epoch: 21 Loss: 0.9386267186756193
Epoch: 22 Loss: 0.8987632250742
Epoch: 23 Loss: 0.8674495682782606
Epoch: 24 Loss: 0.8426699737340124
Epoch: 25 Loss: 0.8228096537672277


In [48]:
model.bias

tensor([-0.1457], dtype=torch.float64, requires_grad=True)

In [49]:
with torch.no_grad():
    y_pred=model.forward(X_test_tensor)
    y_pred=torch.round(y_pred)
    accuracy=(y_pred==y_test_tensor).sum()/y_test_tensor.shape[0]
    print(f'Accuracy: {accuracy.item()}')

Accuracy: 59.08771896362305


In [50]:
from torch.utils.data import Dataset, DataLoader
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        return self.features[index], self.labels[index]
    

In [51]:
train_dataset=CustomDataset(X_train_tensor,y_train_tensor)
test_dataset=CustomDataset(X_test_tensor,y_test_tensor)

In [53]:
train_loader=DataLoader(train_dataset,batch_size=16,shuffle=True)
test_loader=DataLoader(test_dataset,batch_size=16,shuffle=True)

In [55]:
import torch.nn as nn

class MySimpleNN(nn.Module):
    def __init__(self,num_features):
        super().__init__()
        self.linear=nn.Linear(num_features,1)
        self.sigmoid=nn.Sigmoid()
    
    def forward(self,features):
        out=self.linear(features)
        out=self.sigmoid(out)

        return out

In [56]:
learning_rate=0.1
epochs=25

model=MySimpleNN(X_train_tensor.shape[1])

optimiser=torch.optim.SGD(model.parameters(),lr=learning_rate)

loss_function=nn.BCELoss()

In [60]:
for epoch in range(epochs):
    for features,labels in train_loader:
        optimiser.zero_grad()
        y_pred=model(features.float())
        loss=loss_function(y_pred,labels.float().view(-1,1))
        loss.backward()
        optimiser.step()
    print(f'Epoch: {epoch+1} Loss: {loss.item()}')

Epoch: 1 Loss: 0.2818625867366791
Epoch: 2 Loss: 0.023374240845441818
Epoch: 3 Loss: 0.10206346958875656
Epoch: 4 Loss: 0.050338711589574814
Epoch: 5 Loss: 0.03152155131101608
Epoch: 6 Loss: 0.0028630737215280533
Epoch: 7 Loss: 0.058141183108091354
Epoch: 8 Loss: 0.01188418548554182
Epoch: 9 Loss: 0.03943657875061035
Epoch: 10 Loss: 0.08406185358762741
Epoch: 11 Loss: 0.0043503353372216225
Epoch: 12 Loss: 0.00028935904265381396
Epoch: 13 Loss: 0.22715076804161072
Epoch: 14 Loss: 0.015040270052850246
Epoch: 15 Loss: 0.011244179680943489
Epoch: 16 Loss: 0.011786156333982944
Epoch: 17 Loss: 0.0457335002720356
Epoch: 18 Loss: 0.030387742444872856
Epoch: 19 Loss: 0.19668294489383698
Epoch: 20 Loss: 0.03336084634065628
Epoch: 21 Loss: 0.04091880843043327
Epoch: 22 Loss: 0.0605073980987072
Epoch: 23 Loss: 0.06164391711354256
Epoch: 24 Loss: 0.022116035223007202
Epoch: 25 Loss: 0.015810901299118996


In [62]:
model.eval()
accuracy_list=[]

with torch.no_grad():
    for batch_features,batch_labels in test_loader:
        # forward pass
        y_pred=model(batch_features.float())
        y_pred=(y_pred>0.8).float()

        #accuracy
        batch_accuracy=(y_pred.view(-1)==batch_labels).float().mean().item()
        accuracy_list.append(batch_accuracy)

overall_accuracy=sum(accuracy_list)/len(accuracy_list)

In [63]:
print(f'Overall Accuracy: {overall_accuracy}')

Overall Accuracy: 0.984375
