In [2]:
#import libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [3]:
#pip install torch

In [4]:
#load dataset
df = pd.read_csv("/breast-cancer.csv")
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [5]:
df.shape

(569, 32)

In [6]:
df.columns

Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst'],
      dtype='object')

In [7]:
df.drop(columns = 'id', inplace = True)

In [8]:
df.columns

Index(['diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst'],
      dtype='object')

In [9]:
df.shape

(569, 31)

In [10]:
#Split data into train and test sets
xtrain, xtest, ytrain, ytest = train_test_split(df.iloc[:,1:], df.iloc[:,0], test_size= 0.2)

In [11]:
#Scale the training data and testing data
scaler = StandardScaler()
xtrain = scaler.fit_transform(xtrain)
xtest = scaler.fit_transform(xtest)

In [12]:
#To convert the ytrain and ytest values from M and B numeric, we will use LabelEncoder
encoder = LabelEncoder()
ytrain = encoder.fit_transform(ytrain)
ytest = encoder.fit_transform(ytest)

In [13]:
print(type(ytrain))
print(type(xtrain))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [14]:
#Convert numpy arrays to pytorch tensors
xtrain_tensor = torch.from_numpy(xtrain)
xtest_tensor = torch.from_numpy(xtest)
ytrain_tensor = torch.from_numpy(ytrain)
ytest_tensor = torch.from_numpy(ytest)

In [15]:
print(xtrain_tensor.shape)
print(ytrain_tensor.shape)

torch.Size([455, 30])
torch.Size([455])


### Define the model

In [17]:
# hyper parameters
learning_rate = 0.1
epoch = 100

In [18]:
class mySimpleNN:
    def __init__(self, X):
        self.weights = torch.rand(X.shape[1], 1, dtype = torch.float64, requires_grad = True)
        self.bias = torch.zeros(1, dtype = torch.float64, requires_grad = True)
        
    def forward(self, X):
        z = torch.matmul(X, self.weights) + self.bias
        ypred = torch.sigmoid(z)
        return ypred

    def loss_function(self, ypred, y):
        
        #Clamp predictions to avoid log(0) situation
        epsilon = 1e-7
        ypred = torch.clamp(ypred, min = epsilon, max = 1 - epsilon)

        #- [y * log(p) + (1 - y) * log(1 - p)]
        loss  = - (ytrain_tensor * torch.log(ypred) + (1 - ytrain_tensor) * torch.log(1 - ypred)).mean()
        return loss

In [19]:
#Training pipeline
#1. Create model
model = mySimpleNN(xtrain_tensor)

In [21]:
#loop
for e in range(epoch):
    
    #forward pass (calculate z= wz+b and then sigmoid(z))
    ypred = model.forward(xtrain_tensor)
    
    #loss calculate
    loss = model.loss_function(ypred, ytrain_tensor)
    
    #backward pass
    loss.backward()

    #parameter update
    #weight update formula: w_new = w_old - learning_rate * partial derviative of the loss function L with respect to the weight w.
    #same formula to update bias is-
    #bias update formulaw: b_new = b_old - learning_rate * partial derviative of the loss function L with respect to the bias b.
    
    with torch.no_grad():
        model.weights -= learning_rate * model.weights.grad
        model.bias -= learning_rate * model.bias.grad
    
    #zero gradients
    #to stop accumulation of gradients with each epoch we will use grad.zero_() so that gradients will start from zero for each epoch
    model.weights.grad.zero_()
    model.bias.grad.zero_()

    #print loss in each epoch
    print(f'Epoch: {e + 1}, Loss: {loss.item()}')

Epoch: 1, Loss: 0.6768336658374505
Epoch: 2, Loss: 0.676717057415376
Epoch: 3, Loss: 0.6766036832714702
Epoch: 4, Loss: 0.6764933927948127
Epoch: 5, Loss: 0.6763860441775356
Epoch: 6, Loss: 0.6762815038418932
Epoch: 7, Loss: 0.6761796459070809
Epoch: 8, Loss: 0.6760803516929035
Epoch: 9, Loss: 0.6759835092576297
Epoch: 10, Loss: 0.6758890129675541
Epoch: 11, Loss: 0.6757967630959896
Epoch: 12, Loss: 0.6757066654495721
Epoch: 13, Loss: 0.6756186310199277
Epoch: 14, Loss: 0.6755325756588885
Epoch: 15, Loss: 0.6754484197755871
Epoch: 16, Loss: 0.6753660880538787
Epoch: 17, Loss: 0.6752855091886518
Epoch: 18, Loss: 0.6752066156397044
Epoch: 19, Loss: 0.675129343401946
Epoch: 20, Loss: 0.6750536317907886
Epoch: 21, Loss: 0.674979423241666
Epoch: 22, Loss: 0.6749066631226991
Epoch: 23, Loss: 0.6748352995595998
Epoch: 24, Loss: 0.6747652832719636
Epoch: 25, Loss: 0.674696567420173
Epoch: 26, Loss: 0.6746291074621784
Epoch: 27, Loss: 0.6745628610194866
Epoch: 28, Loss: 0.6744977877517265
Epoch

In [22]:
#Model Evaluation
with torch.no_grad():
    ypred = model.forward(xtrain_tensor)
    ypred = (ypred > 0.5).float()
    accuracy = (ypred == ytest_tensor).float().mean()
    print(f'Accuracy: {accuracy.item()}')

Accuracy: 0.666955828666687
