In [1]:
import torch

In [2]:
import torch.nn as nn

In [3]:
class Model(nn.Module):
    def __init__(self,new_features):
      super().__init__()
      self.linear = nn.Linear(new_features,1) # helps to create input and hidden layer corresponding to size of new_features
      self.sigmoid = nn.Sigmoid() # helps to create an output layer

    def forward(self,features):
      out = self.linear(features)
      out = self.sigmoid(out)
      return out

In [4]:
import torch
features = torch.rand(10,5)

In [5]:
features.shape

torch.Size([10, 5])

In [6]:
features.shape[0],features.shape[1]

(10, 5)

In [7]:
# Linear equation for forward propogation --> z = wx + b
model = Model(features.shape[1])

In [8]:
class test:
  def __init__(self):
    print("hello from init")
  def __call__(self):
    print("hello from call")
  def testing(self):
    print("hello from testing")

In [9]:
obj = test()

hello from init


In [10]:
obj.testing()

hello from testing


In [11]:
# Calling obj as function
obj()

hello from call


In [12]:
model(features)

tensor([[0.5029],
        [0.4982],
        [0.4984],
        [0.5263],
        [0.5767],
        [0.5309],
        [0.5310],
        [0.5300],
        [0.4940],
        [0.5219]], grad_fn=<SigmoidBackward0>)

In [13]:
model.forward(features) ## model.forward(features) == model(features)

tensor([[0.5029],
        [0.4982],
        [0.4984],
        [0.5263],
        [0.5767],
        [0.5309],
        [0.5310],
        [0.5300],
        [0.4940],
        [0.5219]], grad_fn=<SigmoidBackward0>)

In [14]:
model.linear.weight

Parameter containing:
tensor([[-0.0972,  0.3315,  0.2108,  0.0216, -0.4305]], requires_grad=True)

In [15]:
model.linear.bias

Parameter containing:
tensor([0.0672], requires_grad=True)

## weights and biases in linear model
- For 5 features, 5 weights are there and 1 bias --> Total of 6 parameters

In [16]:
class Model(nn.Module):
    def __init__(self,new_features):
      super().__init__()
      self.linear1 = nn.Linear(new_features,3) # helps to create input and hidden layer corresponding to size of new_features
      self.relu = nn.ReLU() # helps to create an activation function
      self.linear2 = nn.Linear(3,1)
      self.sigmoid = nn.Sigmoid() # helps to create an output layer

    def forward(self,features):
      out = self.linear1(features)
      out = self.relu(out)
      out = self.linear2(out)
      out = self.sigmoid(out)
      return out

In [17]:
data = torch.rand(10,5)

In [18]:
data

tensor([[0.5812, 0.1530, 0.7253, 0.5753, 0.6457],
        [0.1808, 0.4925, 0.2328, 0.1168, 0.0906],
        [0.5206, 0.9882, 0.6517, 0.8029, 0.6165],
        [0.4290, 0.5016, 0.4701, 0.5423, 0.2789],
        [0.5566, 0.5590, 0.6784, 0.5159, 0.1194],
        [0.7076, 0.7351, 0.0520, 0.9652, 0.6272],
        [0.3824, 0.3950, 0.1907, 0.4750, 0.1034],
        [0.4766, 0.0055, 0.0916, 0.2558, 0.0015],
        [0.4556, 0.6372, 0.9327, 0.5509, 0.3270],
        [0.7228, 0.5062, 0.4050, 0.9214, 0.9283]])

In [19]:
model = Model(data.shape[1]) # data.shape[1]

In [20]:
model.forward(data)

tensor([[0.5007],
        [0.5017],
        [0.5011],
        [0.5009],
        [0.5014],
        [0.5007],
        [0.5010],
        [0.5010],
        [0.5011],
        [0.5007]], grad_fn=<SigmoidBackward0>)

In [21]:
model(data) # for every there is 1 weight --> here, no. of rows = 10

tensor([[0.5007],
        [0.5017],
        [0.5011],
        [0.5009],
        [0.5014],
        [0.5007],
        [0.5010],
        [0.5010],
        [0.5011],
        [0.5007]], grad_fn=<SigmoidBackward0>)

In [22]:
model.linear1.weight # as linear1 have (5,3) --> 5x3 = 15

Parameter containing:
tensor([[-0.1389, -0.3953, -0.1529, -0.2947,  0.0845],
        [ 0.3313, -0.2882, -0.1911,  0.0433,  0.2933],
        [ 0.2863,  0.3379, -0.0345, -0.4287, -0.1247]], requires_grad=True)

In [23]:
model.linear1.bias

Parameter containing:
tensor([-0.0804, -0.3447,  0.0446], requires_grad=True)

In [24]:
model.linear2.weight

Parameter containing:
tensor([[0.5390, 0.4300, 0.0215]], requires_grad=True)

In [25]:
model.linear2.bias

Parameter containing:
tensor([0.0026], requires_grad=True)

## Data --> shape(10,5)
- linear1(5,3) --> no.of weights = 5 and no. of biases = 3, total_no of parameters = 5x3 + 3 = 18
- linear2(3,1) --> no.of weights = 3 and no. of biases = 1, total_no of parameters = 3x1 + 1 = 4

In [57]:
# create model class
# Using Sequential function here, so, we don't have to write the whole parameters again in the forward method, we can call the variable where the sequential parameter is stored
import torch
import torch.nn as nn

class MySimpleNN(nn.Module):
  def __init__(self, num_features):
    super().__init__()
    self.network = nn.Sequential(
        nn.Linear(num_features, 3),
        nn.ReLU(),
        nn.Linear(3, 1),
        nn.Sigmoid()
    )
  def forward(self, features):
    out = self.network(features)
    return out

In [27]:
data = torch.rand(10,5)

In [58]:
model1 = MySimpleNN(data.shape[1]) # n_features = 5

In [59]:
model1(data)

tensor([[0.5243],
        [0.5207],
        [0.4928],
        [0.5251],
        [0.5269],
        [0.5219],
        [0.5251],
        [0.5261],
        [0.5192],
        [0.5277]], grad_fn=<SigmoidBackward0>)

In [60]:
!pip install torchinfo



In [61]:
# Summary
from torchinfo import summary

In [62]:
summary(model1,input_size=(10,5))

Layer (type:depth-idx)                   Output Shape              Param #
MySimpleNN                               [10, 1]                   --
├─Sequential: 1-1                        [10, 1]                   --
│    └─Linear: 2-1                       [10, 3]                   18
│    └─ReLU: 2-2                         [10, 3]                   --
│    └─Linear: 2-3                       [10, 1]                   4
│    └─Sigmoid: 2-4                      [10, 1]                   --
Total params: 22
Trainable params: 22
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [33]:
import pandas as pd
df = pd.read_csv("https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv")

In [63]:
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [64]:
df["diagnosis"].value_counts()

diagnosis
B    357
M    212
Name: count, dtype: int64

In [65]:
df.shape

(569, 31)

In [66]:
df.columns

Index(['diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst'],
      dtype='object')

In [67]:
df = df.drop(columns=["id","Unnamed: 32"])

KeyError: "['id', 'Unnamed: 32'] not found in axis"

In [None]:
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [None]:
df["diagnosis"].value_counts() # B -> Benign and M -> Maligant

diagnosis
B    357
M    212
Name: count, dtype: int64

In [None]:
X = df.drop(columns=["diagnosis"]) # df.iloc[:,1]
y = df["diagnosis"] # df.iloc[:,0]

In [None]:
X.head(2)

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902


In [None]:
y.head(2)

0    M
1    M
Name: diagnosis, dtype: object

In [None]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import  train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [None]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

In [None]:
X_train

array([[-1.44075296, -0.43531947, -1.36208497, ...,  0.9320124 ,
         2.09724217,  1.88645014],
       [ 1.97409619,  1.73302577,  2.09167167, ...,  2.6989469 ,
         1.89116053,  2.49783848],
       [-1.39998202, -1.24962228, -1.34520926, ..., -0.97023893,
         0.59760192,  0.0578942 ],
       ...,
       [ 0.04880192, -0.55500086, -0.06512547, ..., -1.23903365,
        -0.70863864, -1.27145475],
       [-0.03896885,  0.10207345, -0.03137406, ...,  1.05001236,
         0.43432185,  1.21336207],
       [-0.54860557,  0.31327591, -0.60350155, ..., -0.61102866,
        -0.3345212 , -0.84628745]], shape=(455, 30))

In [None]:
y_test

array([0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 1])

In [68]:
# Converting normal arrays into tensors
X_train_tensor = torch.from_numpy(X_train.astype(np.float32))
X_test_tensor = torch.from_numpy(X_test.astype(np.float32))
y_train_tensor = torch.from_numpy(y_train.astype(np.float32))
y_test_tensor = torch.from_numpy(y_test.astype(np.float32))


In [69]:
X_train_tensor

tensor([[-1.4408, -0.4353, -1.3621,  ...,  0.9320,  2.0972,  1.8865],
        [ 1.9741,  1.7330,  2.0917,  ...,  2.6989,  1.8912,  2.4978],
        [-1.4000, -1.2496, -1.3452,  ..., -0.9702,  0.5976,  0.0579],
        ...,
        [ 0.0488, -0.5550, -0.0651,  ..., -1.2390, -0.7086, -1.2715],
        [-0.0390,  0.1021, -0.0314,  ...,  1.0500,  0.4343,  1.2134],
        [-0.5486,  0.3133, -0.6035,  ..., -0.6110, -0.3345, -0.8463]])

In [70]:
type(X_train), type(X_train_tensor)

(numpy.ndarray, torch.Tensor)

In [71]:
df.shape

(569, 31)

In [72]:
X_train.shape

(455, 30)

In [73]:
y_train.shape

(455,)

In [87]:
# Hyperparameter 
learning_rate = 0.1
epochs = 20 # one forward and one backward propogation

In [83]:
loss_function = nn.BCELoss()

In [84]:
model = MySimpleNN(X_train.shape[1])

In [85]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
for epoch in range(epochs):
    # forward pass
    y_pred = model(X_train_tensor)
    
    # loss calculation 
    loss = loss_function(y_pred, y_train_tensor.view(-1,1)) # converting the normal tensor into the 1D Tensor

    # clear gradients 
    optimizer.zero_grad() # after every forward and backward propogation

    loss.backward()
    
    # parameters update 
    optimizer.step()

    # print loss in each epoch 
    print(f"Epoch : {epoch + 1}, Loss : {loss.item()}")

Epoch : 1, Loss : 0.4209081530570984
Epoch : 2, Loss : 0.41453319787979126
Epoch : 3, Loss : 0.40830197930336
Epoch : 4, Loss : 0.40221652388572693
Epoch : 5, Loss : 0.3962653577327728
Epoch : 6, Loss : 0.3904326260089874
Epoch : 7, Loss : 0.3847271203994751
Epoch : 8, Loss : 0.3790929913520813
Epoch : 9, Loss : 0.37355881929397583
Epoch : 10, Loss : 0.3681110739707947
Epoch : 11, Loss : 0.362729012966156
Epoch : 12, Loss : 0.35736775398254395
Epoch : 13, Loss : 0.35207268595695496
Epoch : 14, Loss : 0.34684059023857117
Epoch : 15, Loss : 0.341668039560318
Epoch : 16, Loss : 0.33652058243751526
Epoch : 17, Loss : 0.3313944935798645
Epoch : 18, Loss : 0.32629677653312683
Epoch : 19, Loss : 0.3212333917617798
Epoch : 20, Loss : 0.3162032663822174


In [90]:
with torch.no_grad():
    model.forward(X_test_tensor)
    y_pred = (y_pred>0.5).float()
    accuracy = (y_pred == y_test_tensor).float().mean()
    print(f"Accuracy : {accuracy.item()}")

Accuracy : 0.541835367679596


In [91]:
model.forward(X_test_tensor[1])

tensor([0.9144], grad_fn=<SigmoidBackward0>)

In [92]:
y_pred = model.forward(X_test_tensor[1])

In [94]:
print(y_pred)

tensor([0.9144], grad_fn=<SigmoidBackward0>)


In [97]:
print((y_pred>0.5).float())

tensor([1.])


# DataLoader and DataSet

- Dataset -> Intialization of data
- DataLoader -> Batches of data

In [99]:
from sklearn.datasets import make_classification
import torch

In [100]:
# Create a synthetic classification dataset
# state = 42 --> for reproducibility
X, y = make_classification(
    n_samples=10, # No. of samples 
    n_features = 2, # Number of features 
    n_informative = 2, # Number of informative features 
    n_redundant = 0, # No. of reducndant features, n_features should be (>) greater than the n_redundant
    n_classes = 2, # No. of classes
    random_state=42
)

In [101]:
X

array([[ 1.06833894, -0.97007347],
       [-1.14021544, -0.83879234],
       [-2.8953973 ,  1.97686236],
       [-0.72063436, -0.96059253],
       [-1.96287438, -0.99225135],
       [-0.9382051 , -0.54304815],
       [ 1.72725924, -1.18582677],
       [ 1.77736657,  1.51157598],
       [ 1.89969252,  0.83444483],
       [-0.58723065, -1.97171753]])

In [102]:
y

array([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

In [104]:
# Convert the data into pytorch tensors 
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

  X = torch.tensor(X, dtype=torch.float32)
  y = torch.tensor(y, dtype=torch.long)


In [105]:
X

tensor([[ 1.0683, -0.9701],
        [-1.1402, -0.8388],
        [-2.8954,  1.9769],
        [-0.7206, -0.9606],
        [-1.9629, -0.9923],
        [-0.9382, -0.5430],
        [ 1.7273, -1.1858],
        [ 1.7774,  1.5116],
        [ 1.8997,  0.8344],
        [-0.5872, -1.9717]])

In [107]:
y

tensor([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

In [112]:
features.shape

torch.Size([10, 5])

In [108]:
from torch.utils.data import Dataset, DataLoader

In [116]:
class CustomDataset(Dataset):
    def __init__(self,features,labels):
        self.features = features 
        self.labels = labels 

    def __len__(self):
        return self.features.shape[0] # No. of datapoints in the dataset
    def __getitem__(self,index):
        return self.features[index], self.labels[index]

In [117]:
dataset = CustomDataset(X,y)

In [118]:
len(dataset)

10

In [None]:
dataset[2] ## __getitem__

(tensor([-2.8954,  1.9769]), tensor(0))

In [121]:
dataloader = DataLoader(dataset,batch_size=2,shuffle=False)

In [122]:
for batch_features, batch_labels in dataloader:
    print(batch_features)
    print(batch_labels)
    print("-"*50)

tensor([[ 1.0683, -0.9701],
        [-1.1402, -0.8388]])
tensor([1, 0])
--------------------------------------------------
tensor([[-2.8954,  1.9769],
        [-0.7206, -0.9606]])
tensor([0, 0])
--------------------------------------------------
tensor([[-1.9629, -0.9923],
        [-0.9382, -0.5430]])
tensor([0, 1])
--------------------------------------------------
tensor([[ 1.7273, -1.1858],
        [ 1.7774,  1.5116]])
tensor([1, 1])
--------------------------------------------------
tensor([[ 1.8997,  0.8344],
        [-0.5872, -1.9717]])
tensor([1, 0])
--------------------------------------------------
