<a href="https://colab.research.google.com/github/jay-kanakia/GenAI/blob/main/05_pytorch_training_pipeline_using_dataset_and_dataloader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sample Code

In [1]:
from sklearn.datasets import make_classification
import numpy as np

In [2]:
X,y=make_classification(n_samples=10,n_informative=2,n_redundant=0,n_repeated=0,n_features=2,n_classes=2)

In [3]:
from torch.utils.data import Dataset,DataLoader

In [4]:
class MyDataset(Dataset):

  def __init__(self,features,labels):
    self.features=features
    self.labels=labels

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self,index):
    return self.features[index],self.labels[index]

In [5]:
dataset=MyDataset(X,y)

In [6]:
len(X)

10

In [7]:
dataloader=DataLoader(dataset,batch_size=2,shuffle=True)
import time

for batch_features,batch_labels in dataloader:
  print(batch_features,batch_labels)
  print('*'*50)
  time.sleep(2)

tensor([[-1.2985,  0.6713],
        [-1.0986, -0.8397]], dtype=torch.float64) tensor([1, 0])
**************************************************
tensor([[ 1.9800, -2.4476],
        [ 0.4317,  2.1175]], dtype=torch.float64) tensor([0, 1])
**************************************************
tensor([[ 1.6077,  0.3643],
        [-1.1971, -0.5124]], dtype=torch.float64) tensor([1, 0])
**************************************************
tensor([[ 0.7658,  1.2255],
        [ 1.4716, -0.1528]], dtype=torch.float64) tensor([1, 0])
**************************************************
tensor([[-0.7253, -1.1881],
        [-0.7245,  1.2705]], dtype=torch.float64) tensor([0, 1])
**************************************************


# Actual dataset example

In [8]:
import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.model_selection import train_test_split

In [9]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [10]:
df.drop(columns=['id','Unnamed: 32'],inplace=True)

In [11]:
X=df.iloc[:,1:]
y=df.iloc[:,0]

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

ss=StandardScaler()
X_train_scaled=ss.fit_transform(X_train)
X_test_scaled=ss.transform(X_test)

le=LabelEncoder()
y_train_transformed=le.fit_transform(y_train)
y_test_transformed=le.transform(y_test)

X_train_tensor=torch.from_numpy(X_train_scaled.astype(np.float32))
X_test_tensor=torch.from_numpy(X_test_scaled.astype(np.float32))
y_train_tensor=torch.from_numpy(y_train_transformed.astype(np.float32))
y_test_tensor=torch.from_numpy(y_test_transformed.astype(np.float32))

In [21]:
from torch.utils.data import Dataset,DataLoader

class MyDataset(Dataset):

  def __init__(self,features,labels):
    self.features=features
    self.labels=labels

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self,index):
    return self.features[index],self.labels[index]

In [22]:
train_dataset=MyDataset(X_train_tensor,y_train_tensor)
test_dataset=MyDataset(X_test_tensor,y_test_tensor)

In [23]:
train_dataset[0]

(tensor([-1.4408, -0.4353, -1.3621, -1.1391,  0.7806,  0.7189,  2.8231, -0.1191,
          1.0927,  2.4582, -0.2638, -0.0161, -0.4704, -0.4748,  0.8384,  3.2510,
          8.4389,  3.3920,  2.6212,  2.0612, -1.2329, -0.4763, -1.2479, -0.9740,
          0.7229,  1.1867,  4.6728,  0.9320,  2.0972,  1.8865]),
 tensor(0.))

In [24]:
train_loader=DataLoader(train_dataset,batch_size=32,shuffle=True)
test_loader=DataLoader(test_dataset,batch_size=32,shuffle=True)

In [25]:
import torch.nn as nn

class MyNN(nn.Module):

  def __init__(self,num_features):

    super().__init__()

    self.num_features=num_features
    self.linear=nn.Linear(self.num_features,1)
    self.sigmoid=nn.Sigmoid()

  def forward(self,features):
    out=self.linear(features)
    out=self.sigmoid(out)
    return out

In [26]:
epochs=200
learning_rate=0.01
model=MyNN(X_train_tensor.shape[1])

In [27]:
loss_function=nn.BCELoss()
optimizer=torch.optim.Adam(model.parameters(),lr=learning_rate)

In [40]:
for epoch in range(epochs):
  for batch_features,batch_labels in train_loader:
    y_pred=model(batch_features)
    loss=loss_function(y_pred,batch_labels.unsqueeze(1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  print(f"Epochs is {epoch+1} and loss is {loss.item()}")

Epochs is 1 and loss is 0.026831958442926407
Epochs is 2 and loss is 0.10342616587877274
Epochs is 3 and loss is 0.11872636526823044
Epochs is 4 and loss is 0.0035836815368384123
Epochs is 5 and loss is 0.03699813410639763
Epochs is 6 and loss is 0.12599165737628937
Epochs is 7 and loss is 0.0036394840572029352
Epochs is 8 and loss is 0.10083888471126556
Epochs is 9 and loss is 0.019582325592637062
Epochs is 10 and loss is 0.2994533181190491
Epochs is 11 and loss is 0.0005359628703445196
Epochs is 12 and loss is 0.007205795031040907
Epochs is 13 and loss is 0.01292070746421814
Epochs is 14 and loss is 0.03688644617795944
Epochs is 15 and loss is 0.01588229089975357
Epochs is 16 and loss is 0.009201200678944588
Epochs is 17 and loss is 0.04332365840673447
Epochs is 18 and loss is 0.0026877897325903177
Epochs is 19 and loss is 0.009896080009639263
Epochs is 20 and loss is 0.0006761277327314019
Epochs is 21 and loss is 0.1512884646654129
Epochs is 22 and loss is 9.091789979720488e-05
Epoc

In [43]:
# evaluation

model.eval()
accuracy_list=[]
with torch.no_grad():
  for batch_features,batch_labels in test_loader:
    y_pred=model(batch_features)
    y_pred=torch.round(y_pred)
    accuracy=(y_pred==batch_labels).float().mean()
    accuracy_list.append(accuracy)
  #Calculate overall accuracy
  overall_accuracy = sum(accuracy_list) / len(accuracy_list)
  print(f'Accuracy: {overall_accuracy:.4f}')

Accuracy: 0.5347
