- Calculating gradients over the entire training set for every parameter update is computationally expensive.
- So it is better to calculate gradients over small batches (mini-batches) of training samples, one batch per iteration; an epoch is one full pass over all the batches.
- This can be done using the `Dataset` and `DataLoader` classes in PyTorch.

In [2]:
import torch
from torch.utils.data import Dataset,DataLoader
import numpy as np

In [28]:
class WineDataset(Dataset):
    """Map-style dataset that loads a wine CSV fully into memory as tensors.

    The file is read with one header row skipped. The first column is used
    as the target and every remaining column as a feature.

    Args:
        path: CSV file to load. Defaults to "wine.csv", so ``WineDataset()``
            keeps working exactly as before.

    Attributes:
        x: float32 tensor of shape (n_samples, n_features).
        y: float32 tensor of shape (n_samples, 1).
        n_samples: number of data rows read from the file.
    """

    def __init__(self, path="wine.csv"):
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row; without it loadtxt returns a 1-D array and the
        # column slicing below raises IndexError.
        xy = np.loadtxt(fname=path, dtype=np.float32, delimiter=",", skiprows=1, ndmin=2)
        # The first column is the target, so the features start at column 1.
        self.x = torch.from_numpy(xy[:, 1:])
        # Indexing with the list [0] (not the scalar 0) keeps the column axis,
        # so the targets have shape (n_samples, 1), e.g. [[1.], [2.], [3.]].
        self.y = torch.from_numpy(xy[:, [0]])
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_samples

In [29]:
# Create a Dataset object; constructing it reads the CSV into in-memory tensors.
dataset= WineDataset()
# Indexing goes through WineDataset.__getitem__, which returns one (features, label) pair.
features,labels=dataset[0]
print(features,labels)
# len() goes through WineDataset.__len__, i.e. the number of rows that were loaded.
print(f'No of samples is {len(dataset)}')

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])
No of samples is 178


In [30]:
# Creating Batches
# Wrap the dataset object created earlier instead of building a second WineDataset:
# the CSV is then read only once, and len(dataset) describes exactly what the loader serves.
data = DataLoader(dataset, batch_size=4, shuffle=True)
print(f'Total Batches:{len(data)}')
# Look at the first batch only. With shuffle=True its contents change from run to run.
for features, label in data:
    print(features)
    print(label)
    break

# Each full batch contains 4 samples. 178 samples / 4 = 44.5, so there are 45 batches:
# 44 full batches plus a final batch of 2 (the short last batch is kept by default).

Total Batches:45
tensor([[1.1840e+01, 8.9000e-01, 2.5800e+00, 1.8000e+01, 9.4000e+01, 2.2000e+00,
         2.2100e+00, 2.2000e-01, 2.3500e+00, 3.0500e+00, 7.9000e-01, 3.0800e+00,
         5.2000e+02],
        [1.2040e+01, 4.3000e+00, 2.3800e+00, 2.2000e+01, 8.0000e+01, 2.1000e+00,
         1.7500e+00, 4.2000e-01, 1.3500e+00, 2.6000e+00, 7.9000e-01, 2.5700e+00,
         5.8000e+02],
        [1.2690e+01, 1.5300e+00, 2.2600e+00, 2.0700e+01, 8.0000e+01, 1.3800e+00,
         1.4600e+00, 5.8000e-01, 1.6200e+00, 3.0500e+00, 9.6000e-01, 2.0600e+00,
         4.9500e+02],
        [1.1960e+01, 1.0900e+00, 2.3000e+00, 2.1000e+01, 1.0100e+02, 3.3800e+00,
         2.1400e+00, 1.3000e-01, 1.6500e+00, 3.2100e+00, 9.9000e-01, 3.1300e+00,
         8.8600e+02]])
tensor([[2.],
        [2.],
        [2.],
        [2.]])


In [31]:
import math
num_epochs=2
# Batches per epoch, derived from the loader itself rather than repeating the
# batch-size literal, so it stays correct if the DataLoader's batch_size changes.
num_iterations = math.ceil(len(data.dataset) / data.batch_size)

# No model is trained here: the loop only walks the batches to show the
# epoch / iteration pattern that a training loop would follow.
for epoch in range(num_epochs):

    for i, (feature, label) in enumerate(data):

        if (i + 1) % 5 == 0:  # report progress on every 5th batch

            print(f'epoch {epoch+1} batch {i+1}')

epoch 1 batch 5
epoch 1 batch 10
epoch 1 batch 15
epoch 1 batch 20
epoch 1 batch 25
epoch 1 batch 30
epoch 1 batch 35
epoch 1 batch 40
epoch 1 batch 45
epoch 2 batch 5
epoch 2 batch 10
epoch 2 batch 15
epoch 2 batch 20
epoch 2 batch 25
epoch 2 batch 30
epoch 2 batch 35
epoch 2 batch 40
epoch 2 batch 45
