<a href="https://colab.research.google.com/github/ahmedbasemdev/PyTorch-Tutorial/blob/main/08_Dataset_and_DataLoader_Batch_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### gradient computation etc. not efficient for whole data set
### -> divide dataset into small batches

'''
### training loop
for epoch in range(num_epochs):
    # loop over all batches
    for i in range(total_batches):
        batch_x, batch_y = ...
'''

### epoch = one forward and backward pass of ALL training samples
### batch_size = number of training samples used in one forward/backward pass
### number of iterations = number of passes, each pass (forward+backward) using [batch_size] number of sampes
### e.g : 100 samples, batch_size=20 -> 100/20=5 iterations for 1 epoch

### --> DataLoader can do the batch computation for us

### Implement a custom Dataset:
### inherit Dataset
### implement __init__ , __getitem__ , and __len__


In [1]:
import torch
import torchvision
from torch.utils.data import Dataset , DataLoader
import numpy as np
import math

In [2]:
class WineDataset(Dataset):

  def __init__(self):
    # data loading
    xy = np.loadtxt('/content/wine.csv',delimiter=',',dtype=np.float32,skiprows=1)

    print(xy.shape)

    self.x = torch.from_numpy(xy[:,1:])

    self.y = torch.from_numpy(xy[:,[0]])

    self.nSamples = self.x.shape[0]

  

  def __getitem__(self,index):
    # dataset[0]
    return self.x[index] , self.y[index]


  def __len__(self):
    ## len(dataset)
    return self.nSamples


In [3]:
dataset = WineDataset()


(178, 14)


In [4]:
firstData = dataset[0]
features, labels = firstData
print(features, labels)

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])


In [6]:
dataLoader =DataLoader(
    dataset=dataset,
    batch_size =4,
    shuffle=True,
    num_workers=2,
    
)

In [8]:
dataIter = iter(dataLoader)
data = dataIter.next()
features , labels = data
print(features , labels)

tensor([[1.2450e+01, 3.0300e+00, 2.6400e+00, 2.7000e+01, 9.7000e+01, 1.9000e+00,
         5.8000e-01, 6.3000e-01, 1.1400e+00, 7.5000e+00, 6.7000e-01, 1.7300e+00,
         8.8000e+02],
        [1.4370e+01, 1.9500e+00, 2.5000e+00, 1.6800e+01, 1.1300e+02, 3.8500e+00,
         3.4900e+00, 2.4000e-01, 2.1800e+00, 7.8000e+00, 8.6000e-01, 3.4500e+00,
         1.4800e+03],
        [1.2330e+01, 1.1000e+00, 2.2800e+00, 1.6000e+01, 1.0100e+02, 2.0500e+00,
         1.0900e+00, 6.3000e-01, 4.1000e-01, 3.2700e+00, 1.2500e+00, 1.6700e+00,
         6.8000e+02],
        [1.1790e+01, 2.1300e+00, 2.7800e+00, 2.8500e+01, 9.2000e+01, 2.1300e+00,
         2.2400e+00, 5.8000e-01, 1.7600e+00, 3.0000e+00, 9.7000e-01, 2.4400e+00,
         4.6600e+02]]) tensor([[3.],
        [1.],
        [2.],
        [2.]])


In [13]:
numEpochs = 2
totalSample = len(dataLoader)
nIteration = math.ceil(totalSample/4)


In [14]:
for epoch in range(numEpochs):
  for i , (data,label) in enumerate(dataLoader):
    if (i + 1 ) % 5 == 0:
      print(f"epoch is {epoch} / {numEpochs} , step {i+1} / {nIteration}")

epoch is 0 / 2 , step 5 / 12
epoch is 0 / 2 , step 10 / 12
epoch is 0 / 2 , step 15 / 12
epoch is 0 / 2 , step 20 / 12
epoch is 0 / 2 , step 25 / 12
epoch is 0 / 2 , step 30 / 12
epoch is 0 / 2 , step 35 / 12
epoch is 0 / 2 , step 40 / 12
epoch is 0 / 2 , step 45 / 12
epoch is 1 / 2 , step 5 / 12
epoch is 1 / 2 , step 10 / 12
epoch is 1 / 2 , step 15 / 12
epoch is 1 / 2 , step 20 / 12
epoch is 1 / 2 , step 25 / 12
epoch is 1 / 2 , step 30 / 12
epoch is 1 / 2 , step 35 / 12
epoch is 1 / 2 , step 40 / 12
epoch is 1 / 2 , step 45 / 12
