<a href="https://colab.research.google.com/github/dongminkim0220/pytorch_tutorial/blob/master/dataloader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# dataloader

PyTorch Tutorial 09 - Dataset and DataLoader - Batch Training

https://www.youtube.com/watch?v=PXOzkkB5eH0&list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4&index=10

PyTorch Tutorial 10 - Dataset Transforms

https://www.youtube.com/watch?v=X_QOZEko5uE&list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4&index=10

In [18]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [19]:
file = "/content/drive/MyDrive/pytorch intro/wine.csv"

## dataset

In [20]:
class WineDataset(Dataset):

  def __init__(self):
    # data loading
    xy = np.loadtxt(file, delimiter=',', dtype = np.float32, skiprows = 1)
    self.x = torch.from_numpy(xy[:, 1:])
    self.y = torch.from_numpy(xy[:, [0]]) # n_samples, 1
    self.n_samples = xy.shape[0]

  def __getitem__(self, index):
    # dataset[0]
    return self.x[index], self.y[index]

  def __len__(self):
    # len(dataset)
    return self.n_samples

In [21]:
dataset = WineDataset()

In [22]:
first_data = dataset[0]

In [23]:
features, labels = first_data

In [24]:
features, labels

(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
         3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
         1.0650e+03]), tensor([1.]))

## dataloader

In [32]:
dataloader = DataLoader(dataset = dataset, batch_size = 4, shuffle = True, num_workers = 2)

In [33]:
dataiter = iter(dataloader)

In [34]:
data = dataiter.next()

In [35]:
features,labels = data

In [36]:
features, labels

(tensor([[1.3870e+01, 1.9000e+00, 2.8000e+00, 1.9400e+01, 1.0700e+02, 2.9500e+00,
          2.9700e+00, 3.7000e-01, 1.7600e+00, 4.5000e+00, 1.2500e+00, 3.4000e+00,
          9.1500e+02],
         [1.3450e+01, 3.7000e+00, 2.6000e+00, 2.3000e+01, 1.1100e+02, 1.7000e+00,
          9.2000e-01, 4.3000e-01, 1.4600e+00, 1.0680e+01, 8.5000e-01, 1.5600e+00,
          6.9500e+02],
         [1.4370e+01, 1.9500e+00, 2.5000e+00, 1.6800e+01, 1.1300e+02, 3.8500e+00,
          3.4900e+00, 2.4000e-01, 2.1800e+00, 7.8000e+00, 8.6000e-01, 3.4500e+00,
          1.4800e+03],
         [1.3730e+01, 1.5000e+00, 2.7000e+00, 2.2500e+01, 1.0100e+02, 3.0000e+00,
          3.2500e+00, 2.9000e-01, 2.3800e+00, 5.7000e+00, 1.1900e+00, 2.7100e+00,
          1.2850e+03]]), tensor([[1.],
         [3.],
         [1.],
         [1.]]))

## training loop

In [37]:
num_epochs = 2

In [38]:
total_samples = len(dataset)

In [39]:
n_iterations = math.ceil(total_samples/4)

In [40]:
total_samples, n_iterations

(178, 45)

In [44]:
for epoch in range(num_epochs):
  for i, (input, labels) in  enumerate(dataloader):
    # forward, backward, update
    # ...

    # check
    if (i+1) % 5 == 0:
      print(f"epoch {epoch+1}/{num_epochs}, step {i+1}/{n_iterations}, input {input.shape}")

epoch 1/2, step 5/45, input torch.Size([4, 13])
epoch 1/2, step 10/45, input torch.Size([4, 13])
epoch 1/2, step 15/45, input torch.Size([4, 13])
epoch 1/2, step 20/45, input torch.Size([4, 13])
epoch 1/2, step 25/45, input torch.Size([4, 13])
epoch 1/2, step 30/45, input torch.Size([4, 13])
epoch 1/2, step 35/45, input torch.Size([4, 13])
epoch 1/2, step 40/45, input torch.Size([4, 13])
epoch 1/2, step 45/45, input torch.Size([2, 13])
epoch 2/2, step 5/45, input torch.Size([4, 13])
epoch 2/2, step 10/45, input torch.Size([4, 13])
epoch 2/2, step 15/45, input torch.Size([4, 13])
epoch 2/2, step 20/45, input torch.Size([4, 13])
epoch 2/2, step 25/45, input torch.Size([4, 13])
epoch 2/2, step 30/45, input torch.Size([4, 13])
epoch 2/2, step 35/45, input torch.Size([4, 13])
epoch 2/2, step 40/45, input torch.Size([4, 13])
epoch 2/2, step 45/45, input torch.Size([2, 13])


## Transform

In [49]:
class WineDataset(Dataset):

  def __init__(self, transform = None):
    # data loading
    xy = np.loadtxt(file, delimiter=',', dtype = np.float32, skiprows = 1)
    self.x = xy[:, 1:]
    self.y = xy[:, [0]] # n_samples, 1
    self.n_samples = xy.shape[0]
    self.transform = transform

  def __getitem__(self, index):
    sample = self.x[index], self.y[index]
    if self.transform:
      sample = self.transform(sample)
    return sample

  def __len__(self):
    # len(dataset)
    return self.n_samples

In [50]:
class ToTensor:
  def __call__(self, sample):
    inputs, targets = sample
    return torch.from_numpy(inputs), torch.from_numpy(targets)

In [51]:
dataset = WineDataset(transform = ToTensor())

In [53]:
first_data = dataset[0]

In [54]:
features, labels, type(features), type(labels)

(tensor([[1.3870e+01, 1.9000e+00, 2.8000e+00, 1.9400e+01, 1.0700e+02, 2.9500e+00,
          2.9700e+00, 3.7000e-01, 1.7600e+00, 4.5000e+00, 1.2500e+00, 3.4000e+00,
          9.1500e+02],
         [1.3450e+01, 3.7000e+00, 2.6000e+00, 2.3000e+01, 1.1100e+02, 1.7000e+00,
          9.2000e-01, 4.3000e-01, 1.4600e+00, 1.0680e+01, 8.5000e-01, 1.5600e+00,
          6.9500e+02],
         [1.4370e+01, 1.9500e+00, 2.5000e+00, 1.6800e+01, 1.1300e+02, 3.8500e+00,
          3.4900e+00, 2.4000e-01, 2.1800e+00, 7.8000e+00, 8.6000e-01, 3.4500e+00,
          1.4800e+03],
         [1.3730e+01, 1.5000e+00, 2.7000e+00, 2.2500e+01, 1.0100e+02, 3.0000e+00,
          3.2500e+00, 2.9000e-01, 2.3800e+00, 5.7000e+00, 1.1900e+00, 2.7100e+00,
          1.2850e+03]]), tensor([[3.],
         [2.]]), torch.Tensor, torch.Tensor)

In [55]:
class MulTransform:
  def __init__(self, factor):
    self.factor = factor

  def __call__(self, sample):
    inputs, target = sample
    inputs *= self.factor
    return inputs, target

In [57]:
composed = torchvision.transforms.Compose([ToTensor(), MulTransform(2)])

In [58]:
dataset = WineDataset(transform = composed)

In [59]:
first_data = dataset[0]

In [60]:
features, labels = first_data

In [61]:
features, labels, type(features), type(labels)

(tensor([2.8460e+01, 3.4200e+00, 4.8600e+00, 3.1200e+01, 2.5400e+02, 5.6000e+00,
         6.1200e+00, 5.6000e-01, 4.5800e+00, 1.1280e+01, 2.0800e+00, 7.8400e+00,
         2.1300e+03]), tensor([1.]), torch.Tensor, torch.Tensor)