# Datasets and Dataloaders

> Datasets and Dataloaders


- skip_showdoc: true
- skip_exec: true

In [None]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

class WineDataset(Dataset):
    """Map-style dataset backed by a CSV file that is read fully into memory.

    The first row of the file is skipped. Column 0 of every remaining row is
    used as the label and all other columns are used as features. Both are
    stored as float32 torch tensors.
    """

    def __init__(self, csv_path='Data/wine.csv'):
        """Read the CSV and split it into feature and label tensors.

        Args:
            csv_path: Path of the comma-separated file to load. The default
                is the location this notebook has always read from.
        """
        # data loading
        # ndmin=2 keeps the array 2-D even when the file holds a single data
        # row; without it the column slicing below fails on a 1-D array.
        xy = np.loadtxt(csv_path, delimiter=",", dtype=np.float32,
                        skiprows=1, ndmin=2)
        self.xy = xy
        self.x = torch.from_numpy(xy[:, 1:])
        # Indexing with [0] (a list) keeps the column axis, so every label is
        # returned as a one-element tensor rather than a scalar.
        self.y = torch.from_numpy(xy[:, [0]])
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of rows that were loaded."""
        return self.n_samples

In [None]:
dataset = WineDataset()


In [None]:
first_data = dataset[0]
first_data

(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
         3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
         1.0650e+03]),
 tensor([1.]))

In [None]:
features, labels = dataset[0]
features, labels

(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
         3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
         1.0650e+03]),
 tensor([1.]))

In [None]:
# Serve the dataset in shuffled mini-batches of 4 samples, using 2 loader workers.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

In [None]:
dataiter = iter(dataloader)

In [None]:
next(dataiter)

[tensor([[1.4020e+01, 1.6800e+00, 2.2100e+00, 1.6000e+01, 9.6000e+01, 2.6500e+00,
          2.3300e+00, 2.6000e-01, 1.9800e+00, 4.7000e+00, 1.0400e+00, 3.5900e+00,
          1.0350e+03],
         [1.2600e+01, 2.4600e+00, 2.2000e+00, 1.8500e+01, 9.4000e+01, 1.6200e+00,
          6.6000e-01, 6.3000e-01, 9.4000e-01, 7.1000e+00, 7.3000e-01, 1.5800e+00,
          6.9500e+02],
         [1.2790e+01, 2.6700e+00, 2.4800e+00, 2.2000e+01, 1.1200e+02, 1.4800e+00,
          1.3600e+00, 2.4000e-01, 1.2600e+00, 1.0800e+01, 4.8000e-01, 1.4700e+00,
          4.8000e+02],
         [1.2080e+01, 1.8300e+00, 2.3200e+00, 1.8500e+01, 8.1000e+01, 1.6000e+00,
          1.5000e+00, 5.2000e-01, 1.6400e+00, 2.4000e+00, 1.0800e+00, 2.2700e+00,
          4.8000e+02]]),
 tensor([[1.],
         [3.],
         [3.],
         [2.]])]

In [None]:
dataiter = iter(dataloader)

In [None]:
# Pull a single batch from the iterator and split it into its two parts:
# the stacked feature rows and the matching labels.
data = next(dataiter)
features, labels = data

In [None]:
features, labels 

(tensor([[1.4200e+01, 1.7600e+00, 2.4500e+00, 1.5200e+01, 1.1200e+02, 3.2700e+00,
          3.3900e+00, 3.4000e-01, 1.9700e+00, 6.7500e+00, 1.0500e+00, 2.8500e+00,
          1.4500e+03],
         [1.2170e+01, 1.4500e+00, 2.5300e+00, 1.9000e+01, 1.0400e+02, 1.8900e+00,
          1.7500e+00, 4.5000e-01, 1.0300e+00, 2.9500e+00, 1.4500e+00, 2.2300e+00,
          3.5500e+02],
         [1.3400e+01, 3.9100e+00, 2.4800e+00, 2.3000e+01, 1.0200e+02, 1.8000e+00,
          7.5000e-01, 4.3000e-01, 1.4100e+00, 7.3000e+00, 7.0000e-01, 1.5600e+00,
          7.5000e+02],
         [1.4390e+01, 1.8700e+00, 2.4500e+00, 1.4600e+01, 9.6000e+01, 2.5000e+00,
          2.5200e+00, 3.0000e-01, 1.9800e+00, 5.2500e+00, 1.0200e+00, 3.5800e+00,
          1.2900e+03]]),
 tensor([[1.],
         [2.],
         [3.],
         [1.]]))

In [None]:
num_epochs = 2
total_samples = len(dataset)
# Batches per epoch: derive the batch size from the loader itself so this
# figure cannot drift out of sync with the DataLoader configuration. The last
# batch may be smaller, hence the ceiling.
n_iterations = math.ceil(total_samples / dataloader.batch_size)

total_samples, n_iterations

(178, 45)

In [None]:
# Dummy training loop: walk every batch of every epoch and report progress.
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        # Only log every fifth step to keep the output short.
        if (i + 1) % 5:
            continue
        print(f'epoch {epoch + 1}/{num_epochs}, step {i+1}/{n_iterations}, inputs:{inputs[0][:5]} labels:{labels[0]}')

epoch 1/2, step 5/45, inputs:tensor([13.1100,  1.0100,  1.7000, 15.0000, 78.0000]) labels:tensor([2.])
epoch 1/2, step 10/45, inputs:tensor([12.0800,  1.8300,  2.3200, 18.5000, 81.0000]) labels:tensor([2.])
epoch 1/2, step 15/45, inputs:tensor([ 12.4700,   1.5200,   2.2000,  19.0000, 162.0000]) labels:tensor([2.])
epoch 1/2, step 20/45, inputs:tensor([ 13.6800,   1.8300,   2.3600,  17.2000, 104.0000]) labels:tensor([1.])
epoch 1/2, step 25/45, inputs:tensor([12.3700,  0.9400,  1.3600, 10.6000, 88.0000]) labels:tensor([2.])
epoch 1/2, step 30/45, inputs:tensor([12.8200,  3.3700,  2.3000, 19.5000, 88.0000]) labels:tensor([3.])
epoch 1/2, step 35/45, inputs:tensor([ 13.5800,   2.5800,   2.6900,  24.5000, 105.0000]) labels:tensor([3.])
epoch 1/2, step 40/45, inputs:tensor([ 13.9400,   1.7300,   2.2700,  17.4000, 108.0000]) labels:tensor([1.])
epoch 1/2, step 45/45, inputs:tensor([ 13.6400,   3.1000,   2.5600,  15.2000, 116.0000]) labels:tensor([1.])
epoch 2/2, step 5/45, inputs:tensor([13.

## Dataset Transform

### Types of Transforms:

#### On Images:
> CenterCrop, Grayscale, Pad, RandomAffine, RandomCrop, RandomHorizontalFlip, RandomRotation, Resize, Scale

#### On Tensors:
> LinearTransformation, Normalize, RandomErasing

#### Conversion:
> ToPILImage: from tensor or ndarray

> ToTensor: from numpy.ndarray or PIL Image

In [None]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

class WineDataset(Dataset):
    """Map-style dataset backed by a CSV file, with an optional per-sample transform.

    The first row of the file is skipped. Column 0 of every remaining row is
    used as the label and all other columns are used as features. Data is kept
    as float32 numpy arrays; converting to tensors is left to ``transform``.
    """

    def __init__(self, transform=None, csv_path='Data/wine.csv'):
        """Read the CSV and remember the transform.

        Args:
            transform: Optional callable that receives the
                ``(features, label)`` tuple and returns the processed sample.
            csv_path: Path of the comma-separated file to load. The default
                is the location this notebook has always read from.
        """
        # data loading
        # ndmin=2 keeps the array 2-D even when the file holds a single data
        # row; without it the column slicing below fails on a 1-D array.
        xy = np.loadtxt(csv_path, delimiter=",", dtype=np.float32,
                        skiprows=1, ndmin=2)
        self.xy = xy
        self.x = xy[:, 1:]
        # Indexing with [0] (a list) keeps the column axis, so every label is
        # a one-element array rather than a scalar.
        self.y = xy[:, [0]]
        self.n_samples = xy.shape[0]
        self.transform = transform

    def __getitem__(self, index):
        """Return the (optionally transformed) sample stored at ``index``."""
        # Hand out copies, not views: a transform that works in place (or a
        # tensor created with torch.from_numpy, which shares memory) would
        # otherwise modify the stored data, and the same index would yield
        # different values on every access.
        sample = self.x[index].copy(), self.y[index].copy()

        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Return the number of rows that were loaded."""
        return self.n_samples

In [None]:
class ToTensor:
    """Transform that turns a ``(inputs, targets)`` pair of numpy arrays into tensors."""

    def __call__(self, sample):
        """Convert both elements of ``sample`` with ``torch.from_numpy``."""
        features, labels = sample
        return tuple(torch.from_numpy(part) for part in (features, labels))

class MulTransform:
    """Transform that scales the inputs of a ``(inputs, target)`` sample by a constant."""

    def __init__(self, factor):
        """Store the multiplier.

        Args:
            factor: Value every input element is multiplied by.
        """
        self.factor = factor

    def __call__(self, sample):
        """Return ``(inputs * factor, target)`` without touching the caller's data."""
        inputs, target = sample
        # Multiply out of place. An in-place `*=` would write through to the
        # caller's buffer (a tensor from torch.from_numpy shares memory with
        # its numpy array), so the same sample would be scaled again on every
        # access.
        return inputs * self.factor, target
        

In [None]:
# Pipeline applied to each sample, in order: convert the numpy pair to
# tensors, then multiply the inputs by 2.
composed = torchvision.transforms.Compose([
    ToTensor(),
    MulTransform(2),
])

In [None]:
dataset = WineDataset(transform = composed)


In [None]:
first_data = dataset[0]
first_data

(tensor([2.8460e+01, 3.4200e+00, 4.8600e+00, 3.1200e+01, 2.5400e+02, 5.6000e+00,
         6.1200e+00, 5.6000e-01, 4.5800e+00, 1.1280e+01, 2.0800e+00, 7.8400e+00,
         2.1300e+03]),
 tensor([1.]))

In [None]:
features, labels = dataset[0]
features, labels

(tensor([5.6920e+01, 6.8400e+00, 9.7200e+00, 6.2400e+01, 5.0800e+02, 1.1200e+01,
         1.2240e+01, 1.1200e+00, 9.1600e+00, 2.2560e+01, 4.1600e+00, 1.5680e+01,
         4.2600e+03]),
 tensor([1.]))

In [None]:
# Serve the transformed dataset in shuffled mini-batches of 4 samples, using 2 loader workers.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

In [None]:
dataiter = iter(dataloader)

In [None]:
next(dataiter)

[tensor([[2.4660e+01, 1.9800e+00, 3.9000e+00, 2.9600e+01, 2.7200e+02, 3.8000e+00,
          3.7000e+00, 7.0000e-01, 5.5200e+00, 6.8000e+00, 2.1200e+00, 4.6200e+00,
          1.5000e+03],
         [2.7000e+01, 6.2400e+00, 5.2400e+00, 4.8000e+01, 2.4600e+02, 2.8000e+00,
          3.1400e+00, 4.4000e-01, 2.5000e+00, 1.7200e+01, 1.1800e+00, 2.6000e+00,
          1.0000e+03],
         [2.8200e+01, 4.0400e+00, 4.8000e+00, 3.7600e+01, 2.0600e+02, 5.5000e+00,
          5.8400e+00, 6.4000e-01, 4.7600e+00, 1.2400e+01, 2.1400e+00, 5.5000e+00,
          2.1200e+03],
         [2.4900e+01, 6.0600e+00, 5.2800e+00, 5.4000e+01, 1.9400e+02, 3.8000e+00,
          1.1600e+00, 1.2600e+00, 2.2800e+00, 1.5000e+01, 1.3400e+00, 3.4600e+00,
          1.7600e+03]]),
 tensor([[2.],
         [3.],
         [1.],
         [3.]])]

In [None]:
dataiter = iter(dataloader)

In [None]:
# Pull a single batch from the iterator and split it into its two parts:
# the stacked feature rows and the matching labels.
data = next(dataiter)
features, labels = data

In [None]:
features, labels 

(tensor([[2.4440e+01, 2.5800e+00, 3.8800e+00, 3.8000e+01, 1.8400e+02, 4.7200e+00,
          4.0800e+00, 7.8000e-01, 4.1600e+00, 5.4000e+00, 1.7200e+00, 6.0400e+00,
          6.2400e+02],
         [2.3920e+01, 2.1800e+00, 4.6000e+00, 4.2000e+01, 2.0200e+02, 6.7600e+00,
          4.2800e+00, 2.6000e-01, 3.3000e+00, 6.4200e+00, 1.9800e+00, 6.2600e+00,
          1.7720e+03],
         [2.4500e+01, 3.4600e+00, 4.2400e+00, 3.8000e+01, 1.6000e+02, 3.3000e+00,
          4.0600e+00, 7.4000e-01, 3.2600e+00, 6.8000e+00, 2.0000e+00, 6.3400e+00,
          1.0200e+03],
         [2.6820e+01, 7.6800e+00, 4.2400e+00, 3.7600e+01, 1.8000e+02, 4.9000e+00,
          5.3600e+00, 5.4000e-01, 2.9600e+00, 8.5600e+00, 1.8200e+00, 6.0000e+00,
          2.0700e+03]]),
 tensor([[2.],
         [2.],
         [2.],
         [1.]]))

In [None]:
num_epochs = 2
total_samples = len(dataset)
# Batches per epoch: derive the batch size from the loader itself so this
# figure cannot drift out of sync with the DataLoader configuration. The last
# batch may be smaller, hence the ceiling.
n_iterations = math.ceil(total_samples / dataloader.batch_size)

total_samples, n_iterations

(178, 45)

In [None]:
# Dummy training loop: walk every batch of every epoch and report progress.
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        # Only log every fifth step to keep the output short.
        if (i + 1) % 5:
            continue
        print(f'epoch {epoch + 1}/{num_epochs}, step {i+1}/{n_iterations}, inputs:{inputs[0][:5]} labels:{labels[0]}')

epoch 1/2, step 5/45, inputs:tensor([ 23.9200,   2.1800,   4.6000,  42.0000, 202.0000]) labels:tensor([2.])
epoch 1/2, step 10/45, inputs:tensor([ 24.5800,   3.2200,   4.4200,  40.8000, 206.0000]) labels:tensor([2.])
epoch 1/2, step 15/45, inputs:tensor([ 28.2000,   4.0400,   4.8000,  37.6000, 206.0000]) labels:tensor([1.])
epoch 1/2, step 20/45, inputs:tensor([ 27.3800,   6.5200,   5.0800,  40.0000, 214.0000]) labels:tensor([3.])
epoch 1/2, step 25/45, inputs:tensor([ 28.2000,   4.3200,   4.6000,  36.0000, 210.0000]) labels:tensor([1.])
epoch 1/2, step 30/45, inputs:tensor([ 23.3000,   3.3400,   5.2400,  52.0000, 176.0000]) labels:tensor([2.])
epoch 1/2, step 35/45, inputs:tensor([ 26.1000,  11.6000,   4.2600,  43.0000, 172.0000]) labels:tensor([2.])
epoch 1/2, step 40/45, inputs:tensor([ 24.6600,   2.2000,   4.5600,  32.0000, 202.0000]) labels:tensor([2.])
epoch 1/2, step 45/45, inputs:tensor([ 27.1200,   3.4200,   4.6200,  32.4000, 234.0000]) labels:tensor([1.])
epoch 2/2, step 5/45

## MNIST


In [None]:
# Import dependencies
import torch 
from PIL import Image
from torch import nn, save, load
from torch.optim import Adam
from torch.utils.data import DataLoader
import torchvision 

# Get data
# MNIST training split stored under ./data, with every image converted to a tensor.
train = torchvision.datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
# NOTE: despite its name, `dataset` is the DataLoader (batches of 32), not the Dataset.
dataset = DataLoader(train, batch_size=32)
# Image shape is 1,28,28 - classes 0-9