# Datasets

In [1]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [4]:
class WineDataset(Dataset):
    """Map-style dataset backed by a comma-separated wine data file.

    The file is read once, at construction time, with ``np.loadtxt``: the
    first row is skipped as a header and every value is parsed as float32.
    Column 0 becomes the label, all remaining columns become the features.

    Args:
        path: Location of the CSV file. Defaults to ``'./data/wine.csv'``,
            the location that used to be hard-coded.

    Attributes:
        x: Feature tensor of shape ``(n_samples, n_columns - 1)``.
        y: Label tensor of shape ``(n_samples, 1)``.
        n_samples: Number of data rows read from the file.
    """

    def __init__(self, path='./data/wine.csv'):
        # data loading
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row; without it the column slicing below would fail.
        xy = np.loadtxt(path,
                        delimiter=",",
                        dtype=np.float32,
                        skiprows=1,
                        ndmin=2)
        self.x = torch.from_numpy(xy[:, 1:])
        # Indexing with [0] (a list) keeps the label column two-dimensional.
        self.y = torch.from_numpy(xy[:, [0]])
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        # dataset indexing
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        # length of dataset
        return self.n_samples

In [7]:
# Smoke-test the dataset: fetch the first sample and show both of its parts
dataset = WineDataset()
first = dataset[0]
features, labels = first[0], first[1]
print(f'features: {features}\n\n labels: {labels}')

features: tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03])

 labels: tensor([1.])


In [8]:
# Wrap the dataset in a DataLoader that yields shuffled batches of 4 samples
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)

In [9]:
# Build iterator and test data loader
data_iter = iter(dataloader)
# Fetch one batch with the built-in next(): it works on any iterator, whereas
# the `.next()` method is not available on DataLoader iterators in current
# PyTorch releases.
data = next(data_iter)
features, labels = data
print(f'features: {features}\n\n labels: {labels}')

features: tensor([[1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
         3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
         1.0650e+03],
        [1.2700e+01, 3.8700e+00, 2.4000e+00, 2.3000e+01, 1.0100e+02, 2.8300e+00,
         2.5500e+00, 4.3000e-01, 1.9500e+00, 2.5700e+00, 1.1900e+00, 3.1300e+00,
         4.6300e+02],
        [1.3560e+01, 1.7300e+00, 2.4600e+00, 2.0500e+01, 1.1600e+02, 2.9600e+00,
         2.7800e+00, 2.0000e-01, 2.4500e+00, 6.2500e+00, 9.8000e-01, 3.0300e+00,
         1.1200e+03],
        [1.3050e+01, 3.8600e+00, 2.3200e+00, 2.2500e+01, 8.5000e+01, 1.6500e+00,
         1.5900e+00, 6.1000e-01, 1.6200e+00, 4.8000e+00, 8.4000e-01, 2.0100e+00,
         5.1500e+02]])

 labels: tensor([[1.],
        [2.],
        [1.],
        [2.]])


In [11]:
# Training loop
n_epochs = 2
total_samples = len(dataset)
# Ask the loader for its number of batches per epoch instead of repeating the
# batch size as a magic number; this stays correct if batch_size (or
# drop_last) is changed where the DataLoader is built.
n_iter = len(dataloader)
print(f'Total samples: {total_samples}\nNum. iterations: {n_iter}')

Total samples: 178
Num. iterations: 45


In [12]:
# Mini-batch loop skeleton: walk the DataLoader once per epoch
for epoch in range(n_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        # Placeholders for the actual training step:
        #   forward pass -> backward pass -> optimizer update
        if (i + 1) % 5 != 0:
            # Only report progress on every fifth step
            continue
        print(f'epoch: {epoch+1}/{n_epochs}, step {i+1}/{n_iter}, inputs: {inputs.shape}')

epoch: 1/2, step 5/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 10/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 15/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 20/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 25/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 30/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 35/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 40/45, inputs: torch.Size([4, 13])
epoch: 1/2, step 45/45, inputs: torch.Size([2, 13])
epoch: 2/2, step 5/45, inputs: torch.Size([4, 13])
epoch: 2/2, step 10/45, inputs: torch.Size([4, 13])
epoch: 2/2, step 15/45, inputs: torch.Size([4, 13])
epoch: 2/2, step 20/45, inputs: torch.Size([4, 13])
epoch: 2/2, step 25/45, inputs: torch.Size([4, 13])
epoch: 2/2, step 30/45, inputs: torch.Size([4, 13])
epoch: 2/2, step 35/45, inputs: torch.Size([4, 13])
epoch: 2/2, step 40/45, inputs: torch.Size([4, 13])
epoch: 2/2, step 45/45, inputs: torch.Size([2, 13])


# Dataset Transforms

In [13]:
import torch
import torchvision

In [14]:
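# Overview of the transforms documented at
# https://pytorch.org/vision/stable/transforms.html
# - Image operations: crop, grayscale, padding, ...
# - Tensor operations: linear transform, normalize, ...
# - Conversions to and from Pillow images
# - Generic transforms: lambdas and custom classes
# - Compose: chain multiple transforms together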

In [15]:
class WineDataset(Dataset):
    """Wine dataset that keeps its data as NumPy arrays and supports a transform.

    The file is read once, at construction time, with ``np.loadtxt``: the
    first row is skipped as a header and every value is parsed as float32.
    Column 0 becomes the label, all remaining columns become the features.

    Args:
        transform: Optional callable applied to each ``(features, label)``
            tuple before it is returned from ``__getitem__``. A falsy value
            (the default ``None``) returns samples unchanged.
        path: Location of the CSV file. Defaults to ``"./data/wine.csv"``,
            the location that used to be hard-coded.
    """

    # support transform argument
    def __init__(self, transform=None, path="./data/wine.csv"):
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row; without it the column slicing below would fail.
        xy = np.loadtxt(path,
                        delimiter=",",
                        dtype=np.float32,
                        skiprows=1,
                        ndmin=2)
        self.n_samples = xy.shape[0]
        # NOTE: this slice is a view into `xy`, so rows handed out by
        # __getitem__ alias the stored data; transforms should not modify
        # their input in place.
        self.x = xy[:, 1:]
        # Indexing with [0] (a list) keeps the label column two-dimensional.
        self.y = xy[:, [0]]

        # transform attribute
        self.transform = transform

    def __getitem__(self, index):
        """Return the (optionally transformed) sample stored at ``index``."""
        sample = self.x[index], self.y[index]

        # Apply transformation
        if self.transform:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_samples

In [37]:
# Transform: NumPy sample -> tensor sample
class ToTensor:
    """Convert both halves of an ``(inputs, targets)`` NumPy pair to tensors.

    The conversion uses ``torch.from_numpy``, so the returned tensors share
    memory with the arrays that were passed in.
    """

    def __call__(self, sample):
        features_np, labels_np = sample
        features_t = torch.from_numpy(features_np)
        labels_t = torch.from_numpy(labels_np)
        return features_t, labels_t

In [38]:
# Rebuild the dataset with the NumPy -> tensor transform and inspect sample 0
dataset = WineDataset(transform=ToTensor())
first = dataset[0]
features, labels = first[0], first[1]
print(f'features: {features[0:3]}\ntype: {type(features)}\n')
print(f'labels: {labels[:]}\ntype: {type(labels)}')

features: tensor([14.2300,  1.7100,  2.4300])
type: <class 'torch.Tensor'>

labels: tensor([1.])
type: <class 'torch.Tensor'>


In [39]:
# Transformation to multiply by a factor
class MulTransform:
    """Scale the inputs of an ``(inputs, targets)`` sample by a constant.

    Args:
        factor: Multiplier applied to the inputs. Targets are passed through
            unchanged.
    """

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, sample):
        """Return ``(inputs * factor, targets)`` without modifying ``sample``."""
        inputs, targets = sample
        # Multiply out of place. An in-place `*=` would write through to the
        # caller's storage: tensors created with torch.from_numpy (and NumPy
        # views) share memory with their source array, so a dataset's stored
        # features would be rescaled again on every access.
        inputs = inputs * self.factor
        return inputs, targets

In [40]:
# Chain the transforms: convert to tensors first, then scale the features by 2
composed = torchvision.transforms.Compose(
    [ToTensor(), MulTransform(2)]
)
dataset = WineDataset(transform=composed)
first = dataset[0]
features, labels = first[0], first[1]
print(f'features: {features[0:3]}\ntype: {type(features)}\n')
print(f'labels: {labels[:]}\ntype: {type(labels)}')

features: tensor([28.4600,  3.4200,  4.8600])
type: <class 'torch.Tensor'>

labels: tensor([1.])
type: <class 'torch.Tensor'>
