# Transforms

Once we have a ```Dataset``` and ```DataLoader``` ready to go, we can also start adding PyTorch **transforms**, which manipulate and transform the input data.

In [2]:
import torch
import torchvision
from torch.utils.data import Dataset
import numpy as np

In [3]:
# recreate wine data set
class WineDataset(Dataset):
    """Wine-quality table exposed as an indexable PyTorch ``Dataset``.

    The CSV is read once at construction time (header row skipped, values
    parsed as float32). Column 0 is kept as the target and every remaining
    column as a feature.
    """

    def __init__(self):
        table = np.loadtxt(
            './data/winequality.csv',
            delimiter=',',
            dtype=np.float32,
            skiprows=1,
        )
        # list index on the column axis keeps y two-dimensional: (n_samples, 1)
        self.y = table[:, [0]]
        self.x = table[:, 1:]
        self.n_samples = table.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        features = self.x[index]
        target = self.y[index]
        return features, target

    def __len__(self):
        """Return the number of rows that were loaded."""
        return self.n_samples

<br>

In the ```Dataset``` class above, we are simply reading in the data and storing it in a numpy matrix. We can also add some preprocessing and manipulation steps via transforms.

<br>

In [5]:
# re-define the WineDataset class, this time with transforms
class WineDataset(Dataset):
    """Wine-quality dataset with an optional per-sample transform.

    The CSV at ``./data/winequality.csv`` is loaded once at construction
    (header row skipped, values parsed as float32). Column 0 is stored as
    the target ``y`` and the remaining columns as the features ``x``.

    Args:
        transform: optional callable that receives the ``(inputs, target)``
            tuple of one sample and returns the transformed sample.
            ``None`` (the default) leaves samples untouched.
    """

    def __init__(self, transform=None):
        data = np.loadtxt('./data/winequality.csv', delimiter=',', dtype=np.float32, skiprows=1)
        self.n_samples = data.shape[0]
        self.x = data[:, 1:]
        # index with a list so y keeps shape (n_samples, 1) instead of (n_samples,)
        # NOTE(review): column 0 is taken as the target -- confirm that this
        # is the intended label column of the CSV.
        self.y = data[:, [0]]
        # store the transform (may be None)
        self.transform = transform

    def __getitem__(self, index):
        """Return sample ``index``, passed through the transform if one is set."""
        sample = self.x[index], self.y[index]
        # Compare against None explicitly: a callable that happens to be
        # falsy (e.g. an empty list-like pipeline) must still be applied.
        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Return the number of rows that were loaded."""
        return self.n_samples

In [6]:
# we can also define our own custom transforms
# To create a transform, we just need to define a __call__() method
class ToTensor:
    """Convert an ``(inputs, targets)`` sample into a pair of float32 tensors.

    Args:
        device: device the tensors are placed on. ``None`` (the default)
            means "CUDA if it is available, otherwise CPU", so the transform
            also works on machines without a GPU.

    Note:
        ``torch.as_tensor`` avoids copying where it can, so on CPU the
        returned tensors may share memory with float32 NumPy inputs.
    """

    def __init__(self, device=None):
        self.device = device

    def __call__(self, sample, device=None):
        """Return ``(inputs, targets)`` as float32 tensors.

        Args:
            sample: an ``(inputs, targets)`` pair of array-likes.
            device: per-call override of the device chosen at construction.
        """
        inputs, targets = sample
        # precedence: per-call argument, then constructor argument, then auto-detect
        if device is None:
            device = self.device
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        return (
            torch.as_tensor(inputs, device=device, dtype=torch.float32),
            torch.as_tensor(targets, device=device, dtype=torch.float32),
        )
    

In [7]:
# instantiate the wine dataset with the ToTensor() transform attached
dataset = WineDataset(transform = ToTensor())

In [12]:
# Get the first item in the dataset.
# Indexing calls WineDataset.__getitem__(), which now passes the
# (inputs, target) sample through the ToTensor() transform before returning it.
first_data = dataset[0]

first_data

(tensor([ 0.7000,  0.0000,  1.9000,  0.0760, 11.0000, 34.0000,  0.9978,  3.5100,
          0.5600,  9.4000,  5.0000,  1.0000], device='cuda:0'),
 tensor([7.4000], device='cuda:0'))

In [13]:
# We can also compose multiple transforms. This is handy when we want to set up a neat little pipeline

# define a second transform
class MulTransform:
    """Scale the inputs of an ``(inputs, target)`` sample by a constant factor.

    Args:
        factor: value every input element is multiplied by.
    """

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, sample):
        """Return ``(inputs * factor, target)``; the target is passed through."""
        inputs, target = sample
        # Build a new object instead of using ``*=``: the incoming inputs may
        # be a view of (or share memory with) the dataset's stored array, and
        # an in-place multiply would rescale the stored data on every access.
        scaled = inputs * self.factor
        return scaled, target


In [16]:
# Chain the ToTensor and MulTransform transforms into one pipeline.
# torchvision.transforms.Compose takes a list of transforms and
# applies them to each sample in the order given.
composed = torchvision.transforms.Compose([
    ToTensor(),
    MulTransform(2),
])

# rebuild the dataset so every sample goes through the composed pipeline
dataset = WineDataset(transform=composed)

first_data = dataset[0]

first_data

(tensor([ 1.4000,  0.0000,  3.8000,  0.1520, 22.0000, 68.0000,  1.9956,  7.0200,
          1.1200, 18.8000, 10.0000,  2.0000], device='cuda:0'),
 tensor([7.4000], device='cuda:0'))