In [2]:
# [VI] More about 'transforms' here: https://pytorch.org/tutorials/beginner/basics/transforms_tutorial.html

import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

# Data does not always come in its final processed form that is required for training machine learning algorithms. 
# We use transforms to perform some manipulation of the data and make it suitable for training.

# All TorchVision datasets have two parameters: 'transform' to modify the features and 'target_transform' to modify the labels.
# The torchvision.transforms module offers several commonly-used transforms out of the box.

# The MNIST features (like those of FashionMNIST) are in PIL Image format, and the labels are integers.
# dataset = torchvision.datasets.MNIST(
#     # Convert numpy arrays (or PIL image) to tensors - https://pytorch.org/vision/main/generated/torchvision.transforms.ToTensor.html
#     root='data', download=True, transform=torchvision.transforms.ToTensor() 
# )

In [3]:
class WineDataset(Dataset):
    """Wine dataset read from a CSV file, with an optional per-sample transform.

    The first CSV column is used as the target and the remaining columns as
    the features. The data is kept as NumPy arrays; converting it to tensors
    is left to ``transform``.

    Args:
        transform: Optional callable applied to every ``(features, target)``
            tuple returned by ``__getitem__``. ``None`` returns the raw tuple.
        csv_path: Path of the CSV file to load. The first row is treated as
            a header and skipped. Defaults to ``'./wine.csv'``.
    """

    def __init__(self, transform=None, csv_path='./wine.csv'):
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row, so the column slicing below always works.
        xy = np.loadtxt(csv_path, delimiter=",", dtype=np.float32, skiprows=1, ndmin=2)
        self.n_samples = xy.shape[0]

        # Note that we do not convert to tensor here; that is the transform's job.
        self.x = xy[:, 1:]   # every column except the first: features
        self.y = xy[:, [0]]  # first column, kept 2-D: shape (n_samples, 1)

        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair at ``index``, transformed if requested.

        For an integer ``index`` the arrays are NumPy views into the dataset's
        storage, so a transform should not modify them in place.
        """
        sample = self.x[index], self.y[index]  # pass a tuple

        # Compare against None explicitly so a callable that happens to be
        # falsy (e.g. one defining __len__) is still applied.
        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Return the number of samples (data rows) loaded from the CSV."""
        return self.n_samples

In [5]:
# Creating a custom transform - https://pytorch.org/vision/stable/transforms.html
class ToTensor:
    """Callable transform turning a pair of NumPy arrays into a pair of tensors."""

    # __call__ is all that is needed for an instance to be usable like a function.
    def __call__(self, sample):
        """Convert ``sample = (inputs, targets)`` with ``torch.from_numpy``.

        Returns a 2-tuple of tensors, features first and targets second.
        """
        features_np, targets_np = sample
        features_tensor = torch.from_numpy(features_np)
        targets_tensor = torch.from_numpy(targets_np)
        return features_tensor, targets_tensor


In [10]:
# A ToTensor instance is handed over as the transform; the dataset invokes it
# like a function, which dispatches to its __call__ method.
# PS: with 'transform=None' the printed types would instead be
# "<class 'numpy.ndarray'> <class 'numpy.ndarray'>".
dataset = WineDataset(transform=ToTensor())

# Fetch the first sample and unpack it into features and labels in one go.
first_data = (features, labels) = dataset[0]

print(type(features), type(labels))
print(features)

<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03])


In [11]:
# Another custom transform
class MulTransform:
    """Transform that scales the features of a sample by a constant factor.

    Args:
        factor: Multiplier applied to the features of every sample.
    """

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, sample):
        """Return ``(inputs * factor, target)`` for ``sample = (inputs, target)``.

        ``inputs`` itself is left untouched; ``target`` is passed through as is.
        """
        inputs, target = sample
        # Multiply out of place. A tensor created with torch.from_numpy shares
        # memory with its source array, so an in-place `*=` would permanently
        # rescale the caller's data and compound on every repeated access.
        return inputs * self.factor, target


# Chain several transforms into a single callable with Compose; they run in list order
# (ToTensor first, then MulTransform with factor 2) - https://pytorch.org/vision/main/generated/torchvision.transforms.Compose.html
composed = torchvision.transforms.Compose([ToTensor(), MulTransform(2)])
dataset = WineDataset(transform=composed)

# Fetch the first sample and unpack it into features and labels in one go.
first_data = (features, labels) = dataset[0]

print(type(features), type(labels))
print(features)

<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([2.8460e+01, 3.4200e+00, 4.8600e+00, 3.1200e+01, 2.5400e+02, 5.6000e+00,
        6.1200e+00, 5.6000e-01, 4.5800e+00, 1.1280e+01, 2.0800e+00, 7.8400e+00,
        2.1300e+03])
