In [None]:
# PyTorch provides two data primitives: torch.utils.data.DataLoader and torch.utils.data.Dataset 
# that allow you to use pre-loaded datasets as well as your own data. Dataset stores the samples 
# and their corresponding labels, and DataLoader wraps an iterable around the Dataset to enable 
# easy access to the samples.

# Preloaded datasets
# https://pytorch.org/vision/stable/datasets.html
# https://pytorch.org/text/stable/datasets.html
# https://pytorch.org/audio/stable/datasets.html

# https://matplotlib.org/stable/

In [None]:
# Build a Dataset and a DataLoader from a local CSV file, then browse through the data in batches

import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader

# custom dataset. Needs 3 method definitions
class WineDataset(Dataset):
    """Map-style dataset backed by a comma-separated wine file.

    The file is read once in ``__init__`` with ``np.loadtxt`` (first row
    skipped as a header).  The first column (index 0) is the output/label and
    every remaining column is a feature; both are stored as tensors.

    Args:
        csv_path: Location of the CSV file.  Defaults to the path this
            notebook has always read from.
    """

    def __init__(self, csv_path="data/wine/wine.csv"):
        # ndmin=2 keeps a single-row file two-dimensional so the column
        # slicing below works for any number of samples.
        xy = np.loadtxt(csv_path, delimiter=",", skiprows=1, ndmin=2)
        # Features: second column onwards.
        self.x = torch.from_numpy(xy[:, 1:])
        # Label: first column only.  Indexing with [0] (a list) keeps the
        # result two-dimensional, i.e. one 1-element row per sample.
        # The previous code used [1], which duplicated the first feature
        # column instead of reading the label.
        self.y = torch.from_numpy(xy[:, [0]])
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` tensors stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of rows loaded from the file."""
        return self.n_samples
    
ds = WineDataset()
# Shuffled mini-batches of (at most) 10 samples each
dl = DataLoader(dataset=ds, batch_size=10, shuffle=True)

# Stand-in for a real training loop: walk every batch of every epoch and
# report the shape of the inputs on every fifth step.
num_epochs = 2
for epoch in range(num_epochs):
    for i, data in enumerate(dl):
        inputs, labels = data  # each batch is an (inputs, labels) pair
        if i % 5 != 0:
            continue
        print(f"epoch {epoch}: Step {i}, batch size = {inputs.shape}")
 

In [None]:
# Demonstrate transforms on dataset, modify the WineDataset for transforms

from typing import Any
import torch
import numpy as np
from torch.utils.data import Dataset
from torchvision import transforms

# custom dataset. Needs 3 method definitions
class WineDataset(Dataset):
    """Map-style wine dataset that can run a transform on each sample.

    The file is read once in ``__init__`` with ``np.loadtxt`` (first row
    skipped as a header).  The first column (index 0) is the output/label and
    every remaining column is a feature.  Data is kept as NumPy arrays;
    converting to tensors is left to the transform.

    Args:
        transform: Optional callable that receives the ``(features, label)``
            tuple and returns the sample to hand out.  ``None`` disables it.
        csv_path: Location of the CSV file.  Defaults to the path this
            notebook has always read from.
    """

    def __init__(self, transform=None, csv_path="data/wine/wine.csv"):
        # ndmin=2 keeps a single-row file two-dimensional so the column
        # slicing below works for any number of samples.
        xy = np.loadtxt(csv_path, delimiter=",", skiprows=1, ndmin=2)
        # Features: second column onwards (left as a NumPy array).
        self.x = xy[:, 1:]
        # Label: first column only.  Indexing with [0] (a list) keeps the
        # result two-dimensional.  The previous code used [1], which
        # duplicated the first feature column instead of reading the label.
        self.y = xy[:, [0]]
        self.n_samples = xy.shape[0]
        self.transform = transform

    def __getitem__(self, index):
        """Return the sample at ``index``, passed through the transform if one is set."""
        sample = self.x[index], self.y[index]
        # Identity check: avoids invoking a custom __ne__ on the transform object
        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Return the number of rows loaded from the file."""
        return self.n_samples

# custom transform
class ToTensor:
    """Transform that turns a ``(features, labels)`` pair of NumPy arrays into tensors."""

    def __call__(self, sample):
        """Wrap both arrays of ``sample`` with ``torch.from_numpy`` and return them as a tuple."""
        raw_features, raw_labels = sample
        return torch.from_numpy(raw_features), torch.from_numpy(raw_labels)


# another custom transform
class AddTransform:
    """Transform that adds a constant to the features of a sample.

    Args:
        addendum: Value added to the features; the labels are passed through
            unchanged.
    """

    def __init__(self, addendum):
        self.addendum = addendum

    def __call__(self, sample):
        """Return ``(features + addendum, labels)`` without modifying the input."""
        features, labels = sample
        # Out-of-place add on purpose: the incoming features may be a view of
        # (or share memory with) the caller's array, and `+=` would write the
        # shifted values back into it, so each repeated call on the same
        # sample would add the addendum again.
        shifted = features + self.addendum

        return shifted, labels

# Pipeline that first converts the sample to tensors and then adds 2 to the features
composed = transforms.Compose([ToTensor(), AddTransform(2)])

# Each entry pairs the heading printed for a demo with the transform handed to
# the dataset (None means no transform is applied).
demo_cases = (
    ("\nNo transforms here", None),
    ("\nToTensor transform applied", ToTensor()),
    ("\nAddTransform transform applied", AddTransform(1)),
    ("\nToTensor & AddTransform both applied", composed),
)

for demo_title, demo_transform in demo_cases:
    # Build a new dataset for every case, then look at its first sample
    ds = WineDataset(transform=demo_transform)
    features, labels = ds[0]
    print(demo_title)
    print(f"Features: {features}")
    print(f"features type: {type(features)}, labels type: {type(labels)}")


In [None]:
# Load the built-in FashionMNIST dataset provided by torchvision and plot a few random samples

import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt


# Training split: fetched into ./data if missing, images converted to tensors
training_data = datasets.FashionMNIST(
    root="data", train=True, download=True, transform=ToTensor()
)

# Test split, same settings
test_data = datasets.FashionMNIST(
    root="data", train=False, download=True, transform=ToTensor()
)

# Class index -> human-readable name used for the subplot titles
labels_map = dict(
    enumerate(
        [
            "T-Shirt",
            "Trouser",
            "Pullover",
            "Dress",
            "Coat",
            "Sandal",
            "Shirt",
            "Sneaker",
            "Bag",
            "Ankle Boot",
        ]
    )
)

figure = plt.figure(figsize=(8, 8))
cols, rows = 3, 3
# Fill a rows x cols grid with randomly chosen training images
for i in range(1, cols * rows + 1):
    # pick one random position in the training set
    sample_idx = torch.randint(len(training_data), size=(1,)).item()
    img, label = training_data[sample_idx]
    # subplot positions are 1-based, hence the loop starting at 1
    ax = figure.add_subplot(rows, cols, i)
    ax.set_title(labels_map[label])
    ax.axis("off")
    # squeeze() drops size-1 dimensions before the image is drawn
    ax.imshow(img.squeeze(), cmap="gray")
# render the finished grid
plt.show()