*Code Examples*

Datasets

In [None]:
from torch.utils.data import Dataset  # Import the base Dataset class from PyTorch's data utilities

# Create a toy dataset
class NumberProductDataset(Dataset):
    """Toy map-style dataset built from a run of consecutive integers.

    Each sample pairs an integer ``n`` with ``n + 1`` as the input and uses
    their product ``n * (n + 1)`` as the target.
    """

    def __init__(self, data_range=(1, 10)):
        """Store the integers in the half-open interval given by *data_range*.

        Args:
            data_range: Sequence whose first two items are the inclusive
                start and the exclusive end of the integers to hold.
        """
        lower, upper = data_range[0], data_range[1]
        self.numbers = list(range(lower, upper))

    def __getitem__(self, index):
        """Return ``((n, n + 1), n * (n + 1))`` for the integer stored at *index*.

        An out-of-range *index* raises ``IndexError`` from the underlying list.
        """
        base = self.numbers[index]
        successor = base + 1
        return (base, successor), base * successor

    def __len__(self):
        """Return how many samples (stored integers) the dataset holds."""
        return len(self.numbers)

# Build a dataset over the integers 0..10 (the end of the range, 11, is exclusive)
dataset = NumberProductDataset(data_range=(0, 11))

# Look up a single sample by position and display it
data_sample = dataset[3]
print(data_sample)
# Expected output:
# ((3, 4), 12)

Data Loaders

In [None]:
from torch.utils.data import DataLoader  # Import DataLoader for batching and shuffling data

# Five samples: the integers 0..4
dataset = NumberProductDataset(data_range=(0, 5))

# Wrap the dataset so it is served in shuffled batches of up to three samples
dataloader = DataLoader(
    dataset,
    batch_size=3,
    shuffle=True,
)

# Each batch unpacks into the collated inputs (num_pairs) and targets (products)
for num_pairs, products in dataloader:
    print(num_pairs, products)
# Sample output (the order varies between runs because shuffle=True):
# [tensor([4, 3, 1]), tensor([5, 4, 2])] tensor([20, 12, 2])
# [tensor([2, 0]), tensor([3, 1])] tensor([6, 0])