In [4]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

# Custom Dataset class for Wine dataset
class WineDataset(Dataset):
    """
    Map-style dataset backed by a numeric CSV file.

    The first column of every data row is used as the label and the remaining
    columns are used as the features. All values are loaded as float32.
    """

    def __init__(self, data_path='data/wine.csv'):
        """
        Initialize the dataset by loading wine data from a CSV file.

        Args:
            data_path (str): Path to the wine CSV file

        Raises:
            ValueError: If the file does not provide a label column plus at
                least one feature column.
        """
        # Load data from CSV, skipping header row.
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row; without it loadtxt returns a 1-D array, so
        # shape[0] would be the column count and the 2-D slicing below fails.
        xy = np.loadtxt(data_path, delimiter=',', dtype=np.float32,
                        skiprows=1, ndmin=2)
        if xy.shape[1] < 2:
            raise ValueError(
                f"Expected a label column and at least one feature column in "
                f"{data_path!r}, got array of shape {tuple(xy.shape)}"
            )
        self.n_samples = xy.shape[0]

        # Split into features (all columns except first) and labels (first column)
        self.x_data = torch.from_numpy(xy[:, 1:])  # Shape: [n_samples, n_features]
        # Indexing with the list [0] (not the scalar 0) keeps the column axis
        self.y_data = torch.from_numpy(xy[:, [0]]) # Shape: [n_samples, 1]

    def __getitem__(self, index):
        """
        Enable indexing to retrieve a specific sample.

        Args:
            index (int): Index of the sample to retrieve

        Returns:
            tuple: (features, label) for the specified index
        """
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """
        Return the total number of samples in the dataset.

        Returns:
            int: Number of samples
        """
        return self.n_samples

# Create dataset instance (no argument, so the default 'data/wine.csv' path is read)
dataset = WineDataset()

# Access and print first sample: indexing goes through WineDataset.__getitem__,
# which returns a (features, label) pair that is unpacked here
features, labels = dataset[0]
print(f"First sample - Features: {features}, Label: {labels}")



First sample - Features: tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]), Label: tensor([1.])


In [5]:
"""
Create a DataLoader for the wine dataset.

Args:
    dataset (Dataset): The dataset to load
    batch_size (int): Number of samples per batch
    shuffle (bool): Whether to shuffle the data
    num_workers (int): Number of subprocesses for data loading
    
Returns:
    DataLoader: Configured DataLoader instance
"""
# Wrap the dataset in a loader that yields shuffled mini-batches of 4 samples,
# loading in the main process (num_workers=0)
train_loader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    num_workers=0,
)

# Pull a single batch from a fresh iterator and report the tensor shapes
dataiter = iter(train_loader)
features, labels = next(dataiter)
print(f"Sample batch - Features: {features.shape}, Labels: {labels.shape}")

Sample batch - Features: torch.Size([4, 13]), Labels: torch.Size([4, 1])


In [6]:
# Training loop parameters
num_epochs = 2
total_samples = len(dataset)
# Ask the loader for its batch count instead of repeating the batch size as a
# literal, so the number stays correct if the DataLoader settings change.
# For this loader it equals ceil(total_samples / batch_size).
n_iterations = len(train_loader)
print(f"Total samples: {total_samples}, Iterations per epoch: {n_iterations}")

# Dummy training loop: iterates over every batch but performs no learning
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        # Training step (placeholder) - only log every 5th batch to keep
        # the output short
        if (i + 1) % 5 == 0:
            print(f'Epoch: {epoch+1}/{num_epochs}, Step {i+1}/{n_iterations} | '
                    f'Inputs {inputs.shape} | Labels {labels.shape}')

Total samples: 178, Iterations per epoch: 45
Epoch: 1/2, Step 5/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 10/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 15/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 20/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 25/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 30/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 35/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 40/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 45/45 | Inputs torch.Size([2, 13]) | Labels torch.Size([2, 1])
Epoch: 2/2, Step 5/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 10/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 15/45 | Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])

In [7]:
# The same DataLoader workflow applied to a ready-made torchvision dataset:
# the MNIST training split, downloaded into ./data when requested via
# download=True, with each image converted by ToTensor()
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Shuffled loader producing batches of 3 samples
mnist_loader = DataLoader(train_dataset, batch_size=3, shuffle=True)

# Grab one batch and show the shapes of the images and their targets
dataiter = iter(mnist_loader)
inputs, targets = next(dataiter)
print(f"MNIST batch - Inputs: {inputs.shape}, Targets: {targets.shape}")

MNIST batch - Inputs: torch.Size([3, 1, 28, 28]), Targets: torch.Size([3])
