In [1]:
import torch
from torch.utils.data import Dataset, random_split

# 1. Build the sample inputs X: a 100 x 5 tensor (100 samples, 5 features each) of random values.
X = torch.randn((100, 5))

# 2. Build the sample labels y: a 1-D tensor of 100 random values, one label per sample.
y = torch.randn((100,))

# 3. Define a custom Dataset class called 'MySampleDataset' that inherits from torch.utils.data.Dataset.
class MySampleDataset(Dataset):
    """Dataset that pairs each feature entry with the label at the same index.

    Args:
        features: Indexable, sized collection of samples (e.g. a tensor whose
            first dimension enumerates the samples).
        labels: Indexable, sized collection of labels, one per sample.

    Raises:
        ValueError: If ``features`` and ``labels`` do not have the same length.
    """

    def __init__(self, features, labels):
        super().__init__()
        # Validate with an explicit exception instead of ``assert``: assert
        # statements are removed when Python runs with ``-O``, which would let
        # mismatched data through unnoticed. The check also runs before any
        # attribute is set, so a failed construction leaves nothing behind.
        if len(features) != len(labels):
            raise ValueError(
                "Number of samples must match! "
                f"Got {len(features)} features and {len(labels)} labels."
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the total number of samples in the dataset."""
        return len(self.features)

    def __getitem__(self, index):
        """Return the ``(feature, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]

# 4. Wrap the sample tensors X and y in a 'MySampleDataset' instance.
dataset = MySampleDataset(features=X, labels=y)

# 5. Report how many samples the dataset holds (len() dispatches to __len__()).
print("Dataset size:", len(dataset))

# 6. Index the dataset to fetch its first (input, label) pair, then show each part's shape and the pair itself.
first_sample = dataset[0]
first_sample_input, first_sample_label = first_sample
print("First sample input shape:", first_sample_input.shape)
print("First sample label shape:", first_sample_label.shape)
print("First sample:", first_sample)

# 7. Randomly partition the dataset into an 80% training part and a 20% testing part.
n_total = len(dataset)
train_size = int(n_total * 0.8)  # int() truncates; whatever is left over goes to the test split
test_size = n_total - train_size
splits = random_split(dataset, lengths=[train_size, test_size])
train_dataset, test_dataset = splits

# 8. Report how many samples ended up in each split.
print("Training dataset size:", len(train_dataset))
print("Testing dataset size:", len(test_dataset))

Dataset size: 100
First sample input shape: torch.Size([5])
First sample label shape: torch.Size([])
First sample: (tensor([-1.0600, -0.4253, -0.3714,  0.3015,  0.5628]), tensor(2.4584))
Training dataset size: 80
Testing dataset size: 20
