**⭐️ CUSTOM DATASET**

In [14]:
import torch
from torch.utils.data import Dataset, DataLoader

class MyDataset(Dataset):
    """Map-style dataset pairing each sample in ``data`` with a label.

    Args:
        data: Indexable, sized collection of samples (e.g. a tensor whose
            first dimension enumerates the samples).
        labels: Indexable, sized collection of labels, one per sample.

    Raises:
        ValueError: If ``data`` and ``labels`` have different lengths.
    """

    def __init__(self, data, labels):
        # Fail fast: a length mismatch would otherwise surface later as an
        # IndexError in the middle of an epoch, or as silently unused labels.
        if len(data) != len(labels):
            raise ValueError(
                f"data and labels must have the same length, "
                f"got {len(data)} and {len(labels)}"
            )
        self.data = data
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        return self.data[idx], self.labels[idx]

# Toy inputs: 5 random samples of shape (3, 2, 2), each with a label in [0, 10).
x = torch.randn((5, 3, 2, 2))
y = torch.randint(low=0, high=10, size=(5,))

dataset = MyDataset(data=x, labels=y)
loader = DataLoader(dataset=dataset, batch_size=2, shuffle=True)

# 5 samples in batches of 2 -> 3 batches (the last one holds a single sample).
print(len(loader))

# Display only the first shuffled batch.
data, labels = next(iter(loader))
print(data, labels)

3
tensor([[[[ 0.2184, -2.3250],
          [ 2.1730,  0.4554]],

         [[-0.6523,  1.8584],
          [ 0.3598,  0.0616]],

         [[ 0.1874, -0.5951],
          [ 0.6899, -1.7040]]],


        [[[-0.0488, -0.8907],
          [-1.4556, -1.1449]],

         [[ 0.8465, -1.3984],
          [ 0.5225,  0.7945]],

         [[-0.3174,  0.9322],
          [-1.1033, -1.3410]]]]) tensor([2, 0])


**⭐️ BUILT-IN DATASET**

In [None]:
from torchvision import datasets, transforms

# Convert each image to a tensor as it is read from the dataset.
transform = transforms.ToTensor()

# Training split of MNIST, downloaded into ./data if it is not already there.
mnist_train = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
loader = DataLoader(dataset=mnist_train, batch_size=64, shuffle=True)

**⭐️ TENSOR DATASET**

In [22]:
from torch.utils.data import TensorDataset, DataLoader

# Random stand-in data: 100 samples of shape (3, 32, 32) with labels in [0, 10).
data = torch.randn((100, 3, 32, 32))
labels = torch.randint(low=0, high=10, size=(100,))

# TensorDataset indexes both tensors along their first dimension, so sample i
# is the pair (data[i], labels[i]).
tensor_dataset = TensorDataset(data, labels)
loader = DataLoader(dataset=tensor_dataset, batch_size=16, shuffle=True)

**⭐️ LOADING FROM FILES SUCH AS CSV/JSON**

In [None]:
import csv

class CSVCustomDataset(Dataset):
    """Map-style dataset backed by a CSV file with a single header row.

    Every column except the last is parsed as a float feature and the last
    column as an integer label. Rows are held in memory as lists of strings
    and converted to tensors lazily in ``__getitem__``.
    """

    def __init__(self, csv_file):
        """Read all data rows of ``csv_file`` into ``self.data``.

        Args:
            csv_file: Path to a CSV file whose first row is a header.
        """
        # newline='' is what the csv module expects, so that newlines embedded
        # in quoted fields are interpreted correctly.
        with open(csv_file, 'r', newline='') as file:
            reader = csv.reader(file)
            # Skip the header; the default avoids StopIteration on an empty file.
            next(reader, None)
            # csv.reader yields [] for blank lines; drop them so __getitem__
            # never indexes into a row that has no label column.
            self.data = [row for row in reader if row]

    def __len__(self):
        """Return the number of data rows (header and blank lines excluded)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx``.

        Returns:
            features: float32 tensor built from every column but the last.
            label: int64 scalar tensor built from the last column.

        Raises:
            ValueError: If a field cannot be parsed as float/int.
        """
        row = self.data[idx]
        features = torch.tensor([float(val) for val in row[:-1]], dtype=torch.float32)
        label = torch.tensor(int(row[-1]), dtype=torch.long)
        return features, label

# Build the dataset from the CSV file, then serve it in shuffled batches of 32.
csv_dataset = CSVCustomDataset(csv_file='data.csv')
csv_loader = DataLoader(
    dataset=csv_dataset,
    batch_size=32,
    shuffle=True,
)

**⭐️ ITERABLE DATASET**

In [18]:
from torch.utils.data import IterableDataset

class IterableCustomDataset(IterableDataset):
    """Iterable-style dataset that streams the items of ``data`` as tensors.

    When iterated inside ``DataLoader`` worker processes, items are split
    round-robin across the workers so that each item is produced once per
    pass rather than once per worker.

    Args:
        data: Iterable of items, each accepted by ``torch.tensor``.
    """

    def __init__(self, data):
        self.data = data

    def __iter__(self):
        """Yield each item assigned to the current worker as a tensor."""
        # get_worker_info() returns None in the main process (num_workers=0);
        # in that case this "worker" owns every item.
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is None:
            worker_id, num_workers = 0, 1
        else:
            worker_id, num_workers = worker_info.id, worker_info.num_workers

        for index, item in enumerate(self.data):
            # Without this filter every worker would replay the whole
            # dataset and the loader would return duplicated samples.
            if index % num_workers == worker_id:
                yield torch.tensor(item)

# Ten single-element samples: [0], [1], ..., [9].
data = [[value] for value in range(10)]

iterable_dataset = IterableCustomDataset(data=data)
loader = DataLoader(dataset=iterable_dataset, batch_size=32)

# Only 10 items exist, so the first (and only) batch holds all of them.
item = next(iter(loader))
print(item)

tensor([[0],
        [1],
        [2],
        [3],
        [4],
        [5],
        [6],
        [7],
        [8],
        [9]])


**⭐️ PARALLELIZE WITH NUM_WORKERS**

In [23]:
# Same loader as before, but batches are prepared by 4 worker processes.
loader = DataLoader(
    dataset=tensor_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4,
)