In [1]:
import numpy as np

X_train = np.random.rand(100,5) # 100 samples with 5 features
y_train = np.random.randint(0,2,100) # two class labels for the 100 samples

In [2]:
X_train

array([[0.30183935, 0.80807526, 0.88316382, 0.93931782, 0.1463686 ],
       [0.17541039, 0.0946753 , 0.6274987 , 0.47860904, 0.0620986 ],
       [0.31275056, 0.22513563, 0.57125266, 0.57849909, 0.29291619],
       [0.77445739, 0.84288523, 0.09646446, 0.44010272, 0.84772766],
       [0.10515978, 0.50947701, 0.20564832, 0.82424419, 0.48443632],
       [0.36530847, 0.17338842, 0.09826097, 0.92845408, 0.36069454],
       [0.31209615, 0.27207806, 0.89939215, 0.77508497, 0.81717124],
       [0.48409846, 0.18349185, 0.52923661, 0.07540733, 0.54290478],
       [0.28558258, 0.44849661, 0.87866902, 0.52049552, 0.50221563],
       [0.3140654 , 0.59056491, 0.51611617, 0.19228725, 0.81562867],
       [0.72811988, 0.01135462, 0.24820644, 0.79215498, 0.63444702],
       [0.86928288, 0.97664077, 0.22433093, 0.65013946, 0.13620283],
       [0.95403729, 0.41670243, 0.86559579, 0.1261642 , 0.67010997],
       [0.30143898, 0.66488073, 0.58383146, 0.5442976 , 0.45443616],
       [0.89006382, 0.20652172, 0.

### Pytorch works with Datasets and Dataloaders. 

 Dataset stores the samples and their corresponding labels
 
 DataLoader wraps an iterable around the Dataset to enable easy access to the samples.


- https://pytorch.org/tutorials/beginner/basics/data_tutorial.html

1. We need to implement a basic Dataset class with __len__ and __getitem__ function: 

In [3]:
import torch

class Dataset(torch.utils.data.Dataset):
  'Characterizes a dataset for PyTorch'
  def __init__(self, X, y):
        'Initialization'
        self.labels = torch.tensor(y)
        self.features = torch.tensor(X)

  def __len__(self):
        'Denotes the total number of samples'
        return len(self.labels)

  def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample

        return self.features[index], self.labels[index]

In [4]:
train_dataset = Dataset(X_train, y_train)

In [5]:
train_dataset

<__main__.Dataset at 0x7fb6ebb9d430>

2. Using on the Dataset, we can initialize the dataloader for iteration:

In [6]:
from torch.utils.data import DataLoader
train_loader = DataLoader(train_dataset, batch_size=4)

3. Let's check the dataloader:

In [10]:
for batch_idx, (data, target) in enumerate(train_loader):
    print("X: ", data)
    print("y: ", target)
    break

X:  tensor([[0.6682, 0.7451, 0.0282, 0.9316, 0.8173],
        [0.2749, 0.4914, 0.2397, 0.0840, 0.0573],
        [0.8549, 0.6485, 0.2901, 0.4425, 0.5397],
        [0.1818, 0.6827, 0.2665, 0.9998, 0.0308]], dtype=torch.float64)
y:  tensor([1, 1, 1, 0])
