In [2]:
import torch

In [11]:
from torch.utils.data import Dataset

class CustomDataSet(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        X: Sized, indexable collection of samples (e.g. a NumPy array).
        y: Sized, indexable collection of labels aligned with ``X``;
            entry ``i`` is the label belonging to ``X[i]``.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of entries.
    """

    def __init__(self, X, y) -> None:
        # Fail fast on misaligned inputs: without this check a longer X only
        # surfaces as an IndexError part-way through iteration, and a longer
        # y is silently truncated because __len__ is driven by X alone.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self) -> int:
        """Return the number of samples, taken from ``X``."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` tuple stored at position ``idx``."""
        return self.X[idx], self.y[idx]



In [4]:
from sklearn.datasets import make_classification

In [12]:
# Generate a synthetic binary-classification problem. The seed is fixed so
# that Restart & Run All regenerates exactly the same samples every time.
features, target = make_classification(
    n_samples=1000,
    n_features=5,
    random_state=42,
)

In [24]:
custom_dataset = CustomDataSet(X=features, y=target)
print(len(custom_dataset))

# Indexing yields a (feature_row, label) pair. Check it against the source
# arrays programmatically instead of relying on eyeballing the printout.
first_row, first_label = custom_dataset[0]
assert (first_row == features[0]).all(), "dataset row 0 differs from features[0]"
assert first_label == target[0], "dataset label 0 differs from target[0]"
print(f"\n{custom_dataset[0]} and \n{features[0]} are same")

1000

(array([ 1.64845184,  0.02539267,  0.78272728, -0.29046241,  0.58206715]), 1) and 
[ 1.64845184  0.02539267  0.78272728 -0.29046241  0.58206715] are same


#### Doing the same for multilabel classification

In [26]:
from sklearn.datasets import make_multilabel_classification
# Generate a synthetic multilabel problem (three possible labels per sample).
# The seed is fixed so the notebook reproduces the same data on every run.
features, target = make_multilabel_classification(
    n_samples=1000,
    n_features=5,
    n_classes=3,
    random_state=42,
)

In [27]:
# The same wrapper works unchanged for multilabel targets: each item now
# pairs a feature row with a vector of label indicators.
custom_dataset_mlb = CustomDataSet(
    X=features,
    y=target,
)
print(custom_dataset_mlb[0])


(array([9., 7., 5., 6., 8.]), array([1, 0, 0]))


#### Using DataLoader to load data

In [28]:
from torch.utils.data import DataLoader

In [32]:
# Rather than indexing the dataset by hand, wrap it in a DataLoader, which
# serves the samples in shuffled mini-batches of eight.
data_loader = DataLoader(
    dataset=custom_dataset,
    batch_size=8,
    shuffle=True,
)

In [33]:
# Create an explicit iterator over the loader so that batches can be pulled
# one at a time in the cells below.
data_iter = iter(data_loader)

In [34]:
# Fetch one batch with the built-in next(). DataLoader iterators implement the
# standard __next__ protocol; the Python-2-style .next() alias has been removed
# from recent PyTorch releases and raises AttributeError there.
data_02 = next(data_iter)

In [35]:
# Display the fetched batch: a two-element list of [features, labels].
data_02

[tensor([[-0.8684, -0.3093, -0.2281,  0.3775, -0.9590],
         [-1.0419, -1.0336, -0.9164, -0.3302,  1.1253],
         [-0.7776, -0.4844, -0.1811,  0.3663, -0.9409],
         [ 1.5432,  1.4438,  0.7331, -0.2714,  0.5435],
         [-0.3668, -0.2897, -0.0081,  0.2670, -0.7177],
         [ 0.9234, -0.7116,  0.7946,  0.2713, -0.9351],
         [ 0.7811,  0.3991,  0.1089, -0.4568,  1.2034],
         [-1.3813,  0.2018, -1.4095, -0.6748,  2.1808]], dtype=torch.float64),
 tensor([0, 0, 0, 1, 0, 1, 1, 0])]

In [37]:
# The batch is a two-element sequence; unpack it into the feature tensor and
# the label tensor.
features_02, target_02 = data_02

In [38]:
# Feature tensor of the batch: one row per sample, one column per feature.
print(features_02)

tensor([[-0.8684, -0.3093, -0.2281,  0.3775, -0.9590],
        [-1.0419, -1.0336, -0.9164, -0.3302,  1.1253],
        [-0.7776, -0.4844, -0.1811,  0.3663, -0.9409],
        [ 1.5432,  1.4438,  0.7331, -0.2714,  0.5435],
        [-0.3668, -0.2897, -0.0081,  0.2670, -0.7177],
        [ 0.9234, -0.7116,  0.7946,  0.2713, -0.9351],
        [ 0.7811,  0.3991,  0.1089, -0.4568,  1.2034],
        [-1.3813,  0.2018, -1.4095, -0.6748,  2.1808]], dtype=torch.float64)


In [39]:
# Label tensor of the batch: one class label per sample.
print(target_02)

tensor([0, 0, 0, 1, 0, 1, 1, 0])
