## Import Libraries

In [44]:
from sklearn.datasets import make_classification
import torch
import cv2
import numpy as np

## Custom class for Tabular Data

In [33]:
class CustomDataset:
    """Map-style dataset over a 2-D feature array and a parallel target array.

    ``data`` must expose ``.shape`` and support ``data[row, :]`` indexing;
    ``targets`` must support ``targets[row]``.
    """

    def __init__(self, data, targets):
        self.targets = targets
        self.data = data

    def __len__(self):
        # The sample count is the size of the first axis of ``data``.
        n_rows = self.data.shape[0]
        return n_rows

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``{"x": float tensor, "y": long tensor}``."""
        row = self.data[idx, :]
        label = self.targets[idx]
        features = torch.tensor(row, dtype=torch.float)
        target = torch.tensor(label, dtype=torch.long)
        return dict(x=features, y=target)

In [21]:
# Fix the seed so the synthetic dataset (and every output below) is
# reproducible under Restart Kernel -> Run All.
data, targets = make_classification(n_samples=1000, random_state=42)

In [22]:
# Feature matrix dimensions as (n_samples, n_features).
data.shape

(1000, 20)

In [23]:
# Target vector dimensions: one label per sample.
targets.shape

(1000,)

In [34]:
# Wrap the synthetic arrays in the tabular dataset class defined above.
custom_dataset = CustomDataset(data=data, targets=targets)

In [39]:
# Fetch a single sample by index; the result is a dict with "x" and "y" tensors.
custom_dataset[333]

{'x': tensor([ 1.4491,  0.9214,  0.5990, -0.0620,  0.3955, -0.4622,  1.3267,  0.2243,
         -0.4066,  0.5722,  1.9725, -1.2574, -1.0772, -0.3190,  0.6697, -0.5596,
         -0.1450, -0.4574, -2.0619, -0.1304]),
 'y': tensor(1)}

In [41]:
# Preview the first three samples (fewer if the dataset is shorter).
for idx in range(min(3, len(custom_dataset))):
    print(custom_dataset[idx])

{'x': tensor([ 0.9401, -0.5646,  0.9065, -0.2736,  0.7951, -0.7689, -1.4824,  1.0896,
         0.7769, -2.1315, -0.5732, -1.5208, -0.8527,  0.4720,  1.0370, -0.7303,
        -0.2171,  0.4234,  1.4822,  0.3679]), 'y': tensor(1)}
{'x': tensor([-0.5750, -1.7812,  0.6492,  1.8493,  1.4874, -0.2236,  0.2000,  1.0794,
         0.7218, -2.0462,  1.1931, -1.6442,  0.2957, -0.2869, -0.4060, -1.2922,
         0.1581, -0.5465,  0.7566, -0.6664]), 'y': tensor(1)}
{'x': tensor([-1.3684e+00, -2.5720e-02, -1.1606e+00, -1.9270e+00, -9.2557e-01,
         5.8387e-03,  1.7100e+00, -4.7221e-01,  1.4867e+00,  4.1096e-01,
        -8.6088e-01,  4.7000e-01, -9.0069e-01, -7.7805e-01,  5.0999e-01,
         6.6844e-01, -1.6029e-01, -3.5499e-01,  2.0744e-01,  1.2287e-03]), 'y': tensor(0)}


## Custom class for NLP problems

In [42]:
## Classification/ Regression problems
class CustomDatasetNLP:
    """Map-style dataset pairing text samples with targets.

    Parameters
    ----------
    data : sequence
        Indexable collection of text samples.
    targets : sequence
        Indexable collection of targets aligned with ``data``.
    tokenizer : callable
        Called as ``tokenizer(text)``. Its result is passed straight to
        ``torch.tensor(..., dtype=torch.long)``, so it is assumed to return
        a flat sequence of integer token ids -- TODO confirm against the
        tokenizer actually used (one that returns a dict needs unpacking).
    """

    def __init__(self, data, targets, tokenizer):
        self.data = data
        self.targets = targets
        self.tokenizer = tokenizer

    def __len__(self):
        """Return the number of text samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{"input_ids": long tensor, "target": tensor}`` for sample ``idx``."""
        text = self.data[idx]
        target = self.targets[idx]

        # Use the tokenizer stored on the instance; relying on a notebook-level
        # global named `tokenizer` raises NameError on a fresh kernel.
        input_ids = self.tokenizer(text)
        # e.g. [101, 42, 47, 27, 216, ...]
        # NOTE: no padding/truncation is applied here, so samples may have
        # different lengths; batching needs a padding collate_fn.

        return {
            "input_ids": torch.tensor(input_ids, dtype=torch.long),
            "target": torch.tensor(target)
        }

## Custom class for Image/Vision problems

In [None]:
class CustomDataset:
    """Map-style image dataset that loads images from disk with OpenCV.

    NOTE(review): this class reuses the name ``CustomDataset`` of the tabular
    dataset defined earlier in the notebook and shadows it once this cell
    runs. The name is kept so existing references keep working; consider
    renaming it.

    Parameters
    ----------
    image_paths : sequence
        Indexable collection of image file paths.
    targets : sequence
        Indexable collection of targets aligned with ``image_paths``.
    augmentations : callable, optional
        Called as ``augmentations(image=image)`` and must return a mapping
        with an ``"image"`` entry holding the transformed image.
    """

    def __init__(self, image_paths, targets, augmentations=None):
        self.image_paths = image_paths
        self.targets = targets
        self.augmentations = augmentations

    def __len__(self):
        """Return the number of images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``{"image": float32 channel-first tensor, "target": tensor}``.

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read the image at ``image_paths[idx]``.
        """
        target = self.targets[idx]
        image_path = self.image_paths[idx]
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        # OpenCV loads images as BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]
        # Move channels first: (H, W, C) -> (C, H, W).
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        return {
            "image": torch.tensor(image),
            "target": torch.tensor(target)
        }