<a href="https://colab.research.google.com/github/delhian/try_pytorch/blob/main/Datasets_and_Dataloader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [51]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy
import math
from torchvision.transforms import Compose
from sklearn.datasets import make_classification

In [65]:
class ClassificationData(Dataset):
  """Map-style dataset over a synthetic classification problem.

  The samples are generated once, at construction time, by
  ``sklearn.datasets.make_classification`` and kept in memory.

  Args:
    n_samples: number of samples to generate.
    n_features: total number of features per sample.
    n_classes: number of target classes.
    n_informative: number of informative features.
    transform: optional callable applied to the feature vector of a
      sample each time it is fetched; ``None`` disables it.
    random_state: forwarded to ``make_classification``; pass an int to
      get the same data on every run. ``None`` (default) keeps the
      previous, non-deterministic behaviour.
  """

  def __init__(self,
               n_samples,
               n_features,
               n_classes,
               n_informative,
               transform=None,
               random_state=None):
    # Generate the whole dataset up front.
    x, y = make_classification(n_samples=n_samples,
                               n_features=n_features,
                               n_classes=n_classes,
                               n_informative=n_informative,
                               random_state=random_state)
    self.x = x
    self.y = y
    self.n_samples = x.shape[0]
    self.transform = transform

  def __getitem__(self, index):
    """Return ``(features, label)`` for ``dataset[index]``.

    Only the features go through ``transform``; the label is returned
    exactly as stored.
    """
    x_i, y_i = self.x[index], self.y[index]
    # Compare against None explicitly: a transform object may be falsy
    # (e.g. it defines __len__ and is "empty") and must still be applied.
    if self.transform is not None:
      x_i = self.transform(x_i)
    return x_i, y_i

  def __len__(self):
    """Return the number of samples, i.e. ``len(dataset)``."""
    return self.n_samples

class ToTensor():
  """Transform that converts array-like data to a float32 tensor."""

  def __call__(self, x_i):
    """Return ``x_i`` as a ``torch.float32`` tensor.

    ``torch.as_tensor`` is used instead of the legacy
    ``torch.FloatTensor`` constructor: the legacy constructor treats a
    bare int as a *size* and returns uninitialised memory, whereas
    ``as_tensor`` always converts the given data.
    """
    return torch.as_tensor(x_i, dtype=torch.float32)

class Top_k:
  """Transform that keeps the ``k`` largest entries of a tensor."""

  def __init__(self, k):
    # Number of entries to keep.
    self.k = k

  def __call__(self, x_i):
    """Return the ``k`` largest values of ``x_i`` as given by ``torch.topk``."""
    largest, _positions = torch.topk(x_i, self.k)
    return largest

In [68]:
# Per-sample pipeline: convert the features to a tensor, then keep the
# 50 largest values.
transform = Compose([
    ToTensor(),
    Top_k(50),
])
# 100000 samples, 300 features, 10 classes, 5 informative features.
dataset = ClassificationData(
    n_samples=100000,
    n_features=300,
    n_classes=10,
    n_informative=5,
    transform=transform,
)

In [69]:
# Fetch one sample to inspect the (transformed features, label) pair.
dataset[6]

(tensor([2.7471, 2.7393, 2.7141, 2.6792, 2.4807, 2.0415, 2.0274, 2.0141, 2.0040,
         1.9358, 1.9205, 1.8608, 1.8330, 1.7576, 1.6857, 1.6715, 1.6195, 1.6053,
         1.5757, 1.5750, 1.5559, 1.5302, 1.5133, 1.5108, 1.4322, 1.4261, 1.4106,
         1.3775, 1.3640, 1.3575, 1.3443, 1.3313, 1.3081, 1.2857, 1.2725, 1.2693,
         1.2507, 1.2035, 1.2016, 1.1839, 1.1585, 1.1512, 1.1416, 1.1176, 1.1140,
         1.0985, 1.0957, 1.0758, 1.0728, 1.0508]), 3)

In [70]:
# Iteration settings: samples per batch and number of passes over the data.
batch_size = 70
epoches = 3
# Shuffled loader with two worker processes.
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
# Batches per epoch; rounded up so a final partial batch is counted.
n_iterations = math.ceil(len(dataset) / batch_size)

In [71]:
# Walk the loader for the configured number of epochs, reporting progress.
for epoch in range(epoches):
  print(f'epoch: {epoch + 1} of {epoches}')
  for step, (inputs, labels) in enumerate(dataloader):
    # Stay quiet except on every 100th batch and on the last batch.
    if (step + 1) % 100 != 0 and step + 1 != n_iterations:
      continue
    print(f'step: {step + 1} of {n_iterations}, batch shape:{inputs.shape[0], inputs.shape[1]}' )

epoch: 1 of 3
step: 100 of 1429, batch shape:(70, 50)
step: 200 of 1429, batch shape:(70, 50)
step: 300 of 1429, batch shape:(70, 50)
step: 400 of 1429, batch shape:(70, 50)
step: 500 of 1429, batch shape:(70, 50)
step: 600 of 1429, batch shape:(70, 50)
step: 700 of 1429, batch shape:(70, 50)
step: 800 of 1429, batch shape:(70, 50)
step: 900 of 1429, batch shape:(70, 50)
step: 1000 of 1429, batch shape:(70, 50)
step: 1100 of 1429, batch shape:(70, 50)
step: 1200 of 1429, batch shape:(70, 50)
step: 1300 of 1429, batch shape:(70, 50)
step: 1400 of 1429, batch shape:(70, 50)
step: 1429 of 1429, batch shape:(40, 50)
epoch: 2 of 3
step: 100 of 1429, batch shape:(70, 50)
step: 200 of 1429, batch shape:(70, 50)
step: 300 of 1429, batch shape:(70, 50)
step: 400 of 1429, batch shape:(70, 50)
step: 500 of 1429, batch shape:(70, 50)
step: 600 of 1429, batch shape:(70, 50)
step: 700 of 1429, batch shape:(70, 50)
step: 800 of 1429, batch shape:(70, 50)
step: 900 of 1429, batch shape:(70, 50)
step: 