# PyTorch Dataset and DataLoader

In [8]:
import os
import numpy as np
import pandas as pd
import torch
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

## Image Dataset

In [2]:
class ImageDataset(Dataset):
    """Map-style dataset of JPEG images paired with labels from a ``.npy`` file.

    Sample ``i`` is read from ``<img_dir>/img_<i>.jpg`` and paired with
    ``labels[i]``. The dataset length is the number of entries in the label
    file.

    Args:
        img_dir: Directory containing the ``img_<index>.jpg`` files.
        label_file: Path to a NumPy file holding one label per image.
    """

    def __init__(self, img_dir, label_file):
        super().__init__()
        self.img_dir = img_dir
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects, which can execute code. Only load label files from a
        # trusted source; consider allow_pickle=False if the labels are a
        # plain numeric array.
        self.labels = torch.tensor(np.load(label_file, allow_pickle=True))
        self.transform = transforms.ToTensor()

    def __getitem__(self, index):
        """Return ``{"data": flattened image tensor, "label": label}``.

        Raises:
            IndexError: If ``index`` is outside ``[0, len(self))``. Checking
                this before touching the filesystem lets plain iteration
                (``for sample in dataset``) terminate cleanly instead of
                failing on a missing image file.
        """
        if not 0 <= index < len(self.labels):
            raise IndexError(
                "index {} is out of range for dataset of size {}".format(
                    index, len(self.labels)
                )
            )
        img_path = os.path.join(self.img_dir, "img_{}.jpg".format(index))
        # Image.open is lazy and keeps the file handle open; the context
        # manager closes it as soon as the pixels are converted to a tensor.
        with Image.open(img_path) as img:
            data = self.transform(img).flatten()
        return {"data": data, "label": self.labels[index]}

    def __len__(self):
        """Return the number of labelled samples."""
        return len(self.labels)

## Base Classifier

In [9]:
class BaseClassifier(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        in_dim: Size of each input feature vector.
        feature_dim: Width of the hidden layer.
        out_dim: Number of output values produced per sample.
    """

    def __init__(self, in_dim, feature_dim, out_dim):
        super().__init__()
        # nn.Linear includes a bias term by default.
        self.layer1 = nn.Linear(in_features=in_dim, out_features=feature_dim)
        self.layer2 = nn.Linear(in_features=feature_dim, out_features=out_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the second layer's output; no activation follows it."""
        hidden = self.layer1(x)
        activated = self.relu(hidden)
        return self.layer2(activated)

In [4]:
# Build the training set from the image folder and the label file stored in it.
train_dataset = ImageDataset(
    img_dir='../data/train/',
    label_file='../data/train/labels.npy',
)

In [5]:
# Serve the training set in shuffled mini-batches of up to 32 samples.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

In [7]:
# Sanity check: dump each mini-batch's tensors followed by their shapes.
for minibatch in train_loader:
    data = minibatch['data']
    labels = minibatch['label']
    # One value per line, in the order: data, labels, data shape, label shape.
    print(data, labels, data.shape, labels.shape, sep='\n')

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
tensor([4, 1, 0, 2])
torch.Size([4, 784])
torch.Size([4])


In [10]:
# Layer sizes: 784 input features per sample, 256 hidden units, 10 outputs.
in_dim = 784
feature_dim = 256
out_dim = 10
model = BaseClassifier(in_dim=in_dim, feature_dim=feature_dim, out_dim=out_dim)

In [12]:
# Run the model on each mini-batch and print its raw outputs.
for minibatch in train_loader:
    data = minibatch['data']
    labels = minibatch['label']
    out = model(data)
    print(out)

tensor([[-0.0224, -0.0326, -0.0444, -0.0704,  0.1105, -0.0080, -0.0301,  0.0618,
          0.1142,  0.0358],
        [-0.0235, -0.0535, -0.1066, -0.0811,  0.1001,  0.0094,  0.0218,  0.0472,
          0.0441, -0.0208],
        [-0.0206,  0.0057, -0.0249, -0.1202,  0.1036,  0.0367, -0.0051,  0.0922,
          0.0828,  0.0420],
        [-0.0220, -0.1081, -0.0067,  0.0154,  0.0651,  0.0040, -0.0612,  0.0133,
          0.0449, -0.0470]], grad_fn=<AddmmBackward0>)
