# Dataset and DataLoader

In [7]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torchvision
from torchvision import transforms, datasets


In [2]:
# Select the compute device: CUDA when PyTorch reports it as available,
# otherwise the CPU. The chosen device is echoed for the reader.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cpu


## Dataset

In [3]:
# dataset
class MyDataset(Dataset):
    """Dataset backed by a CSV table with one sample per row.

    Column 0 of a row is returned as the label; the remaining columns are
    returned as a ``uint8`` array reshaped to ``(28, 28, 1)``.
    """

    def __init__(self, csv_file):
        """Read the whole CSV into memory.

        Args:
            csv_file: Anything accepted by ``pd.read_csv``.
        """
        self.data = pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of rows in the table."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored in row ``index``."""
        # Everything after the first column holds the pixel values.
        pixel_columns = self.data.iloc[index, 1:]
        flat_pixels = pixel_columns.values.astype(np.uint8)
        image = flat_pixels.reshape(28, 28, 1)

        # The first column holds the label.
        target = self.data.iloc[index, 0]
        return image, target

In [None]:
# Build the FashionMNIST train split (train=True) and test split
# (train=False) with identical settings: same root directory, download
# enabled, and ToTensor as the only transform.
training_data, testing_data = (
    datasets.FashionMNIST(
        root='Machine_Learning/pytorch/2. datasets_and_dataloader/data',
        train=use_train_split,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor()
        ]),
    )
    for use_train_split in (True, False)
)

## DataLoader

In [19]:
# Wrap each split in a DataLoader with the same settings: batches of 64,
# shuffling enabled.
train_dataloader, test_dataloader = [
    DataLoader(split, batch_size=64, shuffle=True)
    for split in (training_data, testing_data)
]

In [17]:
# Print, one value per line, the shape and dtype of the image and target
# tensors: training split first, then the test split.
print(
    *(
        described
        for split in (training_data, testing_data)
        for described in (
            split.data.shape,
            split.targets.shape,
            split.data.dtype,
            split.targets.dtype,
        )
    ),
    sep='\n',
)

torch.Size([60000, 28, 28])
torch.Size([60000])
torch.uint8
torch.int64
torch.Size([10000, 28, 28])
torch.Size([10000])
torch.uint8
torch.int64


In [18]:
# Class names exposed by the dataset object; as the cell's last expression,
# the notebook displays the list.
training_data.classes

['T-shirt/top',
 'Trouser',
 'Pullover',
 'Dress',
 'Coat',
 'Sandal',
 'Shirt',
 'Sneaker',
 'Bag',
 'Ankle boot']

In [20]:
# Take a single batch: iter() creates a fresh iterator over the training
# loader and next() returns its first (images, labels) pair.
images, labels = next(iter(train_dataloader))

In [None]:
# Show the first 25 samples of the batch in a 5x5 grid, each titled with its
# label and drawn without axis ticks.
fig = plt.figure(figsize=(12, 12))
for i in range(25):
    plt.subplot(5, 5, i + 1)
    # images[i][0] selects index 0 along the first axis of sample i
    # (assumes samples are (channel, height, width) -- TODO confirm).
    plt.imshow(images[i][0], cmap='gray', interpolation='none')
    plt.title("gt {}".format(labels[i]))
    plt.xticks([])
    plt.yticks([])
# Compute the layout once, after every subplot and title exists. Calling it
# inside the loop repeated the same work 25 times and ran the final pass
# before the last subplot had its image and title.
plt.tight_layout()

## Custom dataset

In [11]:
import os
import pandas as pd
from torchvision.io import read_image

class CustomImageDataset(Dataset):
    """Image dataset described by a CSV annotations table and an image folder.

    For each row, column 0 is used as the image file name (joined onto
    ``img_dir``) and column 1 as the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Load the annotations and store the dataset configuration.

        Args:
            annotations_file: CSV file, read with ``pd.read_csv``.
            img_dir: Directory that the image file names are joined onto.
            transform: Optional callable applied to each loaded image.
            target_transform: Optional callable applied to each label.
        """
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.img_labels)

    def __getitem__(self, index):
        """Return ``(image, label)`` for row ``index``, transforms applied."""
        annotations = self.img_labels

        # Load the image first, then look up its label.
        image = read_image(os.path.join(self.img_dir, annotations.iloc[index, 0]))
        label = annotations.iloc[index, 1]

        # A transform is applied only when it is set and truthy.
        image = self.transform(image) if self.transform else image
        label = self.target_transform(label) if self.target_transform else label
        return image, label
