## Dataset, DataLoader 구현
data feed를 위한 Dataset, DataLoader

### import

In [2]:
import os
import collections
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader

### csv 기반 Dataset 구현
EDA 과정 중 만든 new_train.csv를 이용해 간단한 dataset 구현

In [4]:
# Inspect the concrete type of a Compose pipeline (the kind of object later passed as `transform`).
type(transforms.Compose([transforms.ToTensor()]))

torchvision.transforms.transforms.Compose

Dataset을 상속하는 ImageDataset 정의

- ImageDataset 옵션
1. base: data 경로
2. filename: csv 파일 이름
3. transform: X에 대해 적용할 transform(기본값은 None)
4. train: train 데이터 여부(기본값은 True). False이면 레이블 y는 None으로 반환

In [24]:
class ImageDataset(Dataset):
    '''
    Dataset backed by a CSV index of image files.

    The CSV must provide an ``image_path`` column (paths relative to ``base``)
    and, when ``train`` is True, a ``class`` column holding the label.

    Args:
        base (str): directory that contains both the CSV file and the images.
        filename (str): name of the CSV file inside ``base``.
        transform (callable, optional): applied to the image array of every
            sample; skipped when None (default).
        train (bool): when True (default) the label is read from the ``class``
            column; when False the label is returned as None.

    Note:
        With ``train=False`` every sample is ``(X, None)``. DataLoader's
        default collate function cannot batch None values, so pass a custom
        ``collate_fn`` when wrapping such a dataset in a DataLoader.
    '''

    def __init__(self, base, filename, transform=None, train=True):
        self.data = pd.read_csv(f'{base}/{filename}')
        self.transform = transform
        self.train = train
        self.path = base

    def __len__(self):
        '''Number of samples, i.e. number of rows in the CSV.'''
        return len(self.data)

    def __getitem__(self, idx):
        '''
        Return the ``(X, y)`` pair for the sample at position ``idx``.

        X is the image loaded as a numpy array, passed through ``transform``
        when one was given. y is the ``class`` value of the row when ``train``
        is True, otherwise None.
        '''
        # Positional lookup (.iloc) instead of label lookup, so negative
        # indices work and the result does not depend on the frame's index.
        image_file = f"{self.path}/{self.data['image_path'].iloc[idx]}"

        # The context manager releases the file handle deterministically;
        # np.array copies the pixel data out before the image is closed.
        with Image.open(image_file) as img:
            X = np.array(img)

        if self.transform is not None:
            X = self.transform(X)

        y = self.data['class'].iloc[idx] if self.train else None

        return X, y

구현한 ImageDataset을 가지는 DataLoader 선언

- DataLoader 옵션
1. batch_size
2. shuffle
3. sampler
4. collate_fn
5. drop_last

In [25]:
# Build the dataset from ./train/new_train.csv; each image is converted by ToTensor.
imageDataset = ImageDataset(
    base='./train',
    filename='new_train.csv',
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Wrap the dataset in a loader; every option listed above is spelled out explicitly.
imageLoader = DataLoader(
    dataset=imageDataset,
    batch_size=4,
    shuffle=True,
    sampler=None,
    collate_fn=None,
    drop_last=False,
)

DataLoader 동작 확인해 보기

In [26]:
# Fetch one batch from a fresh iterator over the loader; the cell displays it as its output.
next(iter(imageLoader))

[tensor([[[[0.6980, 0.6980, 0.6980,  ..., 0.7529, 0.7529, 0.7529],
           [0.6980, 0.6980, 0.6980,  ..., 0.7529, 0.7529, 0.7529],
           [0.6980, 0.6980, 0.6980,  ..., 0.7529, 0.7529, 0.7529],
           ...,
           [0.4745, 0.4745, 0.4784,  ..., 0.4078, 0.3922, 0.3843],
           [0.4706, 0.4706, 0.4745,  ..., 0.4196, 0.4118, 0.4000],
           [0.4706, 0.4706, 0.4706,  ..., 0.4275, 0.4275, 0.4196]],
 
          [[0.6627, 0.6627, 0.6627,  ..., 0.7137, 0.7137, 0.7137],
           [0.6627, 0.6627, 0.6627,  ..., 0.7137, 0.7137, 0.7137],
           [0.6627, 0.6627, 0.6627,  ..., 0.7137, 0.7137, 0.7137],
           ...,
           [0.4431, 0.4431, 0.4471,  ..., 0.2902, 0.2745, 0.2549],
           [0.4392, 0.4392, 0.4431,  ..., 0.2941, 0.2784, 0.2667],
           [0.4392, 0.4392, 0.4392,  ..., 0.2941, 0.2863, 0.2784]],
 
          [[0.5961, 0.5961, 0.5961,  ..., 0.6667, 0.6667, 0.6667],
           [0.5961, 0.5961, 0.5961,  ..., 0.6667, 0.6667, 0.6667],
           [0.5961, 0.59