In [1]:
import os
import numpy as np
import torch
import torchvision
from torchvision.datasets import ImageFolder
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import cv2
import torch.nn as nn
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch.nn.functional as F
import copy
import time
import csv
from tqdm import tqdm
from torch.utils.data.dataset import random_split

# Dataset 과 DataLoader 만들기

In [7]:
class custom_dataset(Dataset):
    """Dataset over a directory of pre-converted NumPy image files.

    Every entry returned by ``os.listdir(inputs_dir)`` is treated as one sample.
    The class label is the last character of the file name stem (the character
    right before the extension, e.g. ``..._1.npy`` -> ``1``) parsed with ``int``.

    Args:
        inputs_dir: Directory containing the array files.
        transform: Optional callable that receives and returns a dict with the
            keys ``'input'`` (the resized image array) and ``'target'`` (the
            integer label).

    ``dataset[idx]`` returns ``(input_tensor, target_tensor)``: the image
    converted with ``torchvision.transforms.functional.to_tensor`` and the label
    wrapped with ``torch.tensor``.

    Raises (from ``__getitem__``):
        ValueError: if the label character in the file name is not a digit.
    """

    def __init__(self, inputs_dir, transform = None):
        self.inputs_dir = inputs_dir
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping stable across runs and across dataset instances.
        self.inputs_list = sorted(os.listdir(inputs_dir))
        self.transform = transform

    def __len__(self):
        return len(self.inputs_list)

    def __getitem__(self, idx):
        file_name = self.inputs_list[idx]

        # The label is encoded as the last character of the file name stem.
        target_label = int(os.path.splitext(file_name)[0][-1])

        # Build the full path instead of os.chdir()-ing into the directory:
        # changing the working directory is a process-wide side effect and
        # breaks a relative inputs_dir on the second call.
        # The images were already converted to NumPy arrays and saved on disk.
        input_image_numpy = np.load(os.path.join(self.inputs_dir, file_name))
        input_image_numpy = cv2.resize(input_image_numpy, (256, 256), interpolation = cv2.INTER_LANCZOS4)

        # Image and label are bundled into one dict only to mirror the
        # segmentation pipeline's transform interface; the custom transform does
        # not actually use the target. The extra memory/time cost is negligible.
        combine = {'input': input_image_numpy, 'target': target_label}

        if self.transform:
            combine = self.transform(combine)

        input_tensor = torchvision.transforms.functional.to_tensor(combine['input'])
        target_tensor = torch.tensor(combine['target'])

        return (input_tensor, target_tensor)
    
class RandomFlip(object):
    """Randomly flip the image of an ``{'input', 'target'}`` sample dict.

    The transform operates on NumPy arrays (it is applied before the conversion
    to a tensor) and never modifies the target.

    Args:
        horizontal: Enable the horizontal flip (``cv2.flip(img, 1)``).
        vertical: Enable the vertical flip (``cv2.flip(img, 0)``).
        p: Probability with which each enabled flip is applied. The two flips
            are drawn independently.
    """

    def __init__(self, horizontal = True, vertical = False, p = 0.5):
        self.horizontal = horizontal
        self.vertical = vertical
        self.p = p

    def __call__(self, combine):
        """Return a new dict with the possibly flipped image and the unchanged target."""
        inputs = combine['input']    # image as a NumPy array
        targets = combine['target']  # passed through untouched

        # Flip when the draw falls BELOW p so that p really is the flip
        # probability (p=1.0 always flips, p=0.0 never flips). The previous
        # `> p` comparison flipped with probability 1 - p.
        if self.horizontal and np.random.rand() < self.p:
            inputs = cv2.flip(inputs, 1)

        if self.vertical and np.random.rand() < self.p:
            inputs = cv2.flip(inputs, 0)

        # The image is still a NumPy array here; tensor conversion happens later.
        return {'input': inputs, 'target': targets}

# Build the train / validation / test splits (70 / 15 / 15) and their loaders.
# NOTE(review): the data directory is a hard-coded absolute path; consider
# moving it to a configuration cell.
path_train_inputs = '/home/mskang/hyeokjong/cancer/2019/npy_aug'

# Augmentation used for training samples only.
transformation = transforms.Compose([ RandomFlip(True, True, 0.5)])

dataset = custom_dataset(path_train_inputs, transformation)

# Evaluation view of the same files without augmentation. A shallow copy shares
# the file list with `dataset`, so indices refer to the same files in both.
eval_dataset = copy.copy(dataset)
eval_dataset.transform = None

# Seed the split so the partition is identical after a kernel restart;
# otherwise a re-run can move former training samples into the test set.
split_seed = 42
split_generator = torch.Generator().manual_seed(split_seed)

train_size = int(0.7*len(dataset))
v_size = len(dataset) - train_size
train_dataset, held_out = random_split(dataset, [train_size, v_size], generator = split_generator)

# Re-point the held-out indices at the non-augmented view so that validation
# and test metrics are not perturbed by random flips.
v_dataset = torch.utils.data.Subset(eval_dataset, held_out.indices)

val_size = int(0.5*len(v_dataset))
test_size = len(v_dataset) - val_size
val_dataset, test_dataset = random_split(v_dataset, [val_size, test_size], generator = split_generator)


batch_size = 64
train_dl = DataLoader(train_dataset, batch_size, shuffle = True, num_workers = 4, pin_memory = True)
# Evaluation loaders do not need shuffling.
val_dl = DataLoader(val_dataset, batch_size, shuffle = False, num_workers = 4, pin_memory = True)
test_dl = DataLoader(test_dataset, batch_size, shuffle = False, num_workers = 4, pin_memory = True)