In [21]:
import os
import sys
import gzip
import random
import platform
import warnings
import collections
from tqdm import tqdm, tqdm_notebook

In [22]:
import re
import requests
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

In [23]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, utils
from torchvision.io import read_image

In [24]:
from torch.utils.data import Dataset, DataLoader, random_split, SubsetRandomSampler, WeightedRandomSampler

In [25]:
# Report the current OS and the versions of the main libraries in use.
current_os = platform.system()
environment_report = [
    f"Current OS: {current_os}",
    f"CUDA: {torch.cuda.is_available()}",
    f"Python Version: {platform.python_version()}",
    f"torch Version: {torch.__version__}",
    f"torchvision Version: {torchvision.__version__}",
]
# One print call, newline-separated, yields the same five output lines.
print(*environment_report, sep="\n")

Current OS: Linux
CUDA: True
Python Version: 3.7.11
torch Version: 1.7.1
torchvision Version: 0.8.2


In [26]:
# Set the path of the test dataset folder.
# NOTE(review): the variable is named `test_dir`, but the path points at the
# train split -- confirm this is intended.
test_dir = '/opt/ml/input/data/train'

In [27]:
# Locations of the metadata CSV and of the image root folder.
TRAIN_MY_PATH = dict(
    trainCsv=os.path.join(test_dir, 'train_new_cat.csv'),
    image=os.path.join(test_dir, 'images'),
)

In [28]:
# Load the metadata CSV and preview its first seven rows.
df = pd.read_csv(filepath_or_buffer=TRAIN_MY_PATH['trainCsv'])
df.head(n=7)

Unnamed: 0,id,gender,age,path,mask,mask_1
0,1,1,18,000001_female_Asian_45,mask1,1
1,1,1,18,000001_female_Asian_45,mask2,1
2,1,1,18,000001_female_Asian_45,mask3,1
3,1,1,18,000001_female_Asian_45,mask4,1
4,1,1,18,000001_female_Asian_45,mask5,1
5,1,1,18,000001_female_Asian_45,normal,0
6,1,1,18,000001_female_Asian_45,incorrect_mask,2


In [48]:
class MyTrainDataset(Dataset):
    """Image dataset that serves one label type (gender, age or mask) per sample.

    Every CSV row describes one image. Columns are addressed by position:
    column 3 holds the per-person folder name and column 4 the image file
    stem (``'.jpg'`` is appended). The label is read from column 1 for
    ``'gender'``, column 2 for ``'age'`` and, for ``'mask'``, from the column
    immediately after the file-stem column (column 5).
    NOTE(review): the positional layout is presumably
    ``id, gender, age, path, mask, mask_1`` -- confirm against the CSV.

    Args:
        path: Mapping with the keys ``'trainCsv'`` (metadata CSV path) and
            ``'image'`` (image root directory).
        transform: Callable applied to the loaded PIL image; a falsy value
            skips the transformation.
        category: One of ``'gender'``, ``'age'`` or ``'mask'``.
        train: Stored as ``self.train``; not otherwise used by this class.

    Raises:
        KeyError: If ``category`` is not one of the supported names.
    """

    # Positional CSV columns used to build the image path.
    _PERSON_DIR_COLUMN = 3
    _FILE_STEM_COLUMN = 4
    # Positional CSV column selected by each category name.
    _CATEGORY_COLUMNS = {'gender': 1, 'age': 2, 'mask': 4}
    # Column index that ``'mask'`` maps to; it stores the file stem (a string).
    _MASK_COLUMN = 4

    def __init__(self, path, transform, category, train=True):
        # Load the metadata table and remember where the images live.
        self.img_data = pd.read_csv(path['trainCsv'])
        self.img_dir = path['image']
        self.category = self._CATEGORY_COLUMNS[category]

        # Label table. Building a DataFrame from a list of Series makes each
        # Series a row, so this frame has one row per label type.
        self.label = [self.img_data['gender'],
                      self.img_data['age'],
                      self.img_data['mask']]
        self.label = pd.DataFrame(self.label)

        # Class names for each feature.
        self.gen_classes = ['male', 'female']
        self.age_classes = [str(i) for i in range(18, 61)]
        self.mask_classes = ['incorrect_mask', 'mask1', 'mask2', 'mask3', 'mask4', 'mask5', 'normal']

        # Remaining dataset settings.
        self.train = train
        self.transform = transform
        self.path = path
        self._repr_indent = 4

    def __len__(self):
        """Return the number of rows in the metadata CSV."""
        return len(self.img_data)

    def _label_column(self):
        """Return the positional CSV column that holds the label to serve."""
        # The column selected for 'mask' stores the image file stem (a string),
        # so the label is taken from the column right after it. The comparison
        # is int-to-int: comparing against the string '4' never matched, which
        # sent the string column to torch.tensor().
        if self.category == self._MASK_COLUMN:
            return self.category + 1
        return self.category

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is loaded as RGB and passed through ``self.transform`` when
        one is set; the label is wrapped in a ``torch.tensor``.
        """
        # <image root>/<person folder>/<file stem>.jpg
        person_dir = os.path.join(self.img_dir,
                                  self.img_data.iloc[idx, self._PERSON_DIR_COLUMN])
        img_path = os.path.join(person_dir,
                                self.img_data.iloc[idx, self._FILE_STEM_COLUMN] + '.jpg')

        # convert() forces the pixel data to be read, so the file handle can be
        # closed right away; it also guarantees a 3-channel image.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # Apply the transform when one was provided.
        if self.transform:
            image = self.transform(image)

        label = torch.tensor(self.img_data.iloc[idx, self._label_column()])
        return image, label

    def __repr__(self):
        '''
        Multi-line summary of the dataset, modelled on
        https://github.com/pytorch/vision/blob/master/torchvision/datasets/vision.py
        '''
        indent = self._repr_indent * " "
        head = "(Inform) My Custom Dataset"
        data_path = indent + "Data path: {}".format(self.path['image'])
        label_path = indent + "Label path: {}".format(self.path['trainCsv'])
        num_data = indent + "Number of datapoints: {}".format(self.__len__())
        # Only the gender class count is reported, whatever the category is.
        num_classes = indent + "Number of gender classes: {}".format(len(self.gen_classes))

        return '\n'.join([head,
                          data_path, label_path,
                          num_data, num_classes])

In [49]:
# Build the dataset that yields gender labels; each image is converted to a tensor.
dataset_train_My = MyTrainDataset(
    TRAIN_MY_PATH,
    transforms.ToTensor(),
    'gender',
    train=True,
)

In [46]:
# Display the dataset summary (rendered through MyTrainDataset.__repr__).
dataset_train_My

(Inform) My Custom Dataset
    Data path: /opt/ml/input/data/train/images
    Label path: /opt/ml/input/data/train/train_new_cat.csv
    Number of datapoints: 18900
    Number of gender classes: 2

In [52]:
# Fetch the first sample and inspect the image shape together with its label.
image, label = dataset_train_My[0]
image.shape, label

(torch.Size([3, 512, 384]), tensor(1))

In [None]:
# Batch the training dataset. DataLoader requires the dataset as its first
# argument; calling it with no arguments raises a TypeError.
# NOTE(review): batch_size=32 and shuffle=True are placeholder choices -- tune as needed.
train_loader = DataLoader(dataset_train_My, batch_size=32, shuffle=True)