In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

import torch
import torchvision 
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, utils
from torchvision.transforms import ToTensor, Resize, Normalize
from torch.utils.data import Dataset, DataLoader, random_split, SubsetRandomSampler, WeightedRandomSampler

In [2]:
# Root of the training data. The default is the original hard-coded location;
# set the TRAIN_DATA_DIR environment variable to run the notebook on a machine
# where the data lives elsewhere instead of editing this cell.
train_path = os.environ.get('TRAIN_DATA_DIR', '/opt/ml/input/data/train')
# Directory that is searched recursively for image files.
train_image_dir_path = os.path.join(train_path, 'images')

In [3]:
def search(dirname, result):
  """Recursively collect file paths under ``dirname`` into ``result``.

  Entries whose name starts with ``'.'`` are skipped (files and directories
  alike), and files without an extension are ignored. Directories that cannot
  be listed because of a ``PermissionError`` are silently skipped.

  Args:
    dirname: directory to scan.
    result: list that matching file paths are appended to (mutated in place).

  Returns:
    None; the output is delivered through ``result``.
  """
  try:
    entries = os.listdir(dirname)
  except PermissionError:
    # Best effort: an unreadable directory contributes nothing.
    return

  for entry in entries:
    if entry.startswith('.'):
      continue
    entry_path = os.path.join(dirname, entry)
    if os.path.isdir(entry_path):
      search(entry_path, result)
      continue
    # Keep only files that carry an extension.
    if os.path.splitext(entry_path)[1]:
      result.append(entry_path)

In [4]:
# Gather every image file path found beneath the training image directory.
all_path = list()
search(dirname=train_image_dir_path, result=all_path)

FileNotFoundError: [Errno 2] No such file or directory: '/opt/ml/input/data/train/images'

In [None]:
len(all_path)
# Success if this shows 18900 images

In [None]:
# Distinct file extensions among the collected paths, in first-seen order.
exts = list(dict.fromkeys(os.path.splitext(path)[-1] for path in all_path))
print(exts)

In [None]:
# Sort the paths: os.listdir (used by search) returns entries in arbitrary
# order, so sorting gives the list a stable order.
all_path = sorted(all_path)

In [None]:
def labelling(name):
  """Derive the integer class label for one image from its file path.

  The label is the sum of three independent offsets:
    * mask state, from the file name: +6 if it contains ``'incorrect'``,
      +12 if it contains ``'normal'``, otherwise +0;
    * gender, from the parent directory name: +3 for ``'female'``;
    * age group, from the parent directory name: +0 below 27,
      +1 for 27..57 inclusive, +2 for 58 and above.

  Args:
    name: ``'/'``-separated path whose last two components are
      ``<directory>/<file>``. The directory name is split on ``'_'``; field 1
      is read as the gender and field 3 as the integer age
      (presumably ``<id>_<gender>_<race>_<age>`` -- confirm against the data).

  Returns:
    int label in the range 0..17.

  Raises:
    IndexError / ValueError: if the directory name does not have the
      expected ``'_'``-separated fields or the age is not an integer.
  """
  info, mask_type = name.split('/')[-2:]
  info = info.split('_')
  gender, age = info[1], int(info[3])

  label = 0

  # 'incorrect' is tested first, so a file name containing it never takes
  # the 'normal' branch.
  if 'incorrect' in mask_type:
    label += 6
  elif 'normal' in mask_type:
    label += 12

  if gender == 'female':
    label += 3

  # The two age ranges must be contiguous: with an upper bound of 57 on the
  # middle range, age 57 matched neither branch and was labelled as the
  # youngest group.
  if 27 <= age < 58:
    label += 1
  elif age >= 58:
    label += 2

  return label

In [None]:
# One row per collected image; the single 'path' column holds its file path.
train_path_label = pd.DataFrame({'path': all_path})
train_path_label

In [None]:
# Compute the class label of every row from its file path.
train_path_label['label'] = train_path_label['path'].map(labelling)
train_path_label

In [None]:
class CustomDataset(Dataset):
  """Dataset yielding ``(image, label)`` pairs from a path/label table.

  Args:
    img_path_label: table with a ``'path'`` column (image file paths) and a
      ``'label'`` column, both indexed positionally via ``.iloc``
      (a pandas DataFrame in this notebook).
    transforms: callable applied to the loaded PIL image; a falsy value
      returns the PIL image untransformed.
  """

  def __init__(self, img_path_label, transforms):
    self.X = img_path_label['path']
    self.y = img_path_label['label']
    self.transforms = transforms

  def __getitem__(self, index):
    """Load the image at position ``index`` and return ``(image, label)``."""
    # Image.open is lazy and keeps the file handle open. convert() decodes
    # the pixels, so the handle can be released right away by the ``with``
    # block instead of leaking one open file per sample.
    # Converting to RGB also guarantees three channels for every file
    # (grayscale / RGBA / palette images would otherwise produce tensors
    # with a different channel count than the rest of the batch).
    with Image.open(self.X.iloc[index]) as raw:
      img = raw.convert('RGB')
    if self.transforms:
      img = self.transforms(img)
    label = self.y.iloc[index]
    return img, label

  def __len__(self):
    """Number of samples (rows of the path column)."""
    return len(self.X)

In [13]:
# Preprocessing pipeline applied to every image: resize to a fixed
# (512, 384) size with bilinear interpolation, convert to a tensor, then
# normalise each channel.
# NOTE(review): the mean/std values look like the standard ImageNet channel
# statistics -- confirm they are what this dataset should use.
transform = transforms.Compose(
  [
    transforms.Resize(size=(512, 384), interpolation=Image.BILINEAR),
    transforms.ToTensor(),
    transforms.Normalize(
      mean=[0.485, 0.456, 0.406],
      std=[0.229, 0.224, 0.225],
    ),
  ]
)



In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation. A fixed random_state makes the
# split identical on every Restart & Run All, so results are comparable
# between runs.
# NOTE(review): rows are split per image. If several images in the same
# directory belong to the same person, they can end up on both sides of the
# split -- consider a group-wise split if that matters for evaluation.
train, valid = train_test_split(train_path_label, test_size=0.2, random_state=42)

In [None]:
# Mini-batch size passed to both the training and validation DataLoaders.
BATCH_SIZE = 64

In [None]:
# Training split: wrap it in the dataset and serve shuffled mini-batches.
train_dataset = CustomDataset(img_path_label=train, transforms=transform)
train_dataloader = DataLoader(
  train_dataset,
  batch_size=BATCH_SIZE,
  shuffle=True,
)

In [None]:
# Validation split, preprocessed with the same transform as training.
valid_dataset = CustomDataset(valid, transform)
# Shuffling brings nothing at evaluation time; a fixed order keeps the
# validation batches identical from one pass to the next.
valid_dataloader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Sanity check: fetch one training batch and show the shape of its image tensor.
first_batch = next(iter(train_dataloader))
first_batch[0].shape