In [1]:
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np  # 다차원 배열 처리
import pandas as pd  # 데이터 처리
import matplotlib.pyplot as plt  # 시각화
import seaborn as sns  # 시각화
import os
import torchvision.transforms as transforms
import re
import shutil

In [2]:
# 분류할 이미지 파일이 들어 있는 디렉토리 경로
image_dir = "/opt/ml/input/data/train/images/"

# 분류 결과를 저장할 디렉토리 경로
img_class_dir_list = list()
img_class_dir_list.append("/opt/ml/input/data/train/img_classes/female/mask")
img_class_dir_list.append("/opt/ml/input/data/train/img_classes/female/normal")
img_class_dir_list.append("/opt/ml/input/data/train/img_classes/female/incorrect_mask")
img_class_dir_list.append("/opt/ml/input/data/train/img_classes/male/mask")
img_class_dir_list.append("/opt/ml/input/data/train/img_classes/male/normal")
img_class_dir_list.append("/opt/ml/input/data/train/img_classes/male/incorrect_mask")

for icdl in img_class_dir_list:
    os.makedirs(icdl, exist_ok=True)


In [3]:
class CustomDataset(Dataset):
    def __init__(self, data_dir, transform):
        self.data_dir = data_dir
        self.transform = transform
        self.image_files = os.listdir(data_dir)
        self.img_names = []
        for img_str in self.image_files:
            if img_str[0] == '.':
                continue
            imgs = os.listdir(f"{data_dir}{img_str}")
            for img in imgs:
                if img[0] == '.':
                    continue
                self.img_names.append(f"{img_str}/{img}")

    def __len__(self):
        # 데이터셋의 총 데이터 수 반환
        return len(self.img_names)

    def __getitem__(self, idx):
        # idx 번째 데이터 반환
        # 예시) return self.data[idx]
        image = Image.open(f"{self.data_dir}/{self.img_names[idx]}")
        # return self.image_files[idx]
        return self.img_names[idx]
    

In [7]:
data_trans = transforms.Compose([
#     transforms.Resize(256)
#     transforms.RandomHorizontalFlip(p=0.5),
#     transforms.RandomRotation(degrees=30),
#     transforms.RandomResizedCrop(size=224, scale=(0.5, 1.0)),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

dataset = CustomDataset(image_dir,data_trans)
print(len(dataset))
for i in range(len(dataset)):
    img_comp = re.split(r'[_/.]', dataset[i])  # ['003483', 'male', 'Asian', '56', 'mask5', 'jpg']
    sex = img_comp[1]
    status = img_comp[4]
    img_path = f"{image_dir}/{dataset[i]}"
    if sex == "female":
        if status in ["mask1", "mask2", "mask3", "mask4", "mask5"]:
            shutil.copy(img_path, f"{img_class_dir_list[0]}/{img_comp[0]}_{img_comp[4]}.jpg")
        elif status == "normal":
            shutil.copy(img_path, f"{img_class_dir_list[1]}/{img_comp[0]}_{img_comp[4]}.jpg")
        elif status == "incorrect":
            shutil.copy(img_path, f"{img_class_dir_list[2]}/{img_comp[0]}_{img_comp[4]}.jpg")
        else:
            print(img_comp)
    elif sex == "male":
        if status in ["mask1", "mask2", "mask3", "mask4", "mask5"]:
            shutil.copy(img_path, f"{img_class_dir_list[3]}/{img_comp[0]}_{img_comp[4]}.jpg")
        elif status == "normal":
            shutil.copy(img_path, f"{img_class_dir_list[4]}/{img_comp[0]}_{img_comp[4]}.jpg")
        elif status == "incorrect":
            shutil.copy(img_path, f"{img_class_dir_list[5]}/{img_comp[0]}_{img_comp[4]}.jpg")
        else:
            print(img_comp)

    else:
        print(img_comp)

18900


In [17]:
total = 0
for root, dirs, files in os.walk("/opt/ml/input/data/train/img_classes"):
    total += len(files)
    print(f"{root} : {len(files)}")
for root, dirs, files in os.walk("/opt/ml/input/data/train/img_classes/male/mask/"):
    print(f"{files[1]}")
print(f"total : {total}")
print(f"dataset : {len(dataset)}")

/opt/ml/input/data/train/img_classes : 0
/opt/ml/input/data/train/img_classes/male : 0
/opt/ml/input/data/train/img_classes/male/mask : 5210
/opt/ml/input/data/train/img_classes/male/incorrect_mask : 1042
/opt/ml/input/data/train/img_classes/male/normal : 1042
/opt/ml/input/data/train/img_classes/female : 0
/opt/ml/input/data/train/img_classes/female/mask : 8285
/opt/ml/input/data/train/img_classes/female/incorrect_mask : 1657
/opt/ml/input/data/train/img_classes/female/normal : 1657
001019_mask3.jpg
total : 18893
dataset : 18900


Fri Apr 14 07:30:06 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.80.02    Driver Version: 450.80.02    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-PCIE...  Off  | 00000000:00:05.0 Off |                  Off |
| N/A   34C    P0    36W / 250W |      0MiB / 32510MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces