## 60대 데이터 늘리기

In [1]:
import torch
from timm.data.mixup import Mixup
from timm.data.dataset import ImageDataset
from timm.data.loader import create_loader
# import torchvision.transforms
import torchvision
import numpy as np
import os
from enum import Enum
from PIL import Image
import PIL

from torchvision.utils import save_image
import shutil
import random

In [2]:
random.seed(42)     # Fixed seed so the randomly paired data is the same on every run
# Root folder that is listed for profile sub-folders; also the default
# location where the generated Fake_* folders are written.
data_directory = '/opt/ml/input/data/train/images'

In [3]:
def split_profile_by_gender_60s(profiles):      #성별별로 데이터를 나눕니다
    male,female = [], []        
    for profile in profiles:
        if profile.startswith("Fake"):  # 생성한 데이터를 제외한 데이터들을 반환하기 위해
            continue
        id, gender,species,age = profile.split("_")
        if age == "60":
            if gender == "male":
                male.append(os.path.join(data_directory,profile))
            else:
                female.append(os.path.join(data_directory,profile))
    return [male,female]

In [4]:
# List the dataset folder, drop hidden entries (names starting with "."),
# then bucket the age-60 profiles by gender.
profiles = [
    entry for entry in os.listdir(data_directory) if not entry.startswith(".")
]
male, female = split_profile_by_gender_60s(profiles)

In [5]:
def make_images(profile_i,profile_j,save_dir,save_profile_dir,profiles):
    """Blend two profiles' images 50/50 and save the results.

    For each of the seven fixed image names, the file is read from both
    source profile folders, each pixel array is integer-halved, the halves
    are added, and the result is written under
    ``save_dir/save_profile_dir`` with the same file name.

    Args:
        profile_i: Index into ``profiles`` of the first source folder.
        profile_j: Index into ``profiles`` of the second source folder.
        save_dir: Root output directory.
        save_profile_dir: Name of the (already existing) output sub-folder.
        profiles: Sequence of source profile folder paths.
    """
    image_names = ("incorrect_mask", "mask1", "mask2", "mask3", "mask4", "mask5", "normal")
    ext = ".jpg"
    out_dir = os.path.join(save_dir, save_profile_dir)

    for image in image_names:
        filename = image + ext
        # Context managers close the source files deterministically instead of
        # leaving that to garbage collection.
        with Image.open(os.path.join(profiles[profile_i], filename)) as source_a:
            half_a = np.array(source_a) // 2
        with Image.open(os.path.join(profiles[profile_j], filename)) as source_b:
            half_b = np.array(source_b) // 2
        # Halving before adding keeps the sum from overflowing
        # (assumes 8-bit pixel arrays of equal shape -- TODO confirm).
        Image.fromarray(half_a + half_b).save(os.path.join(out_dir, filename))
    
def not_random_make_fakes_by_gender(gender,profiles,save_dir=data_directory):
    """Create blended "Fake" profiles by pairing the first half with the second half.

    Profile ``i`` is mixed with profile ``i + len(profiles)//2``. With an odd
    number of profiles the last one is left unused. New folders are named
    ``Fake_<6-digit counter>_<gender>_Asian_60``; numbering continues after
    the highest counter already present in ``save_dir``.

    Args:
        gender: Gender label embedded in the generated folder names.
        profiles: Sequence of source profile folder paths.
        save_dir: Output directory; created if it does not exist.
    """
    try:
        existing = os.listdir(save_dir)
    except FileNotFoundError:
        # Only a missing output directory is recoverable; other errors propagate.
        os.makedirs(save_dir)
        existing = []

    # Resume numbering after the highest existing Fake_<n>_... folder.
    cnt = 0
    for folder in existing:
        if not folder.startswith("Fake"):
            continue
        try:
            cnt = max(cnt, int(folder.split("_")[1]))
        except (IndexError, ValueError):
            continue  # "Fake*" entry without a numeric counter field

    half = len(profiles) // 2
    for i in range(half):
        cnt += 1
        save_profile_dir = f"Fake_{cnt:0>6}_{gender}_Asian_60"
        os.mkdir(os.path.join(save_dir, save_profile_dir))
        make_images(i, i + half, save_dir, save_profile_dir, profiles)
        
def random_make_fakes_by_gender(gender,profiles,save_dir=data_directory):
    """Create blended "Fake" profiles from a random split of the profiles.

    Half of the profile indices (rounded down) are sampled with the
    ``random`` module; each sampled index is paired with one of the remaining
    indices. New folders are named
    ``Fake_<6-digit counter>_<gender>_Asian_60``; numbering continues after
    the highest counter already present in ``save_dir``.

    Args:
        gender: Gender label embedded in the generated folder names.
        profiles: Sequence of source profile folder paths.
        save_dir: Output directory; created if it does not exist.
    """
    indices = list(range(len(profiles)))
    limit = len(profiles) // 2
    # The set round-trips are kept on purpose: they determine the pairing
    # order, so a fixed seed yields the same pairs as before.
    splited_A = set(random.sample(indices, limit))
    splited_B = list(set(indices) - splited_A)
    splited_A = list(splited_A)

    try:
        existing = os.listdir(save_dir)
    except FileNotFoundError:
        # Only a missing output directory is recoverable; other errors propagate.
        os.makedirs(save_dir)
        existing = []

    # Resume numbering after the highest existing Fake_<n>_... folder.
    cnt = 0
    for folder in existing:
        if not folder.startswith("Fake"):
            continue
        try:
            cnt = max(cnt, int(folder.split("_")[1]))
        except (IndexError, ValueError):
            continue  # "Fake*" entry without a numeric counter field

    # zip stops at the shorter list, i.e. after len(splited_A) pairs.
    for index_a, index_b in zip(splited_A, splited_B):
        cnt += 1
        save_profile_dir = f"Fake_{cnt:0>6}_{gender}_Asian_60"
        os.mkdir(os.path.join(save_dir, save_profile_dir))
        make_images(index_a, index_b, save_dir, save_profile_dir, profiles)


def not_random_make_fake_pics(save_dir=data_directory):
    """Generate fake profiles for both genders using the ordered half/half pairing.

    Uses the module-level ``male`` and ``female`` profile lists and writes
    into ``save_dir``.
    """
    for label, gender_profiles in (("male", male), ("female", female)):
        not_random_make_fakes_by_gender(label, gender_profiles, save_dir)
    print("Make Done.")

def random_make_fake_pics(save_dir=data_directory):
    """Generate fake profiles for both genders using the random half/half pairing.

    Uses the module-level ``male`` and ``female`` profile lists and writes
    into ``save_dir``.
    """
    for label, gender_profiles in (("male", male), ("female", female)):
        random_make_fakes_by_gender(label, gender_profiles, save_dir)
    print("Randomly Make Done.")

    
def rm_fake_pics(save_dir=data_directory):
    """Delete every generated ``Fake*`` directory directly under ``save_dir``.

    A missing ``save_dir`` is treated as "nothing to remove", so this can be
    called before the first generation run. Only real directories are
    removed; files or symlinks whose names start with ``Fake`` are left alone.

    Args:
        save_dir: Directory to clean.
    """
    try:
        entries = os.listdir(save_dir)
    except FileNotFoundError:
        entries = []

    for entry in entries:
        if not entry.startswith("Fake"):
            continue
        fake_dir = os.path.join(save_dir, entry)
        # shutil.rmtree raises on plain files and symlinks, so skip those.
        if os.path.isdir(fake_dir) and not os.path.islink(fake_dir):
            shutil.rmtree(fake_dir)
    print("Remove Done.")



In [6]:
# # 비교를 위한 코드입니다. 경로의 Default가 실제 데이터셋 위치입니다.
# rm_fake_pics('./not_random')
# rm_fake_pics('./random')
# not_random_make_fake_pics('./not_random')
# random_make_fake_pics('./random')

In [7]:
# Remove previously generated Fake* folders from the default dataset
# directory, then regenerate them with the random pairing.
rm_fake_pics()
random_make_fake_pics()

Remove Done.
Randomly Make Done.
