In [10]:
import random
import shutil
import os
import subprocess
import cv2
import numpy as np
import torch
import matplotlib.image as mpimg

from os import rename
from glob import glob
from random import shuffle

  (fname, cnt))
  (fname, cnt))


In [11]:
# Side length in pixels: preprocess_images asserts every image is IMG_SIZE x IMG_SIZE x 3
# and embeds this value in the name of the tensor file it writes.
IMG_SIZE = 128

In [12]:
def make_image_dir(data_name, attr_name, target_folders, source_folders, use_shuffle = False) :
    """Copy raw images into a numbered dataset folder and write its attribute list.

    Every folder is looked up under ``./raw_data``. Files ending in ``jpg``,
    ``jpeg`` or ``png`` are collected, source folders first and target folders
    second, and copied to ``./processed_data/<data_name>/images/`` as
    ``000001.jpg``, ``000002.jpg``, ... The copy is byte-for-byte, so the
    ``.jpg`` name is used regardless of the original extension.

    ``./processed_data/<data_name>/list_attr_<data_name>.txt`` is written with
    the image count on the first line, ``attr_name`` on the second line, and one
    ``<file_name> <1|-1>`` line per image (1 for target images, -1 for source
    images).

    Args:
        data_name: Name of the dataset directory created under ``./processed_data``.
        attr_name: Attribute name written on the second line of the list file.
        target_folders: Iterable of ``(folder, n)`` pairs whose images get label 1.
        source_folders: Iterable of ``(folder, n)`` pairs whose images get label -1.
            In both lists ``n`` caps the number of images taken from that
            folder; ``None`` takes every image.
        use_shuffle: If true, shuffle the combined image list before numbering.

    Returns:
        The number of images copied.

    Raises:
        AssertionError: If ``./processed_data/<data_name>`` already exists.
    """
    image_formats = ['jpg', 'jpeg', 'png']

    def collect_images(folders, is_target) :
        """Return ``(path, is_target)`` pairs for every selected image in ``folders``."""
        collected = []
        for folder, n in folders :
            folder_path = os.path.join('./raw_data', folder)
            folder_images = []
            for image_format in image_formats :
                # glob order is unspecified; sort so the selection is reproducible.
                folder_images += sorted(glob('%s/*.%s'%(folder_path, image_format)))
            # Apply the cap once per folder. Truncating inside the format loop
            # would allow up to n images per extension, i.e. up to 3 * n in total.
            if n is not None :
                folder_images = folder_images[:n]
            collected += [(image_path, is_target) for image_path in folder_images]
            print('%d images added from %s'%(len(folder_images), folder_path))
        return collected

    images = collect_images(source_folders, False) + collect_images(target_folders, True)

    if use_shuffle :
        shuffle(images)

    dir_path = os.path.join('./processed_data', data_name)
    image_dir_path = os.path.join(dir_path, 'images')

    assert not os.path.exists(dir_path)
    # os.makedirs instead of a shell "mkdir": portable, no shell interpolation of
    # data_name, creates ./processed_data when missing, and raises on failure.
    os.makedirs(image_dir_path)

    list_path = "%s/list_attr_%s.txt"%(dir_path, data_name)
    # The context manager closes the list file even if a copy fails midway.
    with open(list_path, 'w') as f :
        f.write('%d\n%s\n'%(len(images), attr_name))

        for idx, (path, attr) in enumerate(images):
            file_name = '%06i.jpg' % (idx + 1)
            shutil.copy(path, os.path.join(image_dir_path, file_name))
            f.write('%s %d\n' % (file_name, 1 if attr else -1))

    n_images = len(images)
    print("Copying %d images completed"%n_images)
    return n_images

In [13]:
def preprocess_images(data_name, n_images):
    """Pack the numbered images of a dataset into one uint8 tensor file.

    Reads ``./processed_data/<data_name>/images/000001.jpg`` through
    ``<n_images>.jpg`` (zero-padded to six digits), moves the channel axis to
    the front, and saves the resulting ``(n_images, 3, IMG_SIZE, IMG_SIZE)``
    tensor with ``torch.save`` to
    ``./processed_data/<data_name>/<data_name>_images_<IMG_SIZE>_<IMG_SIZE>.pth``.

    Args:
        data_name: Dataset directory name under ``./processed_data``.
        n_images: Number of images to read, starting from ``000001.jpg``.

    Raises:
        AssertionError: If the output file already exists, or if an image is
            not of shape ``(IMG_SIZE, IMG_SIZE, 3)``.
    """
    dataset_dir = os.path.join('./processed_data', data_name)
    output_path = os.path.join(
        dataset_dir, '%s_images_%i_%i.pth' % (data_name, IMG_SIZE, IMG_SIZE)
    )
    # Refuse to overwrite an existing tensor file.
    assert not os.path.isfile(output_path)

    print("Reading images from %s/ ..."%(data_name))

    image_dir = os.path.join(dataset_dir, 'images')
    expected_shape = (IMG_SIZE, IMG_SIZE, 3)

    pixels = np.zeros([n_images, 3, IMG_SIZE, IMG_SIZE], dtype=np.uint8)
    for slot in range(n_images):
        number = slot + 1  # image files are numbered from 1
        if number % 10000 == 0:
            print(number)  # coarse progress indicator

        image = mpimg.imread('%s/%06i.jpg' % (image_dir, number))
        assert image.shape == expected_shape
        # Height x width x channel -> channel x height x width.
        pixels[slot] = np.transpose(image, (2, 0, 1))

    tensor = torch.from_numpy(pixels)
    assert tensor.size() == (n_images, 3, IMG_SIZE, IMG_SIZE)

    print("Saving images to %s ..." % output_path)
    torch.save(tensor, output_path)

In [14]:
def preprocess_attributes(data_name, n_images):
    """Convert a dataset's attribute list file into a dict of boolean arrays.

    Reads ``./processed_data/<data_name>/list_attr_<data_name>.txt``. The first
    line is ignored, the second line holds the whitespace-separated attribute
    names, and each following line is ``<file_name> <value> ...`` with one
    ``1`` or ``-1`` per attribute. The result maps each attribute name to a
    boolean array of length ``n_images`` (True where the value is ``1``) and is
    saved with ``torch.save`` to
    ``./processed_data/<data_name>/<data_name>_attributes.pth``.

    Args:
        data_name: Dataset directory name under ``./processed_data``.
        n_images: Number of image rows expected in the list file.

    Raises:
        AssertionError: If the output file already exists, the file does not
            have exactly ``n_images + 2`` lines, a row's file name is not the
            expected ``%06i.jpg`` sequence number, a row does not carry exactly
            one value per attribute, or a value is not ``1`` or ``-1``.
    """
    output_name = '%s_attributes.pth' % (data_name)
    output_path = os.path.join('./processed_data', data_name, output_name)
    assert not os.path.isfile(output_path)

    file_path = os.path.join('./processed_data', data_name, 'list_attr_%s.txt' % (data_name))
    # Context manager so the handle is closed deterministically.
    with open(file_path, 'r') as attr_file:
        attr_lines = [line.rstrip() for line in attr_file]

    assert len(attr_lines) == n_images + 2

    attr_keys = attr_lines[1].split()
    # Builtin bool instead of np.bool: the alias was removed in NumPy 1.24.
    attributes = {k: np.zeros(n_images, dtype=bool) for k in attr_keys}

    for i, line in enumerate(attr_lines[2:]):
        image_id = i + 1
        split = line.split()
        # One file name followed by exactly one value per attribute; extra
        # values would index past attr_keys, missing ones would go unnoticed.
        assert len(split) == len(attr_keys) + 1
        assert split[0] == ('%06i.jpg' % image_id)
        assert all(x in ['-1', '1'] for x in split[1:])
        for key, value in zip(attr_keys, split[1:]):
            attributes[key][i] = value == '1'

    print("Saving attributes to %s ..." % output_path)
    torch.save(attributes, output_path)

In [15]:
# data_name : dataset name; it must also be passed as the --name parameter when training
# attr : attribute name; a single shared name is used, so it does not need to change per person
data_name = 'angelina_celeba_200k' 
attr = 'is_target' 

# [(folder name, number of images)] / putting None as the count adds every image in the folder
# source_folders : images that will be transformed / e.g. CelebA
# target_folders : photos of the person to transform into / e.g. Angelina Jolie
source_folders = [('img_celeba_aligned_128', None)]
target_folders = [('Angelina_Jolie_128', None)]

In [2]:
# Create the image folder and the attr list file
n_images = make_image_dir(data_name, attr, target_folders, source_folders)

In [4]:
# Save the images in PyTorch format
preprocess_images(data_name, n_images)

In [23]:
# Save the attr data in PyTorch format
preprocess_attributes(data_name, n_images)

Saving attributes to ./processed_data/angelina_celeba_200k/angelina_celeba_200k_attributes.pth ...
