# Person Face Sketches
In this notebook, I create synthetic dataset of person sketches.

## Download [CelebAMask-HQ](https://github.com/switchablenorms/CelebAMask-HQ#celebamask-hq-dataset-downloads)


In [1]:
!gdown 1badu11NqxGf6qM3PTTooQDJvQbejgbTv
!unzip -q CelebAMask-HQ.zip

Downloading...
From: https://drive.google.com/uc?id=1badu11NqxGf6qM3PTTooQDJvQbejgbTv
To: /local/CelebAMask-HQ.zip
100%|██████████████████████████████████████| 3.15G/3.15G [00:56<00:00, 55.8MB/s]


In [2]:
import os, random
import pandas as pd
import numpy as np
from PIL import Image, ImageFilter, ImageOps, ImageEnhance
from tqdm import tqdm

## Directory Structure
```
person-face-sketches
  ├─ CelebAMask-HQ
  |    ├─ CelebA-HQ-img
  |    ├─ CelebAMask-HQ-mask-anno
  |    ├─ CelebA-HQ-to-CelebA-mapping.txt
  |    ├─ CelebAMask-HQ-pose-anno.txt
  |    ├─ CelebAMask-HQ-attribute-anno.txt
  |    └─ README.txt
  |
  ├─ train
  |    ├─ photos
  |    └─ sketches
  |
  ├─ val
  |    ├─ photos
  |    └─ sketches
  |
  ├─ test
  |    ├─ photos
  |    └─ sketches
  |
  ├─ person-face-sketches.ipynb
  |
  └─ sketch_simplification
```

In [3]:
path_CelebAMaskHQ = './CelebAMask-HQ'
path_CelebAMaskHQ_image = os.path.join(path_CelebAMaskHQ, 'CelebA-HQ-img')
path_CelebAMaskHQ_pose = os.path.join(path_CelebAMaskHQ, 'CelebAMask-HQ-pose-anno.txt')
path_CelebAMaskHQ_attribute = os.path.join(path_CelebAMaskHQ, 'CelebAMask-HQ-attribute-anno.txt')

path_train = './train'
path_train_photos = os.path.join(path_train, 'photos')
path_train_sketches = os.path.join(path_train, 'sketches')
path_val = './val'
path_val_photos = os.path.join(path_val, 'photos')
path_val_sketches = os.path.join(path_val, 'sketches')
path_test = './test'
path_test_photos = os.path.join(path_test, 'photos')
path_test_sketches = os.path.join(path_test, 'sketches')

In [4]:
os.makedirs(path_train_photos, exist_ok=True)
os.makedirs(path_train_sketches, exist_ok=True)
os.makedirs(path_val_photos, exist_ok=True)
os.makedirs(path_val_sketches, exist_ok=True)
os.makedirs(path_test_photos, exist_ok=True)
os.makedirs(path_test_sketches, exist_ok=True)

## Images Cleaning

In [5]:
path_images = os.listdir(path_CelebAMaskHQ_image)
print(len(path_images), path_images[:5])

30000 ['1982.jpg', '27041.jpg', '28259.jpg', '19134.jpg', '27668.jpg']


#### Clean Poses

In [6]:
poses_threshold = 20

In [7]:
poses = pd.DataFrame()
with open(path_CelebAMaskHQ_pose) as f:
    f.readline()
    columns = f.readline().split()
    data = f.readlines()
    data = np.array([datum.split() for datum in data])
    for i, column in enumerate(columns):
        poses[column] = data[:,i+1]
    poses = poses.astype(float)
    poses = poses.set_index(data[:,0])
    
poses

Unnamed: 0,Yaw,Pitch,Raw
0.jpg,-16.761650,-3.540695,-0.468292
1.jpg,8.853630,-16.055931,-1.150886
2.jpg,35.265182,-6.890411,-1.581253
3.jpg,-16.793152,1.010948,0.133667
4.jpg,5.474228,-12.340668,-0.894409
...,...,...,...
29995.jpg,0.108932,-0.664413,0.135849
29996.jpg,4.065002,-1.689423,-1.262672
29997.jpg,5.378021,-12.933311,1.843475
29998.jpg,16.967903,-6.059593,-3.954010


In [8]:
new_path_images = []
for path in path_images:
    eligible = True
    for value in poses.loc[path]:
        if abs(value) > poses_threshold:
            eligible = False
    if eligible: new_path_images.append(path)
    
path_images = new_path_images
print(len(path_images), path_images[:5])

23543 ['1982.jpg', '27041.jpg', '19134.jpg', '27668.jpg', '23013.jpg']


#### Clean Attributes

In [9]:
not_allowed_attributes = ['Eyeglasses', 'Wearing_Hat']

In [10]:
attributes = pd.DataFrame()
with open(path_CelebAMaskHQ_attribute) as f:
    f.readline()
    columns = f.readline().split()
    data = f.readlines()
    data = np.array([datum.split() for datum in data])
    for i, column in enumerate(columns):
        attributes[column] = data[:,i+1]
    attributes = attributes.astype(int)
    attributes = attributes.set_index(data[:,0])
    
attributes

Unnamed: 0,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,Blond_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0.jpg,-1,1,1,1,-1,-1,1,-1,-1,-1,...,-1,1,-1,1,-1,-1,1,-1,-1,1
1.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,1,...,-1,1,-1,1,-1,-1,1,-1,-1,1
2.jpg,-1,-1,1,1,-1,-1,1,-1,-1,-1,...,-1,1,-1,1,1,-1,1,-1,-1,1
3.jpg,-1,-1,1,-1,-1,-1,-1,1,1,-1,...,-1,1,-1,1,-1,-1,1,-1,-1,1
4.jpg,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,...,-1,-1,1,-1,1,-1,-1,-1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995.jpg,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,...,-1,1,-1,-1,-1,-1,-1,1,-1,-1
29996.jpg,1,-1,-1,1,-1,-1,-1,1,-1,-1,...,1,1,-1,-1,-1,-1,-1,-1,-1,1
29997.jpg,-1,-1,-1,-1,-1,1,-1,-1,1,-1,...,-1,-1,1,-1,-1,-1,-1,-1,-1,1
29998.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,1,-1,-1,1,-1,-1,1


In [11]:
new_path_images = []
for path in path_images:
    eligible = True
    for attribute in not_allowed_attributes:
        if attributes.loc[path][attribute] == 1:
            eligible = False
    if eligible: new_path_images.append(path)
    
path_images = new_path_images
print(len(path_images), path_images[:5])

21679 ['1982.jpg', '19134.jpg', '27668.jpg', '23013.jpg', '12165.jpg']


## Images Splitting

In [12]:
n_train = 20000
n_val = 1000
n_test = len(path_images) - n_train - n_val

# n_train = 10
# n_val = 5
# n_test = 5

In [13]:
random.shuffle(path_images)
train_path_images = path_images[:n_train]
val_path_images = path_images[n_train:n_train+n_val]
test_path_images = path_images[n_train+n_val:n_train+n_val+n_test]

print('train', len(train_path_images), train_path_images[:5])
print('val', len(val_path_images), val_path_images[:5])
print('test', len(test_path_images), test_path_images[:5])

train 20000 ['27905.jpg', '11769.jpg', '980.jpg', '11090.jpg', '29873.jpg']
val 1000 ['10334.jpg', '21202.jpg', '11384.jpg', '14507.jpg', '10567.jpg']
test 679 ['406.jpg', '20457.jpg', '17654.jpg', '9262.jpg', '9457.jpg']


## Copy Images to Photos

In [14]:
def copy(source, target, paths, desc=''):
    for path in tqdm(paths, desc=desc):
        image = Image.open(os.path.join(source, path))
        image = image.resize((512, 512))
        image.save(os.path.join(target, path))

copy(path_CelebAMaskHQ_image, path_train_photos, train_path_images, desc='train')
copy(path_CelebAMaskHQ_image, path_val_photos, val_path_images, desc='val')
copy(path_CelebAMaskHQ_image, path_test_photos, test_path_images, desc='test')

train: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [07:45<00:00, 42.93it/s]
val: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:21<00:00, 45.58it/s]
test: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 679/679 [00:15<00:00, 43.00it/s]


## Convert Photos to Sketches

In [15]:
def edge(image):
    image = image.filter(ImageFilter.FIND_EDGES)
    image = image.filter(ImageFilter.BLUR)
    image = image.filter(ImageFilter.SHARPEN)
    image = ImageOps.invert(image)
    image = ImageEnhance.Contrast(image).enhance(1.5)
    return image

def convert(source, target, desc=''):
    for path in tqdm(os.listdir(source), desc=desc):
        image = Image.open(os.path.join(source, path))
        image = edge(image)
        image.save(os.path.join(target, path))
        
convert(path_train_photos, path_train_sketches, desc='train')
convert(path_val_photos, path_val_sketches, desc='val')
convert(path_test_photos, path_test_sketches, desc='test')

train: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20655/20655 [13:09<00:00, 26.15it/s]
val: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:39<00:00, 25.53it/s]
test: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 679/679 [00:26<00:00, 25.25it/s]


## Simplify Sketches

In [None]:
import sketch_simplification
model = sketch_simplification.Simplification(device='cuda')

def simplify(source, desc=''):
    for path in tqdm(os.listdir(source), desc=desc):
        image = Image.open(os.path.join(source, path))
        image = model.simplify(image)
        image.save(os.path.join(source, path))

simplify(path_train_sketches, desc='train')
simplify(path_val_sketches, desc='val')
simplify(path_test_sketches, desc='test')

  from .autonotebook import tqdm as notebook_tqdm


Loaded sketch simplification from model_gan.pth.


train: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20655/20655 [50:08<00:00,  6.87it/s]
val: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [02:25<00:00,  6.86it/s]
test:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏| 675/679 [01:38<00:00,  6.91it/s]