In [None]:
import cv2
import numpy as np
import os
import json
import matplotlib.pyplot as plt
import matplotlib
from PIL import Image

import data.data_iterators as data

In [None]:
# Load datasets
# Both directories are handed unchanged to every `data.get_*` loader below.
IMAGES_DIR = '/media/pupa/DataStorage/datasets'
ANNOTATIONS_DIR = '/home/pupa/dev/apple_face/annotations'

# Each loader result is materialised into a list so it can be measured with
# len() and iterated more than once by the cells that follow.
_300w_train = list(data.get_300W_train(IMAGES_DIR, ANNOTATIONS_DIR))
_300w_test = list(data.get_300W_test(IMAGES_DIR, ANNOTATIONS_DIR))

celeba_train = list(data.get_celeba_train(IMAGES_DIR, ANNOTATIONS_DIR))
celeba_test = list(data.get_celeba_test(IMAGES_DIR, ANNOTATIONS_DIR))

ffhq_train = list(data.get_ffhq_train(IMAGES_DIR, ANNOTATIONS_DIR))
ffhq_test = list(data.get_ffhq_test(IMAGES_DIR, ANNOTATIONS_DIR))

wflw_train = list(data.get_WFLW_train(IMAGES_DIR, ANNOTATIONS_DIR))
wflw_test = list(data.get_WFLW_test(IMAGES_DIR, ANNOTATIONS_DIR))

# Combined views: all train splits, all test splits, and everything together.
all_train = _300w_train + celeba_train + ffhq_train + wflw_train
all_test = _300w_test + celeba_test + ffhq_test + wflw_test
all_data = all_train + all_test


def _count_faces(records):
    """Return the total number of entries in `.faces` over all `records`."""
    return sum(len(record.faces) for record in records)


# Per dataset: number of loaded records and total number of faces, for the
# train split and then the test split.
print(f'300W Train: {len(_300w_train)} faces {_count_faces(_300w_train)}; '
      f'Test: {len(_300w_test)} faces {_count_faces(_300w_test)}')
print(f'CelebA Train: {len(celeba_train)} faces {_count_faces(celeba_train)}; '
      f'Test: {len(celeba_test)} faces {_count_faces(celeba_test)}')
print(f'FFHQ Train: {len(ffhq_train)} faces {_count_faces(ffhq_train)}; '
      f'Test: {len(ffhq_test)} faces {_count_faces(ffhq_test)}')
print(f'WFLW Train: {len(wflw_train)} faces {_count_faces(wflw_train)}; '
      f'Test: {len(wflw_test)} faces {_count_faces(wflw_test)}')

# Please note that the number of images in the test set is not the same as in the original repo.
# E.g. for WFLW there are supposed to be 2500 records in the test split, however:
# 1. if an image contains several faces, it is counted as several records
# 2. the Apple engine may find fewer/more images than were manually annotated

In [None]:
def draw_landmarks_crop(img, landmarks):
    """Mark every landmark on `img` with a small filled dot.

    Args:
        img: image handed to `cv2.circle`.
        landmarks: iterable of indexable points; only the first two entries
            of each point are read, and both are truncated with `int()`.

    Returns:
        The value returned by the last `cv2.circle` call, or `img` itself
        when `landmarks` is empty.
    """
    dot_radius = 3
    dot_color = (0, 0, 255)
    filled = -1  # negative thickness asks OpenCV for a filled shape
    canvas = img
    for point in landmarks:
        center = (int(point[0]), int(point[1]))
        canvas = cv2.circle(canvas, center, dot_radius, dot_color, filled)
    return canvas

def draw_bboxes(img, bbox):
    """Draw a single bounding box on `img` and mark two of its corners.

    The box outline is drawn with a 1-pixel line, then a filled dot is placed
    on the (left, top) corner and another on the (right, bottom) corner so the
    orientation of the box can be checked visually.

    Args:
        img: image handed to the OpenCV drawing functions.
        bbox: mapping with the keys 'left', 'top', 'right' and 'bottom'.
            Values may be any numbers; they are truncated with `int()`.

    Returns:
        The value returned by the last OpenCV drawing call.
    """
    # OpenCV's Python drawing functions only accept integer pixel coordinates,
    # so coerce here, the same way draw_landmarks_crop does for landmarks.
    left, top, right, bottom = (
        int(bbox[key]) for key in ('left', 'top', 'right', 'bottom')
    )
    img = cv2.rectangle(img, (left, top), (right, bottom), (255, 0, 0), 1)
    # Differently coloured dots distinguish the two corners from each other.
    img = cv2.circle(img, (left, top), 7, (0, 0, 0), -1)
    img = cv2.circle(img, (right, bottom), 7, (255, 255, 255), -1)
    return img

# Visual sanity check: take the first train record of every dataset, draw its
# annotations, and show the four images side by side.
samples_data: list[data.ImageWithFaces] = [
    _300w_train[0],
    celeba_train[0],
    ffhq_train[0],
    wflw_train[0],
]
samples_images = []
for s in samples_data:
    bgr_img = cv2.imread(s.image_path)
    if bgr_img is None:
        # cv2.imread reports an unreadable/missing file by returning None;
        # fail here with the offending path instead of inside cvtColor.
        raise FileNotFoundError(f'Could not read image: {s.image_path}')
    # Convert to RGB channel order before handing the image to matplotlib.
    img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
    print(s.image_path)
    for face in s.faces:
        bbox, landmarks = face.bbox, face.landmarks
        print(bbox)
        img = draw_bboxes(img, bbox)
        img = draw_landmarks_crop(img, landmarks)
    samples_images.append(img)

# Scale every image to the smallest height (keeping its aspect ratio) so the
# images can be concatenated horizontally.
min_height = min(img.shape[0] for img in samples_images)
samples_images = [
    cv2.resize(img, (int(img.shape[1] * min_height / img.shape[0]), min_height))
    for img in samples_images
]
samples_images = np.concatenate(samples_images, axis=1)

plt.figure(figsize=(20, 20))
plt.imshow(samples_images)
plt.show()

In [None]:
# Single pass over every loaded record, collecting the raw statistics:
#   images_count - number of records visited
#   face_per_img - {number of faces in a record: how many records have it}
#   image_sizes  - first two entries of each record's `image_size`
#   face_sizes   - (width, height) of every face bounding box
images_count = 0
face_per_img = {}
image_sizes = []
face_sizes = []
for item in all_data:
    images_count += 1

    size = item.image_size
    image_sizes.append((size[0], size[1]))

    n_faces = len(item.faces)
    face_per_img.setdefault(n_faces, 0)
    face_per_img[n_faces] += 1

    for face in item.faces:
        bbox = face.bbox
        width, height = bbox['right'] - bbox['left'], bbox['bottom'] - bbox['top']
        # A degenerate or inverted box indicates a broken annotation; report
        # which image it belongs to.
        assert width > 0 and height > 0, item.image_path
        face_sizes.append((width, height))

In [None]:
# Expand the {face count: number of images} tally back into one entry per
# image so that plt.hist can bin it directly.
mylist = []
for faces_in_image, n_images in face_per_img.items():
    mylist.extend([faces_in_image] * n_images)

# Log-scaled counts keep rare face counts visible next to the common ones.
plt.hist(mylist, bins=64, log=True)
plt.title('Face count per image')
plt.show()

In [None]:
# 2-D histogram of the collected image sizes: column 0 on the x axis,
# column 1 on the y axis, with logarithmic colour scaling.
image_sizes = np.array(image_sizes)
img_widths, img_heights = image_sizes[:, 0], image_sizes[:, 1]
log_norm = matplotlib.colors.LogNorm()
plt.hist2d(img_widths, img_heights, bins=64, norm=log_norm)
plt.gca().set(xlabel='width', ylabel='height', title='Image sizes')
plt.show()

In [None]:
# 2-D histogram of the face bounding-box sizes gathered earlier:
# box width on the x axis, box height on the y axis, logarithmic colour scaling.
face_sizes = np.array(face_sizes)
face_widths, face_heights = face_sizes[:, 0], face_sizes[:, 1]
log_norm = matplotlib.colors.LogNorm()
plt.hist2d(face_widths, face_heights, bins=48, norm=log_norm)
plt.gca().set(xlabel='width', ylabel='height', title='Face sizes')
plt.show()