# prepare_dataset

Track Zeno's face images using an [existing tool](https://github.com/IntelligentBehaviourUnderstandingGroup/dlib_and_chehra_stuff) and prepare them for manual annotation.

## Step 1: Import modules

In [1]:
# Standard-library and third-party modules used throughout the notebook.
import os
import sys
import cv2
import dlib
import glob
import time
import shutil
import numpy as np
# The configuration parser module was renamed between Python 2 and Python 3;
# try the Python 2 name first and fall back to the Python 3 one.
try:
    from ConfigParser import ConfigParser    # Python 2 module name
except ImportError:
    from configparser import ConfigParser    # Python 3 module name
# Load the notebook settings. 'config.ini' is a relative path, so it is resolved
# against the current working directory.
# NOTE: ConfigParser.read() silently skips files it cannot open, so a missing
# config.ini only surfaces later, when an option is looked up.
config = ConfigParser()
config.read('config.ini')
# The tracker package is not installed; make the checkout named in config.ini
# importable. This must happen before the ibug_face_tracker import below.
sys.path.append(os.path.realpath(config.get('facial_landmark_tracker', 'repository_path')))
import ibug_face_tracker
# NOTE(review): the star import is presumably what provides save_pts, load_pts,
# save_annotation_job and load_annotation_job used by later cells -- confirm
# against zeno_face_tracker_helpers.
from zeno_face_tracker_helpers import *
print('All modules imported.')

All modules imported.


## Step 2: Initialise the landmark localiser

In [2]:
# Build the landmark tracker from the two model files named in config.ini.
tracker_section = 'facial_landmark_tracker'
ert_model_path = os.path.realpath(config.get(tracker_section, 'ert_model_path'))
auxiliary_model_path = os.path.realpath(config.get(tracker_section, 'auxiliary_model_path'))
tracker = ibug_face_tracker.FaceTracker(ert_model_path, auxiliary_model_path)

# Face detection settings are read from the configuration file.
tracker.face_detection_scale = config.getfloat(tracker_section, 'face_detection_scale')
tracker.minimum_face_size = config.getint(tracker_section, 'minimum_face_size')

# Settings fixed for this notebook.
# NOTE(review): -1e6 presumably makes the hard-failure check practically
# impossible to trigger -- confirm against ibug_face_tracker.
tracker.hard_failure_threshold = -1e6
tracker.estimate_head_pose = False
tracker.eye_iterations = 0

print('Facial landmark localiser initialised.')

Facial landmark localiser initialised.


## Step 3: Localise Facial Landmarks

In [3]:
# Enumerate jobs: every PNG under ./dataset/cam*/, excluding the landmark
# renderings ('<name>.init.pts.png') that this cell itself writes next to the
# source images, so that re-running the cell does not process its own output.
source_images = sorted(glob.glob(os.path.realpath(os.path.join('./dataset', 'cam*', '*.png'))))
source_images = [x for x in source_images if '.pts.png' not in x]
print('%d images to be processed.' % len(source_images))

# Get landmarks. Each image is tracked independently (the tracker is reset
# first). On success the landmarks are saved to '<name>.init.pts' and a
# rendering of the result to '<name>.init.pts.png'.
successes = 0
unreadable_images = []
last_check_time = time.time()
for idx, image_path in enumerate(source_images):
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable or corrupt file by returning None
        # instead of raising; skip the file rather than crash the whole run.
        unreadable_images.append(image_path)
        print('Warning: failed to read image, skipped: ' + image_path)
    else:
        tracker.reset()
        tracker.track(image)
        if tracker.has_facial_landmarks:
            pts_path = os.path.splitext(image_path)[0] + '.init.pts'
            save_pts(pts_path, tracker.facial_landmarks)
            # plot_current_result draws onto `image`, which is then saved as
            # the rendering.
            tracker.plot_current_result(image)
            rendering_path = pts_path + '.png'
            cv2.imwrite(rendering_path, image)
            successes += 1
    # Report progress at most once every 10 seconds.
    current_time = time.time()
    if current_time - last_check_time > 10.0:
        last_check_time = current_time
        print('%d images have been processed.' % (idx + 1))
print('Done, all %d images have been processed, %d of which have landmarks localised.' % 
      (len(source_images), successes))
if unreadable_images:
    print('%d images could not be read and were skipped.' % len(unreadable_images))

1200 images to be processed.
78 images have been processed.
135 images have been processed.
191 images have been processed.
266 images have been processed.
330 images have been processed.
384 images have been processed.
450 images have been processed.
520 images have been processed.
579 images have been processed.
634 images have been processed.
693 images have been processed.
755 images have been processed.
809 images have been processed.
861 images have been processed.
926 images have been processed.
981 images have been processed.
1036 images have been processed.
1099 images have been processed.
1159 images have been processed.
Done, all 1200 images have been processed, 1034 of which have landmarks localised.


## Step 4: Prepare the annotation jobs that can be loaded in FLAT

In [4]:
# Enumerate the recording sessions (one 'cam*' directory per session)
recording_sessions = sorted(glob.glob(os.path.realpath(os.path.join('./dataset', 'cam*'))))
recording_sessions = [x for x in recording_sessions if os.path.isdir(x)]

# How many examples per session? Use the size of the smallest session so that
# every session contributes the same number of frames. Files containing
# '.pts.' (the landmark renderings) are not counted. The result is always an
# integer, and 0 when no session exists, so it is a valid array dimension.
session_sizes = []
for session in recording_sessions:
    images = glob.glob(os.path.join(session, '*.png'))
    session_sizes.append(len([x for x in images if '.pts.' not in x]))
num_samples = min(session_sizes) if session_sizes else 0
if not recording_sessions:
    print('No recording sessions found, no annotation job will be created.')

# Permute samples in the sessions so that each annotation batch gets a bit of
# everything: indices[batch, frame] is the session that supplies frame number
# `frame` to that batch. Every column is a permutation of the sessions, hence a
# given frame of a given session lands in exactly one batch.
# NOTE: no random seed is set in this notebook, so the assignment differs
# between runs.
indices = np.zeros((len(recording_sessions), num_samples), dtype=int)
for idx in range(indices.shape[1]):
    indices[:, idx] = np.random.permutation(indices.shape[0])

# Create jobs (one batch per row of `indices`)
expected_landmark_count = 68
for batch_idx, batch in enumerate(indices):
    batch_content = []
    for idx, session in enumerate(batch):
        # The frames of a session are expected to be named 000000.png,
        # 000001.png, ... ; frames without an '.init.pts' file are left out.
        image_path = os.path.join(recording_sessions[session], '%06d.png' % idx)
        pts_path = os.path.splitext(image_path)[0] + '.init.pts'
        if os.path.exists(pts_path):
            pts = load_pts(pts_path)
            if pts.shape[0] == expected_landmark_count:
                batch_content.append((image_path, pts))
    # Never overwrite an existing job file: append a running number until the
    # name is free.
    job_path = os.path.realpath(os.path.join('./dataset', 'batch_%03d.fad' % batch_idx))
    duplication = 0
    while os.path.exists(job_path):
        duplication += 1
        job_path = os.path.realpath(os.path.join(
            './dataset', 'batch_%03d.%03d.fad' % (batch_idx, duplication)))
    save_annotation_job(job_path, batch_content, expected_landmark_count)
    print('Annotation job created: ' + job_path)
    # Keep a copy of the freshly created job next to it.
    backup_path = job_path + '.bak'
    shutil.copyfile(job_path, backup_path)
    print('Backup file created: ' + backup_path)

Annotation job created: C:\zeno_face_tracker\dataset\batch_000.fad
Backup file created: C:\zeno_face_tracker\dataset\batch_000.fad.bak
Annotation job created: C:\zeno_face_tracker\dataset\batch_001.fad
Backup file created: C:\zeno_face_tracker\dataset\batch_001.fad.bak
Annotation job created: C:\zeno_face_tracker\dataset\batch_002.fad
Backup file created: C:\zeno_face_tracker\dataset\batch_002.fad.bak
Annotation job created: C:\zeno_face_tracker\dataset\batch_003.fad
Backup file created: C:\zeno_face_tracker\dataset\batch_003.fad.bak
Annotation job created: C:\zeno_face_tracker\dataset\batch_004.fad
Backup file created: C:\zeno_face_tracker\dataset\batch_004.fad.bak
Annotation job created: C:\zeno_face_tracker\dataset\batch_005.fad
Backup file created: C:\zeno_face_tracker\dataset\batch_005.fad.bak
Annotation job created: C:\zeno_face_tracker\dataset\batch_006.fad
Backup file created: C:\zeno_face_tracker\dataset\batch_006.fad.bak
Annotation job created: C:\zeno_face_tracker\dataset\ba

## Step 5: Do something about the undetected faces: prepare for face box annotation

In [2]:
def _largest_face_box(image_path, detector, detection_scale):
    """Detect faces in an image file and return the largest one.

    The image is converted to greyscale and resized by `detection_scale` before
    `detector` is run on it; the detected rectangles are scaled back to the
    original resolution.

    Returns a numpy array [left, top, right, bottom] for the rectangle with the
    largest area, or None when the image cannot be read or no face is found.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for unreadable files; treat as "no detection"
        return None
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_size = (image.shape[1], image.shape[0])
    target_size = (int(max(round(image_size[0] * detection_scale), 1)), 
                   int(max(round(image_size[1] * detection_scale), 1)))
    if target_size != image_size:
        image = cv2.resize(image, target_size)
    detected_faces = sorted(
        [dlib.rectangle(int(round(face_box.left() / detection_scale)), 
                        int(round(face_box.top() / detection_scale)), 
                        int(round(face_box.right() / detection_scale)), 
                        int(round(face_box.bottom() / detection_scale))) 
         for face_box in detector(image)], 
        key=dlib.rectangle.area, reverse=True)
    if len(detected_faces) == 0:
        return None
    largest_face = detected_faces[0]
    return np.array([largest_face.left(), largest_face.top(), 
                     largest_face.right(), largest_face.bottom()])


# See what had been included in previous batches. The number of sessions is
# taken from the number of 'cam*.pkl' files, and one 'batch_XXX.fad' job is
# expected per session.
number_of_sessions = len(glob.glob(os.path.realpath(os.path.join('./dataset', 'cam*.pkl'))))
batch_paths = [os.path.realpath(os.path.join('./dataset', 'batch_%03d.fad' % x)) 
               for x in range(number_of_sessions)]
included_images = []
for batch_path in batch_paths:
    batch_data = load_annotation_job(batch_path, config.getint('data_organisation', 
                                                               'number_of_landmarks'))
    included_images += [x['image_path'] for x in batch_data]
# A set makes the many membership tests below constant-time.
included_image_set = set(included_images)

# Get the leftover images: source images (not renderings) that are in no batch
leftover_images = sorted(glob.glob(os.path.realpath(os.path.join('./dataset', 'cam*', '*.png'))))
leftover_images = [x for x in leftover_images 
                   if '.pts.png' not in x and x not in included_image_set]

# Do something about these images: estimate a face box for every leftover image
# from its nearest neighbours in the same session. For each direction (previous
# / next frame) the closest frame that is part of a batch AND has a detectable
# face contributes one box; the estimate is the mean of the boxes found.
face_detection_scale = config.getfloat('facial_landmark_tracker', 'face_detection_scale')
face_detection_scale = max(face_detection_scale, 1e-6)    # Guard the divisions by the scale
face_detector = dlib.get_frontal_face_detector()
last_check_time = time.time()
batch_content = []
images_without_estimate = 0
for idx, leftover_image in enumerate(leftover_images):
    index = int(os.path.basename(leftover_image).split('.')[0])
    face_boxes = []
    for delta in [-1, 1]:
        next_index = index
        while True:
            next_index += delta
            next_image_path = os.path.join(os.path.dirname(leftover_image), '%06d.png' % next_index)
            if not os.path.exists(next_image_path):
                # Ran past the first / last frame of the session
                break
            if next_image_path not in included_image_set:
                continue
            next_face_box = _largest_face_box(next_image_path, face_detector, 
                                              face_detection_scale)
            if next_face_box is not None:
                face_boxes.append(next_face_box)
                break
    estimated_face_box = None
    if face_boxes:
        estimated_face_box = np.vstack(face_boxes).mean(axis=0)
    else:
        # No neighbour produced a face box. Fall back to a placeholder covering
        # the whole frame so that the image still reaches the annotator, who
        # has to draw the box from scratch.
        images_without_estimate += 1
        leftover_frame = cv2.imread(leftover_image)
        if leftover_frame is None:
            print('Warning: failed to read image, skipped: ' + leftover_image)
        else:
            print('Warning: no face box estimate, whole frame used: ' + leftover_image)
            estimated_face_box = np.array([0.0, 0.0, 
                                           leftover_frame.shape[1] - 1.0, 
                                           leftover_frame.shape[0] - 1.0])
    if estimated_face_box is not None:
        # Four corners: top-left, top-right, bottom-right, bottom-left
        batch_content.append((leftover_image, 
                              np.array([[estimated_face_box[0], estimated_face_box[1]], 
                                        [estimated_face_box[2], estimated_face_box[1]], 
                                        [estimated_face_box[2], estimated_face_box[3]], 
                                        [estimated_face_box[0], estimated_face_box[3]]])))
    # Report progress at most once every 10 seconds.
    current_time = time.time()
    if last_check_time < current_time - 10.0:
        last_check_time = current_time
        print('%d leftover images have been processed.' % (idx + 1))
print('All %d leftover images have been processed.' % len(leftover_images))
if images_without_estimate > 0:
    print('%d leftover images had no face box estimate from neighbouring frames.' % 
          images_without_estimate)

# Prepare the FAD file. Never overwrite an existing job file: append a running
# number until the name is free.
job_path = os.path.realpath(os.path.join(
    './dataset', 'batch_%03d.face_box.fad' % number_of_sessions))
duplication = 0
while os.path.exists(job_path):
    duplication += 1
    job_path = os.path.realpath(os.path.join(
        './dataset', 'batch_%03d.%03d.face_box.fad' % (number_of_sessions, duplication)))
save_annotation_job(job_path, batch_content, 4)
print('Annotation job created: ' + job_path)
backup_path = job_path + '.bak'
shutil.copyfile(job_path, backup_path)
print('Backup file created: ' + backup_path)

93 leftover images have been processed.
All 166 leftover images have been processed.
Annotation job created: D:\hhj\zeno_face_tracker\dataset\batch_012.face_box.fad
Backup file created: D:\hhj\zeno_face_tracker\dataset\batch_012.face_box.fad.bak


## Step 6: Do something about the undetected faces: prepare for landmark annotation

__Do not run this before completing the face box annotation work!__

In [21]:
# Get face box from the FAD file(s) produced by the face box annotation step.
# Each sample stores the 4 annotated box corners under 'facial_landmarks'.
samples = []
fad_paths = sorted(glob.glob(os.path.realpath(
    os.path.join('./dataset', 'batch*.face_box.fad'))))
for fad_path in fad_paths:
    samples += load_annotation_job(fad_path, 4)
print('%d samples have been loaded.' % len(samples))

# Get landmarks, initialising the tracker with the annotated face box instead
# of relying on face detection.
successes = 0
unreadable_images = []
batch_content = []
last_check_time = time.time()
for idx, sample in enumerate(samples):
    image = cv2.imread(sample['image_path'])
    if image is None:
        # cv2.imread signals an unreadable or corrupt file by returning None
        # instead of raising; skip the file rather than crash the whole run.
        unreadable_images.append(sample['image_path'])
        print('Warning: failed to read image, skipped: ' + sample['image_path'])
    else:
        tracker.reset()
        # Axis-aligned bounding box of the annotated corners, passed to the
        # tracker as (left, top, width, height) in whole pixels.
        top_left = np.round(sample['facial_landmarks'].min(axis=0)).astype(int)
        bottom_right = np.round(sample['facial_landmarks'].max(axis=0)).astype(int)
        tracker.track(image, (top_left[0], top_left[1], bottom_right[0] - top_left[0] + 1, 
                              bottom_right[1] - top_left[1] + 1))
        if tracker.has_facial_landmarks:
            batch_content.append((sample['image_path'], tracker.facial_landmarks))
            pts_path = os.path.splitext(sample['image_path'])[0] + '.init.pts'
            save_pts(pts_path, tracker.facial_landmarks)
            # plot_current_result draws onto `image`, which is then saved as
            # the rendering.
            tracker.plot_current_result(image)
            rendering_path = pts_path + '.png'
            cv2.imwrite(rendering_path, image)
            successes += 1
    # Report progress at most once every 10 seconds.
    current_time = time.time()
    if current_time - last_check_time > 10.0:
        last_check_time = current_time
        print('%d leftover images have been processed.' % (idx + 1))
print('Done, all %d leftover images have been processed, %d of which have landmarks localised.' % 
      (len(samples), successes))
if unreadable_images:
    print('%d leftover images could not be read and were skipped.' % len(unreadable_images))

# Prepare the FAD file. The batch number continues after the per-session
# batches; never overwrite an existing job file, append a running number until
# the name is free.
number_of_sessions = len(glob.glob(os.path.realpath(os.path.join('./dataset', 'cam*.pkl'))))
job_path = os.path.realpath(os.path.join(
    './dataset', 'batch_%03d.fad' % number_of_sessions))
duplication = 0
while os.path.exists(job_path):
    duplication += 1
    job_path = os.path.realpath(os.path.join(
        './dataset', 'batch_%03d.%03d.fad' % (number_of_sessions, duplication)))
save_annotation_job(job_path, batch_content, 68)
print('Annotation job created: ' + job_path)
backup_path = job_path + '.bak'
shutil.copyfile(job_path, backup_path)
print('Backup file created: ' + backup_path)

166 samples have been loaded.
150 leftover images have been processed.
Done, all 166 leftover images have been processed, 166 of which have landmarks localised.
Annotation job created: D:\hhj\zeno_face_tracker\dataset\batch_012.fad
Backup file created: D:\hhj\zeno_face_tracker\dataset\batch_012.fad.bak
