# train_models

Train the face detection and landmark localisation models for Zeno's face based on our annotation.

## Step 1: Import models

In [27]:
import os
import cv2
import glob
import dlib
import time
import numpy as np
import pandas as pd
try:
    from ConfigParser import ConfigParser    # If using Python 2.7
except ImportError:
    from configparser import ConfigParser    # If using Python 3.5
config = ConfigParser()
print('All modules imported.')

All modules imported.


## Step 2: Prepare the data structure for training

In [2]:
# Load images and annotation
config.read('config.ini')
annotations = pd.read_pickle(os.path.realpath(os.path.join('./dataset', 'annotations.pkl')))
face_detection_scale = config.getfloat('facial_landmark_tracker', 'face_detection_scale')
face_detection_images = []
face_detection_groundtruth = []
shape_predictor_images = []
shape_predictor_groundtruth = []
last_check_time = time.time()
for idx in range(annotations.shape[0]):
    entry = annotations.iloc[idx]
    image_path = os.path.realpath(os.path.join('./dataset', entry['session'], 
                                               '%06d.png' % entry['index']))
    if type(entry['face_box']) != type(np.nan) or not np.isnan(entry['face_box']):
        image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2GRAY)
        image_size = (image.shape[1], image.shape[0])
        face_detection_size = (max(round(image_size[0] * face_detection_scale), 1), 
                               max(round(image_size[1] * face_detection_scale), 1))
        if face_detection_size != image_size:
            face_detection_images.append(cv2.resize(image, face_detection_size))
        else:
            face_detection_images.append(face_detection_image)
        face_box = [int(round(x * face_detection_scale)) for x in list(entry['face_box'])]
        face_detection_groundtruth.append([dlib.rectangle(face_box[0], face_box[1], 
                                                          face_box[0] + face_box[2] - 1, 
                                                          face_box[1] + face_box[3] - 1)])
        if type(entry['landmarks']) != type(np.nan) or not np.isnan(entry['landmarks']):
            shape_predictor_images.append(image)
            bounding_box = dlib.rectangle(entry['face_box'][0], entry['face_box'][1], 
                                          entry['face_box'][0] + entry['face_box'][2] - 1, 
                                          entry['face_box'][1] + entry['face_box'][3] - 1)
            landmarks = [dlib.point(int(round(pts[0])), int(round(pts[1]))) 
                         for pts in entry['landmarks']]
            shape_predictor_groundtruth.append([dlib.full_object_detection(bounding_box, 
                                                                           landmarks)])
    current_time = time.time()
    if last_check_time < current_time - 10.0:
        last_check_time = current_time
        print('%d face detection samples and %d landmark localisation have been prepared.' % 
              (len(face_detection_groundtruth), len(shape_predictor_groundtruth)))
print('All %d face detection samples and %d landmark localisation have been prepared.' % 
      (len(face_detection_groundtruth), len(shape_predictor_groundtruth)))

450 face detection samples and 202 landmark localisation have been prepared.
924 face detection samples and 403 landmark localisation have been prepared.
All 1200 face detection samples and 515 landmark localisation have been prepared.


## Step 3: Split the data into a training set (2/3) and a validation set (1/3)

In [3]:
# For face detection
sample_indices = np.random.permutation(len(face_detection_images))
training_set_size = int(round(len(face_detection_images) / 3 * 2))
face_detection_training_images = [face_detection_images[idx] for idx in 
                                  sample_indices[0:training_set_size]]
face_detection_training_groundtruth = [face_detection_groundtruth[idx] for idx in 
                                       sample_indices[0:training_set_size]]
face_detection_validation_images = [face_detection_images[idx] for idx in 
                                    sample_indices[training_set_size:]]
face_detection_validation_groundtruth = [face_detection_groundtruth[idx] for idx in 
                                         sample_indices[training_set_size:]]
print('Train / test split for face detection: %d / %d.' % 
      (len(face_detection_training_images), len(face_detection_validation_images)))

# For landmark localisation
sample_indices = np.random.permutation(len(shape_predictor_images))
training_set_size = int(round(len(shape_predictor_images) / 3 * 2))
shape_predictor_training_images = [shape_predictor_images[idx] for idx in 
                                   sample_indices[0:training_set_size]]
shape_predictor_training_groundtruth = [shape_predictor_groundtruth[idx] for idx in 
                                        sample_indices[0:training_set_size]]
shape_predictor_validation_images = [shape_predictor_images[idx] for idx in 
                                     sample_indices[training_set_size:]]
shape_predictor_validation_groundtruth = [shape_predictor_groundtruth[idx] for idx in 
                                          sample_indices[training_set_size:]]
print('Train / test split for landmark localisation: %d / %d.' % 
      (len(shape_predictor_training_images), len(shape_predictor_validation_images)))

Train / test split for face detection: 800 / 400.
Train / test split for landmark localisation: 343 / 172.


## Step 4: Train and test the face detector model

In [5]:
config.read('config.ini')
options = dlib.simple_object_detector_training_options()
options.be_verbose = config.getboolean('simple_object_detector_training_options', 'be_verbose')
options.add_left_right_image_flips = config.getboolean('simple_object_detector_training_options', 
                                                       'add_left_right_image_flips')
options.num_threads = config.getint('simple_object_detector_training_options', 'num_threads')
options.detection_window_size = config.getint('simple_object_detector_training_options', 
                                              'detection_window_size')
options.C = config.getfloat('simple_object_detector_training_options', 'C')
options.epsilon = config.getfloat('simple_object_detector_training_options', 'epsilon')
options.upsample_limit = config.getint('simple_object_detector_training_options', 
                                       'upsample_limit')
options.nuclear_norm_regularization_strength = config.getfloat('simple_object_detector_training_options', 
                                                               'nuclear_norm_regularization_strength')
# face_detector = dlib.train_simple_object_detector(face_detection_training_images, 
#                                                   face_detection_training_groundtruth, 
#                                                   options)
# print(dlib.test_simple_object_detector(face_detection_validation_images, 
#                                        face_detection_validation_groundtruth, face_detector))
face_detector = dlib.train_simple_object_detector(face_detection_images, 
                                                  face_detection_groundtruth, 
                                                  options)
face_detector_model_path = os.path.realpath(os.path.join('./models', 'zeno_face_detector.model'))
face_detector.save(face_detector_model_path)
print('Face detector model has been trained and saved to: ' + face_detector_model_path)
dlib_face_deector_model_path = os.path.realpath(os.path.join('./models', 'dlib_face_detector.model'))
dlib.get_frontal_face_detector().save(dlib_face_deector_model_path)
print('Dlib face detector model has been saved to: ' + dlib_face_deector_model_path)

Face detector model has been trained and saved to: D:\hhj\zeno_face_tracker\models\zeno_face_detector.model
Dlib face detector model has been saved to: D:\hhj\zeno_face_tracker\models\dlib_face_detector.model


## Step 4: Train the shape predictor model

In [None]:
options = dlib.shape_predictor_training_options()
options.be_verbose = config.getboolean('shape_predictor_training_options', 
                                       'be_verbose')
options.cascade_depth = config.getint('shape_predictor_training_options', 
                                      'cascade_depth')
options.feature_pool_region_padding = config.getfloat('shape_predictor_training_options', 
                                                      'feature_pool_region_padding')
options.feature_pool_size = config.getint('shape_predictor_training_options', 
                                          'feature_pool_size')
options.lambda_param = config.getfloat('shape_predictor_training_options', 
                                       'lambda_param')
options.nu = config.getfloat('shape_predictor_training_options', 'nu')
options.num_test_splits = config.getint('shape_predictor_training_options', 
                                        'num_test_splits')
options.num_trees_per_cascade_level = config.getint('shape_predictor_training_options', 
                                                    'num_trees_per_cascade_level')
options.oversampling_amount = config.getint('shape_predictor_training_options', 
                                            'oversampling_amount')
options.tree_depth = config.getint('shape_predictor_training_options', 'tree_depth')
shape_predictor = dlib.train_shape_predictor(shape_predictor_images, 
                                             shape_predictor_groundtruth, 
                                             options)
shape_predictor_model_path = os.path.realpath(os.path.join('./models', 'zeno_face_tracker.model'))
shape_predictor.save(shape_predictor_model_path)
print('Shape predictor model has been trained and saved to: ' + shape_predictor_model_path)

## Step 5: Test the detector model on live video

In [29]:
face_detector_model_path = os.path.realpath(os.path.join(
    './models', 'zeno_face_detector.model'))
dlib_face_deector_model_path = os.path.realpath(os.path.join(
    './models', 'dlib_face_detector.model'))
face_detector = dlib.simple_object_detector(face_detector_model_path)
webcam = cv2.VideoCapture(0)
if webcam.isOpened():
    print('Webcam #0 opened.')
    while True:
        _, frame = webcam.read()
        if frame.ndim == 3 and frame.shape[2] == 3:
            face_detection_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        else:
            face_detection_frame = frame
        face_boxes = face_detector(face_detection_frame)
        for face_box in face_boxes:
            cv2.rectangle(frame, (face_box.left(), face_box.top()), 
                         (face_box.right(), face_box.bottom()), 
                         color=(0, 0, 255), thickness=2, lineType=cv2.LINE_AA)
        cv2.imshow('Webcam #0', frame)
        key = cv2.waitKey(1)
        if key == ord('q') or key == ord('Q'):
            break
    webcam.release()
    cv2.destroyAllWindows()
else:
    print('Failed to open webcam #0.')

Webcam #0 opened.


In [3]:
options = dlib.simple_object_detector_training_options()

In [4]:
options

simple_object_detector_training_options(be_verbose=0, add_left_right_image_flips=0, num_threads=4, detection_window_size=6400, C=1, epsilon=0.01, max_runtime_seconds=3.1536e+09, upsample_limit=2, nuclear_norm_regularization_strength=0)

In [5]:
options = dlib.shape_predictor_training_options()

In [6]:
options

shape_predictor_training_options(be_verbose=0, cascade_depth=10, tree_depth=4, num_trees_per_cascade_level=500, nu=0.1, oversampling_amount=20, oversampling_translation_jitter=0, feature_pool_size=400, lambda_param=0.1, num_test_splits=20, feature_pool_region_padding=0, random_seed=, num_threads=0, landmark_relative_padding_mode=1)

In [14]:
options

simple_object_detector_training_options(be_verbose=1, add_left_right_image_flips=1, num_threads=6, detection_window_size=6400, C=5, epsilon=0.01, max_runtime_seconds=3.1536e+09, upsample_limit=2, nuclear_norm_regularization_strength=0)

In [16]:
dlib.fhog_object_detector.run_multiple([face_detector, default_face_detector], face_detection_frame)

(rectangles[[(209, 209) (424, 424)]], [2.1619599499238844], [1])

In [60]:
print(dlib.test_simple_object_detector(face_detection_validation_images, 
                                 face_detection_validation_groundtruth, face_detector))

precision: 1, recall: 0.9725, average precision: 0.9725


In [57]:
face_detection_groundtruth[0]

[rectangle(261,128,410,277)]

In [3]:
webcam.release()
cv2.destroyAllWindows()

In [4]:
face_boxes

rectangles[[(305, 281) (520, 496)]]

In [19]:
face_detector_model_path = os.path.realpath(os.path.join(
    './models', 'zeno_face_detector.model'))
face_detector = dlib.simple_object_detector([dlib.get_frontal_face_detector()])

RuntimeError: Unable to cast Python instance to C++ type (compile in debug mode for details)

In [7]:
face_detector

<dlib.simple_object_detector at 0x9131228>

In [11]:
face_detector_model_path = os.path.realpath(os.path.join(
    './models', 'zeno_face_detector.model'))
dlib_face_deector_model_path = os.path.realpath(os.path.join(
    './models', 'dlib_face_detector.model'))
face_detector = dlib.simple_object_detector([dlib.simple_object_detector(face_detector_model_path),
                                             dlib.fhog_object_detector(dlib_face_deector_model_path)])

RuntimeError: Unable to cast Python instance to C++ type (compile in debug mode for details)

In [8]:
dlib.__version__

'19.15.0'

In [9]:
dlib_face_deector_model_path

'D:\\hhj\\zeno_face_tracker\\models\\dlib_face_detector.model'

In [13]:
print(dlib.test_simple_object_detector(face_detection_validation_images, 
                                       face_detection_validation_groundtruth, dlib.get_frontal_face_detector()))

precision: 1, recall: 0.845, average precision: 0.845


In [17]:
dlib_face_deector_model_path = os.path.realpath(os.path.join('./models', 'dlib_face_detector.svm'))
dlib.simple_object_detector([dlib.get_frontal_face_detector()]).save(dlib_face_deector_model_path)
print('Dlib face detector model has been saved to: ' + dlib_face_deector_model_path)

RuntimeError: Unable to cast Python instance to C++ type (compile in debug mode for details)

In [21]:
lala = (dlib.fhog_object_detector(dlib_face_deector_model_path))

In [22]:
lala.num_detectors

5

In [23]:
face_detector.num_detectors

1

In [26]:
import pickle
aa = pickle.load(open(dlib_face_deector_model_path))

UnicodeDecodeError: 'gbk' codec can't decode byte 0xe8 in position 17: illegal multibyte sequence