In [1]:
import sys
import os

# Make the project packages importable whether this runs as a notebook or as
# a plain Python script: look one, two and three directories above the
# current working directory for the project root and put it on sys.path.
MODULE_ROOT_NAME = "AgeEstimator"
MODULE_PATHS = [
    candidate
    for candidate in (os.path.abspath(rel)
                      for rel in ('..', '../..', '../../..'))
    if candidate.endswith(MODULE_ROOT_NAME)
]
# Only an unambiguous match is used; otherwise the empty string is kept.
MODULE_PATH = MODULE_PATHS[0] if len(MODULE_PATHS) == 1 else ""
if MODULE_PATH not in sys.path:
    sys.path.append(MODULE_PATH)
    
from server.data.dataset import DataLoader
from server.models.cnn.model import IMAGE_SIZE, INPUT_SHAPE, CURR_DIR

Using TensorFlow backend.
  infer_datetime_format=infer_datetime_format)


In [2]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
from mtcnn.mtcnn import MTCNN
from keras_vggface.utils import preprocess_input
from keras_vggface.vggface import VGGFace
from multiprocessing import cpu_count
from threading import Thread
from PIL import Image
from functools import partial
import queue

In [3]:
# Shared, module-level objects reused by the cells below.

# VGGFace ResNet-50 built without its top layers; used to create bottleneck
# features.
model = VGGFace(
    model="resnet50",
    include_top=False,
    input_shape=INPUT_SHAPE,
)

# Train/test samples and their labels, as returned by the project DataLoader.
dl = DataLoader()
x_train_all, y_train_all = dl.load_train()
x_test_all, y_test_all = dl.load_test()

# One MTCNN face detector shared by every extract_face call.
detector = MTCNN()

# Thread sizing: the worker count is pinned to 2 (it is not derived from
# cpu_count() - 1), with 2 threads per worker.
WORKERS = 2
THREADS_PER_CORE = 2

In [4]:
def extract_face(filename=None, input_image=None, img_size=IMAGE_SIZE):
    r"""Extract a single face from a given photograph.

    Args:
        filename: Path of the image to read. Only read when ``input_image``
            is not given; it is also used in the error message.
        input_image: Already-loaded image. When given it takes precedence
            over ``filename``.
        img_size: Size passed to ``PIL.Image.resize`` for the cropped face.

    Returns:
        The first detected face, cropped and resized, as a ``float32`` array.

    Raises:
        ValueError: If neither ``filename`` nor ``input_image`` is given.
        AssertionError: If the detector finds no face in the image.
    """

    # Compare against None explicitly: the truth value of a multi-element
    # NumPy array is ambiguous and raises ValueError.
    if input_image is not None:
        img = input_image
    elif filename is not None:
        img = plt.imread(filename)
    else:
        raise ValueError("extract_face needs either a filename or an input_image")

    # Uses the shared module-level MTCNN detector.
    detection = detector.detect_faces(img)

    # Callers catch AssertionError for "no face found"; raise it explicitly so
    # the check is not stripped when Python runs with -O.
    if not detection:
        raise AssertionError(
            "Probably sth wrong with this image %s" % filename)

    # Extract the bounding box from the first face
    x1, y1, width, height = detection[0]['box']
    x2, y2 = x1 + width, y1 + height
    # A negative origin would make the slice below wrap around from the far
    # edge of the image, so clamp the box to the image.
    x1, y1 = max(0, x1), max(0, y1)
    face = img[y1:y2, x1:x2]

    # Resize pixels to the model size
    image = Image.fromarray(face)
    image = image.resize(img_size)
    face_array = np.asarray(image)

    return face_array.astype('float32')

In [5]:
def preprocess_image(img):
    """Turn one in-memory image into bottleneck features.

    The face is cropped with ``extract_face``, given a leading batch axis,
    passed through ``preprocess_input(..., version=2)`` and then through the
    shared ``model``.

    Args:
        img: Already-loaded image handed to ``extract_face`` as
            ``input_image``.

    Returns:
        The output of ``model.predict`` for the single preprocessed face.
    """
    print("Processing image...")
    # extract_face takes the file name as its first parameter; there is no
    # file for an in-memory image, so pass None explicitly instead of
    # omitting the argument.
    img = extract_face(filename=None, input_image=img)
    img = np.expand_dims(img, axis=0)

    print("Preprocessing input...")
    processed = preprocess_input(img, version=2)

    print("Creating bottleneck features...")
    y_hat = model.predict(processed)

    print("Done.")
    return y_hat

In [6]:
def checkpoint(x, y, prefix="tmp-", suffix="", worker_id=0):
    """Save one batch of features and labels under ``CURR_DIR/checkpoint``.

    Two files are written with ``np.save``:
    ``<prefix>x-worker<worker_id><suffix>`` and
    ``<prefix>y-worker<worker_id><suffix>``.

    Args:
        x: Features to save.
        y: Labels to save.
        prefix: File name prefix.
        suffix: File name suffix appended after the worker id.
        worker_id: Integer id of the worker writing the batch.
    """
    target_dir = os.path.join(CURR_DIR, "checkpoint")
    # np.save does not create missing directories; make sure it exists so the
    # first checkpoint of a fresh run does not fail.
    os.makedirs(target_dir, exist_ok=True)

    np.save(os.path.join(target_dir,
                         "%sx-worker%d%s" % (prefix, worker_id, suffix)), x)
    np.save(os.path.join(target_dir,
                         "%sy-worker%d%s" % (prefix, worker_id, suffix)), y)
    
def _extract_split_faces(names, labels, prefix, idx, checkpoint_limit):
    """Extract faces for one dataset split, checkpointing as it goes.

    Every ``checkpoint_limit`` successful extractions the accumulated batch is
    written with ``checkpoint`` and the in-memory batch is reset; a final
    partial batch is written at the end.

    Returns:
        ``(crashed_labels, crashed_names)`` for the samples whose extraction
        raised.
    """
    batch_labels = []
    batch_features = []
    crashed_labels = []
    crashed_names = []
    counter = 0  # number of successful extractions so far

    for i, fname in enumerate(names):
        try:
            feature = extract_face(fname)
        except AssertionError as e:
            # Raised by extract_face when no face is detected.
            print(">>>> [Worker%d]: %s" % (idx, str(e)))
            crashed_labels.append(labels[i])
            crashed_names.append(fname)
            continue
        except Exception as e:
            # Best effort: record the failing sample and keep going.
            print(">>>> [Worker%d]: Exception: %s" % (idx, str(e)))
            print(fname)
            crashed_labels.append(labels[i])
            crashed_names.append(fname)
            continue
        batch_labels.append(labels[i])
        batch_features.append(feature)

        counter += 1
        if counter % checkpoint_limit == 0:
            checkpoint(
                x=batch_features,
                y=batch_labels,
                prefix=prefix,
                worker_id=idx,
                suffix="_batch%d" % (counter // checkpoint_limit))
            batch_features = []
            batch_labels = []

    # Last (partial) batch
    if counter % checkpoint_limit != 0:
        checkpoint(
            x=batch_features,
            y=batch_labels,
            prefix=prefix,
            worker_id=idx,
            suffix="_batch%d" % (counter // checkpoint_limit + 1))

    return crashed_labels, crashed_names


def load_batch_img(x_train, y_train, x_test, y_test, idx=0, checkpoint_limit=4000):
    """Extract faces for a worker's train and test samples and checkpoint them.

    Each sample in ``x_train`` / ``x_test`` is passed to ``extract_face``.
    Successful results are saved in batches of ``checkpoint_limit`` through
    ``checkpoint`` with the prefixes ``tmp-train-`` and ``tmp-test-``.
    Samples whose extraction raises are skipped and reported.

    Args:
        x_train: Training samples passed one by one to ``extract_face``.
        y_train: Labels aligned with ``x_train``.
        x_test: Test samples passed one by one to ``extract_face``.
        y_test: Labels aligned with ``x_test``.
        idx: Worker id used in log messages and checkpoint file names.
        checkpoint_limit: Number of successful extractions per checkpoint.

    Returns:
        A 4-tuple ``(train_labels_crash, train_names_crash,
        test_labels_crash, test_names_crash)`` describing the failed samples.
    """
    print("[Worker%d]: Processing training images..." % idx)
    x_train_labels_crash, x_train_names_crash = _extract_split_faces(
        x_train, y_train, "tmp-train-", idx, checkpoint_limit)

    print("[Worker%d]: Processing test images..." % idx)
    # Test failures are labelled from y_test and test batches get their own
    # prefix so they cannot overwrite the training checkpoints.
    x_test_labels_crash, x_test_names_crash = _extract_split_faces(
        x_test, y_test, "tmp-test-", idx, checkpoint_limit)

    crashed = (x_train_labels_crash, x_train_names_crash,
               x_test_labels_crash, x_test_names_crash)
    return crashed
     
    
def preprocess_dataset(idx=0, total_workers=1):
    """Process worker ``idx``'s share of the global train and test sets.

    The module-level ``x_train_all`` / ``y_train_all`` and ``x_test_all`` /
    ``y_test_all`` are cut into ``total_workers`` contiguous slices, and slice
    ``idx`` is handed to ``load_batch_img``.

    Args:
        idx: Zero-based index of this worker.
        total_workers: Number of workers the data is divided between.

    Returns:
        Whatever ``load_batch_img`` returns for this worker's slice.
    """

    def bounds(total):
        # Proportional boundaries: consecutive workers' slices are adjacent
        # and the last one ends at ``total``, so no trailing samples are
        # dropped when ``total`` is not a multiple of ``total_workers``.
        return (total * idx // total_workers,
                total * (idx + 1) // total_workers)

    train_start, train_end = bounds(len(x_train_all))
    test_start, test_end = bounds(len(x_test_all))

    x_train = x_train_all[train_start:train_end]
    y_train = y_train_all[train_start:train_end]
    x_test = x_test_all[test_start:test_end]
    y_test = y_test_all[test_start:test_end]

    crashed = load_batch_img(x_train, y_train, x_test, y_test, idx)

    return crashed

#     print("[Worker%d]: Preprocessing inputs..." % idx)
#     x_train_processed = preprocess_input(x_train_features, version=2)
#     x_test_processed  = preprocess_input(x_test_features, version=2)
    
#     print("[Worker%d]: Creating bottleneck features..." % idx)
#     p_train = model.predict(x_train_processed)
#     p_test  = model.predict(x_test_processed)
    
#     print("[Worker%d]: Saving features..." % idx)
#     np.save(os.path.join(CURR_DIR, "features-train-%d" % idx), p_train)
#     np.save(os.path.join(CURR_DIR, "features-test-%d" % idx), p_test)
#     np.save(os.path.join(CURR_DIR, "labels-train-%d" % idx), x_train_labels)
#     np.save(os.path.join(CURR_DIR, "labels-test-%d" % idx), x_test_labels)
    
#     print("[Worker%d]: Done." % idx)
#     crashed = (x_train_labels_crash, x_train_names_crash, x_test_labels, x_test_names_crash)
#     return p_train, p_test, x_train_labels, x_test_labels, crashed, idx

In [7]:
def preprocess_dataset_parallel():
    """Run ``preprocess_dataset`` on ``WORKERS * THREADS_PER_CORE`` threads.

    Note: multithreading conflicts with MTCNN.

    Change its src code (network/factory.py) from:
        from keras.layers import Input, Dense, Conv2D, MaxPooling2D, PReLU, Flatten, Softmax
        from keras.models import Model
    into
        from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, PReLU, Flatten, Softmax
        from tensorflow.keras.models import Model

    Returns:
        A list with one ``preprocess_dataset`` result per thread, ordered by
        worker index.

    Raises:
        RuntimeError: If any worker raised; the first worker error is chained
            as the cause. It is raised only after every thread has finished.
    """

    # Always start at least one thread so a zero product cannot turn the whole
    # run into a silent no-op.
    threads_count = max(1, WORKERS * THREADS_PER_CORE)
    print("Using %d threads." % threads_count)

    q = queue.Queue()  # For putting and getting results of thread

    def do_preprocess(idx, total, q):
        # Always put exactly one item per worker, even on failure; otherwise
        # the collector below would wait forever for a missing result.
        try:
            q.put((idx, preprocess_dataset(idx, total), None))
        except Exception as e:
            q.put((idx, None, e))

    threads = [Thread(
        target=do_preprocess,
        args=(idx, threads_count, q)
    ) for idx in range(threads_count)]

    for t in threads:
        t.start()
    for t in threads:
        t.join()

    # All threads have finished, so the queue can be drained without blocking.
    outcomes = []
    while not q.empty():
        outcomes.append(q.get_nowait())
    outcomes.sort(key=lambda outcome: outcome[0])

    failures = [(idx, err) for idx, _, err in outcomes if err is not None]
    if failures:
        for idx, err in failures:
            print(">>>> [Worker%d]: failed: %r" % (idx, err))
        raise RuntimeError(
            "%d of %d workers failed" % (len(failures), threads_count)
        ) from failures[0][1]

    return [result for _, result, _ in outcomes]

In [None]:
res = preprocess_dataset_parallel()

Using 4 threads.
[Worker0]: Processing training images...
[Worker1]: Processing training images...
[Worker2]: Processing training images...[Worker3]: Processing training images...

>>>> [Worker0]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/57_Jim_Jarmusch_0013.jpg
>>>> [Worker1]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/21_Diora_Baird_0011.jpg
>>>> [Worker3]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/1_utk_21120.jpg
>>>> [Worker2]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/26_Agnes_Bruckner_0008.jpg
>>>> [Worker1]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/26_utk_04445.jpg
>>>> [Worker1]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/1_utk_13246.jpg
>>>>

>>>> [Worker1]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/55_utk_06738.jpg
>>>> [Worker1]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/1_utk_22323.jpg
>>>> [Worker1]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/37_Sanaa_Lathan_0007.jpg
>>>> [Worker3]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/10_utk_12643.jpg
>>>> [Worker1]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/32_utk_04901.jpg
>>>> [Worker1]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/4_utk_11848.jpg
>>>> [Worker0]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/1_utk_10123.jpg
>>>> [Worker2]: Exception: list index out of range
/WAVE/users/unix/tch

>>>> [Worker3]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/50_Colm_Feore_0011.jpg
>>>> [Worker0]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/45_utk_17384.jpg
>>>> [Worker2]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/36_utk_02964.jpg
>>>> [Worker1]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/1_utk_10126.jpg
>>>> [Worker0]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/46_Iain_Glen_0005.jpg
>>>> [Worker2]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/42_Sheri_Moon_Zombie_0005.jpg
>>>> [Worker1]: Exception: list index out of range
/WAVE/users/unix/tchiang/AgeEstimator/server/data/dataset/train/2_utk_00089.jpg
>>>> [Worker2]: Exception: list index out of range
/WA