## Settings

In [None]:
# Google Drive file ID of the saved Keras model (fetched with `gdown` in the Downloads section).
SAVED_MODEL_GDRIVE_ID = '139hpFUxh9toyU0LwDq7mibYr6HjJgh6f'
# Local file name the model is downloaded to; later passed to `ModelFER`.
SAVED_MODEL_FILE_PATH = 'saved_model_infr_fer.h5'
# Google Drive file ID of the zipped test images.
TEST_KAGGLE_GDRIVE_ID = '1bGHeWeWYXj5biL9s-qTc9gyv91WNAbWE'
# Base name of the downloaded archive (`.zip` is appended) and the directory expected after unzipping.
TEST_KAGGLE_DIRECTORY = 'test_kaggle'

## Downloads

In [None]:
!gdown -O {SAVED_MODEL_FILE_PATH} --id {SAVED_MODEL_GDRIVE_ID}
!gdown -O {TEST_KAGGLE_DIRECTORY}.zip --id {TEST_KAGGLE_GDRIVE_ID}

!unzip -q {TEST_KAGGLE_DIRECTORY}.zip

Downloading...
From: https://drive.google.com/uc?id=139hpFUxh9toyU0LwDq7mibYr6HjJgh6f
To: /content/saved_model_infr_fer.h5
100% 1.19M/1.19M [00:00<00:00, 45.0MB/s]
Downloading...
From: https://drive.google.com/uc?id=1bGHeWeWYXj5biL9s-qTc9gyv91WNAbWE
To: /content/test_kaggle.zip
100% 222M/222M [00:01<00:00, 112MB/s]


In [None]:
from pathlib import Path

# Fail fast if the Downloads cell did not produce the expected artifacts.
model_file = Path(SAVED_MODEL_FILE_PATH)
assert model_file.is_file(), f'"{SAVED_MODEL_FILE_PATH}" file is required'

test_dir = Path(TEST_KAGGLE_DIRECTORY)
assert test_dir.is_dir(), f'"{TEST_KAGGLE_DIRECTORY}" directory is required'

## Imports

In [None]:
import time
from pathlib import Path

import numpy as np
import cv2

import tensorflow as tf

from tqdm import tqdm

# Show the TensorFlow version in use (plain call instead of IPython's `/print` autocall syntax).
print(tf.__version__)

2.7.0


## Definitions

In [None]:
#@title Functions { form-width: "1px" }
#@markdown ```python
#@markdown def read_images(paths, size=None, mode='rgb', return_skipped=False)
#@markdown ```
def read_images(paths, size=None, mode='rgb', return_skipped=False):
    """Given a list of image file paths, read image data into uint8 numpy arrays.

    Output image formats:
    * rgb, shape=(h, w, 3), color channels: R, G, B
        * Common image format for color images
    * bgr, shape=(h, w, 3), color channels: B, G, R
        * OpenCV format for color images
    * gray, shape=(h, w), color channels: none
        * Common format (PIL, matplotlib, opencv) for grayscale images
    * gray_1ch, shape=(h, w, 1), color channels: MONO
        * TensorFlow format for grayscale images

    paths : list of str (or path-like)
        List of paths. Each path is converted with `str()` before it is handed
        to `cv2.imread()`, so `pathlib.Path` objects are accepted as well.
    size : None or tuple(int, int)
        If None -- don't resize images. Else must be a destination size as
        tuple(width: int, height: int).
    mode : {'rgb', 'bgr', 'gray', 'gray_1ch'}
        Output image format.
    return_skipped : bool
        Also return the list of paths that failed to read (`cv2.imread()`
        returned None: file doesn't exist, bad format, no access, etc.).

    Returns:
        Images as uint8 numpy arrays.
        * If size=None: list of non-uniformly sized arrays ("ragged array").
        * If size is not None: batch of images as ndarray, where the first axis (axis=0)
          equals the number of successfully read images.
        * If mode='gray', each image is represented as a 2D array (h, w),
          else as a 3D array (h, w, c).
        * If return_skipped=True: tuple (images, skipped_paths), where the
          second element holds the paths exactly as they were passed in.

    Side effects:
        Shows a tqdm progress bar, and prints "<read>/<total> files have been
        read" when at least one file was skipped.

    Example, without resize (size=None):

    >>> [img.shape for img in read_images(paths[:3], size=None)]
    [(201, 201, 3), (231, 231, 3), (296, 296, 3)]

    Example, with resize:

    >>> imgs = read_images(paths[:2], size=(100, 50))
    >>> imgs.shape
    (2, 50, 100, 3)
    """
    assert mode in ['rgb', 'bgr', 'gray', 'gray_1ch'], f'unsupported mode: {mode!r}'

    n = len(paths)
    skipped_files = []

    # The decode flag depends only on `mode`, so pick it once outside the loop.
    if mode in ('rgb', 'bgr'):
        read_flag = cv2.IMREAD_COLOR      # decoded as (h, w, 3), BGR order
    else:
        read_flag = cv2.IMREAD_GRAYSCALE  # decoded as (h, w)

    if size is None:
        imgs = [None] * n
    else:
        w, h = size
        # Trailing channel axes of one stored image for the requested mode.
        channel_dims = {'gray': (), 'gray_1ch': (1,)}.get(mode, (3,))
        imgs = np.zeros((n, h, w) + channel_dims, dtype=np.uint8)

    i = 0  # number of images stored so far; lags behind the loop when files are skipped
    for path in tqdm(paths):
        img = cv2.imread(str(path), read_flag)
        if img is None:
            skipped_files.append(path)
            continue
        if mode == 'rgb':
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if size is not None:
            # Compare height + width of source and target to choose between a
            # downscaling and an upscaling filter.
            if sum(img.shape[:2]) > sum(size):
                interpolation = cv2.INTER_AREA    # downscale
            else:
                interpolation = cv2.INTER_LINEAR  # upscale
            # `interpolation` must be passed by keyword: the third positional
            # parameter of cv2.resize() is `dst`, not the interpolation flag.
            img = cv2.resize(img, (w, h), interpolation=interpolation)
        if mode == 'gray_1ch':
            img = img[..., None]  # (h, w) -> (h, w, 1)

        imgs[i] = img
        i += 1

    if i < n:
        print(f"{i}/{n} files have been read")
        imgs = imgs[:i]  # drop the unused tail slots

    if return_skipped:
        return (imgs, skipped_files)
    return imgs

In [None]:
#@title Classes { form-width: "1px" }
#@markdown ```python
#@markdown class ModelFER(saved_model_path)
#@markdown ```
class ModelFER:
    """Model for Facial Expression Recognition

    Classifies given image(s) of a face into one of the 9 categories: anger,
    contempt, disgust, fear, happy, neutral, sad, surprise and uncertain.

    Input format:
        * dtype=uint8
        * shape
            * (h, w, c) -- single image input for `predict()`
            * (b, h, w, c) -- (b=batch_size) batch input for `predict_batch()`
        * c=channels
            * c=1 -- for grayscale images
            * c=3 -- for RGB images

    Grayscale images without channel dimension are not supported, i.e. arrays
    with shapes (h, w) or (b, h, w) are not allowed.
    """
    # Class labels, indexed by the position of the highest score in a prediction row.
    CLASS_NAMES = ('anger', 'contempt', 'disgust', 'fear', 'happy',
                   'neutral', 'sad', 'surprise', 'uncertain')

    def __init__(self, saved_model_path):
        """Load the Keras model stored at `saved_model_path`."""
        self.tf_model = tf.keras.models.load_model(saved_model_path)


    @staticmethod
    def preprocess(images):
        """Preprocess batch of images for input.

        Valid input:
        1. shape: (b, h, w, {1, 3})
        2. dtype: uint8 or float32
        3. values range: [0, 255]

        Output:
        1. shape: (b, 128, 128, 1)
        2. dtype: uint8
        3. values range: [0, 255]

        Raises:
            ValueError: if `images` is not a 4D array with 1 or 3 channels.
        """
        # Check rank before reading the channel axis, so malformed input
        # gets the descriptive error below.
        if images.ndim != 4 or images.shape[-1] not in (1, 3):
            raise ValueError(
                f'Expected `images.shape` in [(b, h, w, 3), (b, h, w, 1)],'
                f' got: {images.shape}')
        n_channels = images.shape[-1]

        # RGB to grayscale if needed: (b, h, w, 3) -> (b, h, w, 1)
        if n_channels == 3:
            images = tf.image.rgb_to_grayscale(images).numpy()

        # Resize if needed
        (in_h, in_w, in_c) = (128, 128, 1)  # model_input_size -- can be read from `model.input`
        (b, h, w, _) = images.shape  # actual size

        if (h, w) != (in_h, in_w):
            images = tf.image.resize(images, (in_h, in_w)).numpy()  # returns float32

        # Cast on every path, not only after a resize: float32 input that
        # already has the model's input size must come out as uint8 too.
        # Values are clipped to [0, 255] and then truncated by the cast.
        if images.dtype != np.uint8:
            images = np.clip(images, 0, 255).astype(np.uint8)

        assert (images.shape == (b, in_h, in_w, in_c)
                and images.dtype == np.uint8), 'This function has a bug. Check the code.'
        return images


    @staticmethod
    def decode_preds(preds):
        """Map each row of `preds` (b, 9) to the class name with the highest score.

        Returns a 1D numpy array of `b` strings.
        """
        class_names = np.array(ModelFER.CLASS_NAMES)
        return class_names[np.asarray(preds).argmax(axis=1)]


    def predict_batch(self, images):
        """Convenience function that: (1) verifies format and prepares images for
            inference, (2) calls `tf_model.predict()`, (3) decodes predictions.

        images : array
            Batch of images in form of 4D uint8 array.

        Returns a 1D numpy array with one class name per image.

        Raises:
            ValueError: for 3D input (a single image) or any shape rejected
                by `preprocess()`.
        """
        assert images.dtype == np.uint8, f'got {images.dtype}'

        if images.ndim == 3:
            raise ValueError(
                f'Expected batch input, got: {images.shape}. For single image'
                f' input use `predict()` method.')
        images = self.preprocess(images)

        preds = self.tf_model.predict(images)
        return self.decode_preds(preds)


    def predict(self, image):
        """Classify a single image and return its class name.

        image : array
            Gray or RGB 8bit image in form of a 3D uint8 array (h, w, c),
            c in {1, 3}. 2D grayscale arrays need an explicit channel axis.

        Raises:
            ValueError: for 4D input (a batch) or any input that is not 3D.
        """
        assert image.dtype == np.uint8, f'got {image.dtype}'

        if image.ndim == 4:
            raise ValueError(
                f'Expected single image input, got: {image.shape}. For batch'
                f' input use `predict_batch()` method.')
        if image.ndim != 3:
            # Reject here: forwarding e.g. a 2D image would surface as a
            # "use `predict()`" error from predict_batch(), which is misleading.
            raise ValueError(
                f'Expected single image of shape (h, w, c), got: {image.shape}.'
                f' Grayscale images need a channel axis, e.g. `image[..., None]`.')
        batch = image[None, ...]  # (h, w, c) -> (1, h, w, c)
        class_name, = self.predict_batch(batch)
        return class_name

## Read test images

In [None]:
# Create paths list
# File names are "0.jpg" ... "4999.jpg"; `test_filenames` is reused later as the
# first column of submission.csv, `paths_test` is what gets read from disk.
test_filenames = [f'{i}.jpg' for i in range(5000)]
paths_test = [f'{TEST_KAGGLE_DIRECTORY}/{filename}' for filename in test_filenames]
paths_test[:3]

['test_kaggle/0.jpg', 'test_kaggle/1.jpg', 'test_kaggle/2.jpg']

In [None]:
# Load images in RAM
imgs_test = read_images(paths_test, size=(128, 128), mode='gray_1ch')
# `read_images` skips unreadable files, which would shorten the batch and shift
# predictions relative to `test_filenames` when the submission is written.
assert len(imgs_test) == len(paths_test), (
    f'only {len(imgs_test)} of {len(paths_test)} test images could be read')
imgs_test.shape

100%|██████████| 5000/5000 [00:14<00:00, 347.07it/s]


(5000, 128, 128, 1)

## Load model

In [None]:
# Load the saved Keras model through the `ModelFER` wrapper and print its layer summary.
model = ModelFER(SAVED_MODEL_FILE_PATH)
model.tf_model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 128, 128, 1)]     0         
                                                                 
 tf.image.grayscale_to_rgb_3  (None, 128, 128, 3)      0         
  (TFOpLambda)                                                   
                                                                 
 tf.cast_3 (TFOpLambda)      (None, 128, 128, 3)       0         
                                                                 
 tf.math.truediv_3 (TFOpLamb  (None, 128, 128, 3)      0         
 da)                                                             
                                                                 
 tf.math.subtract_3 (TFOpLam  (None, 128, 128, 3)      0         
 bda)                                                            
                                                           

## Predict

In [None]:
# Classify every test image; the tuple on the last line is shown as the cell output.
predicted_classes = model.predict_batch(imgs_test)
len(predicted_classes), predicted_classes

(5000, array(['sad', 'neutral', 'sad', ..., 'neutral', 'sad', 'happy'],
       dtype='<U9'))

## Write submission.csv

In [None]:
# submission.csv
# Every file name needs exactly one prediction: `map` over two sequences would
# otherwise silently stop at the shorter one and write a truncated file.
assert len(test_filenames) == len(predicted_classes), (
    f'{len(test_filenames)} file names vs {len(predicted_classes)} predictions')

rows = ['image_path,emotion']
rows.extend(map('{},{}'.format, test_filenames, predicted_classes))
csv_content = '\r\n'.join(rows) + '\r\n'

# Write bytes so the explicit '\r\n' line endings are stored as-is and are not
# translated a second time by text-mode newline handling on some platforms.
Path('submission.csv').write_bytes(csv_content.encode('utf8'))

# kaggle score (private/public): 0.46440 / 0.46640

82816

## Measure inference time

In [None]:
def measure_performance(tf_model, batches, how_long_s=None):
    """Time repeated forward passes and print average latency and throughput.

    tf_model : callable
        Invoked as `tf_model(batch, training=False)` once per batch.
    batches : iterable
        Inputs fed to the model one at a time. The printed "s/img" and "fps"
        figures are per call, so they are per image only for 1-sized batches.
    how_long_s : None or float
        Time budget in seconds, checked after every call. If None, all
        batches are processed.

    Prints `<seconds per call>s/img (<calls per second>fps)`, or a short
    notice when `batches` is empty. Returns None.
    """
    n_calls = 0
    t0 = time.perf_counter()  # monotonic clock, unaffected by system clock changes
    for batch in batches:
        tf_model(batch, training=False)
        n_calls += 1
        # Only enforce the budget when one was given (None means "no limit").
        if how_long_s is not None and time.perf_counter() - t0 > how_long_s:
            break
    sec = time.perf_counter() - t0

    if n_calls == 0:
        print('no batches to measure')
        return

    sec_per_call = sec / n_calls
    fps = n_calls / sec if sec > 0 else float('inf')
    print(f'{sec_per_call:.3f}s/img ({fps:.0f}fps)')

In [None]:
# 1-sized batches
# 1000 random uint8 inputs, each of shape (1, 128, 128, 1); the seed is fixed so
# the same data is generated on every run.
dummy_data = np.random.RandomState(0).randint(0, 256, (1000, 1, 128, 128, 1), dtype=np.uint8)
tf_model = model.tf_model

# Warmup
# One call before timing -- presumably so one-time initialisation cost is not
# included in the measurement (TODO confirm). Its result is the cell output.
tf_model(dummy_data[0])

<tf.Tensor: shape=(1, 9), dtype=float32, numpy=
array([[-1.0647646 , -2.385496  , -2.0123975 , -0.36399448,  1.6006172 ,
         1.3274012 ,  1.2064764 ,  0.94273823,  0.26824668]],
      dtype=float32)>

### CPU

In [None]:
#@title Hardware report { display-mode: "form" }
def report_hardware(disk_path='/content'):
    """Print a short CPU / RAM / GPU / disk summary of the current Linux runtime.

    disk_path : str
        Path handed to `df -h` for the disk line. Defaults to '/content'.

    Reads /proc/cpuinfo and /proc/meminfo, lists TensorFlow's local devices
    and runs `df -h`. Fields are looked up by key rather than by fixed line
    number, because the line layout of the /proc files is not the same on
    every machine.

    Raises:
        OSError: if a /proc file cannot be read.
        KeyError: if /proc/meminfo lacks `MemTotal` or `MemAvailable`.
        subprocess.CalledProcessError: if `df` fails (e.g. missing path).
    """
    import subprocess  # local import: only this report needs it

    def read_proc_pairs(proc_file):
        """Return the `key : value` lines of a /proc text file as (key, value) pairs."""
        pairs = []
        with open(proc_file) as f:
            for line in f:
                key, sep, value = line.partition(':')
                if sep:
                    pairs.append((key.strip(), value.strip()))
        return pairs

    # CPU: count the `processor` entries; take model and cache from the first one.
    cpu_pairs = read_proc_pairs('/proc/cpuinfo')
    n_cpus = sum(key == 'processor' for key, _ in cpu_pairs)
    first_cpu = {}
    for key, value in cpu_pairs:
        first_cpu.setdefault(key, value)  # keep the first occurrence of each key
    print('CPU: {}x {} @ cache {}'.format(
        n_cpus,
        first_cpu.get('model name', 'unknown'),
        first_cpu.get('cache size', 'unknown')))

    # RAM: values look like "13302920 kB"; convert kB -> GB.
    mem_info = dict(read_proc_pairs('/proc/meminfo'))
    mem_total_gb = int(mem_info['MemTotal'].split()[0]) / 1024 / 1024
    mem_avail_gb = int(mem_info['MemAvailable'].split()[0]) / 1024 / 1024
    print('RAM: {0:.1f} GB total, {1:.1f} GB avail'.format(mem_total_gb, mem_avail_gb))

    # GPU
    from tensorflow.python.client import device_lib
    has_gpu = False
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            has_gpu = True
            # `physical_device_desc` is a "key: value, key: value" string.
            device_specs = dict(
                item.partition(': ')[::2]
                for item in device.physical_device_desc.split(', '))
            print('GPU: {0} (name: "{1}", compute capability: {2}, memory: {3:.1f}GB)'.format(
                device_specs.get('name', 'unknown'),
                device.name,
                device_specs.get('compute capability', 'unknown'),
                device.memory_limit / 1024 / 1024 / 1024))
    if not has_gpu:
        print('GPU: none')

    # Disk: the second line of `df -h` holds
    # Filesystem, Size, Used, Avail, Use%, Mounted on.
    df_lines = subprocess.run(
        ['df', '-h', disk_path], capture_output=True, text=True, check=True
    ).stdout.splitlines()
    print('Disk: {1} total, {4} used, {3} avail'.format(*df_lines[1].split()))

report_hardware()

CPU: 2x Intel(R) Xeon(R) CPU @ 2.20GHz @ cache 56320 KB
RAM: 12.7 GB total, 11.9 GB avail
GPU: none
Disk: 108G total, 39% used, 67G avail


In [None]:
# Time single-input forward passes for up to 5 seconds (CPU section).
measure_performance(tf_model, dummy_data, 5)

0.037s/img (27fps)


### GPU

In [None]:
#@title Hardware report { display-mode: "form" }
# `report_hardware` is already defined in the CPU section; call it again here
# instead of redefining an identical copy of the function.
report_hardware()

CPU: 2x Intel(R) Xeon(R) CPU @ 2.30GHz @ cache 46080 KB
RAM: 12.7 GB total, 11.0 GB avail
GPU: Tesla K80 (name: "/device:GPU:0", compute capability: 3.7, memory: 10.5GB)
Disk: 79G total, 55% used, 36G avail


In [None]:
# Time single-input forward passes for up to 5 seconds (GPU section).
measure_performance(tf_model, dummy_data, 5)

0.042s/img (24fps)
