In [1]:
import os
import sys
sys.path.append('/home/peeranat_absoroute_io/workspace/asr-face-recognition/src')
import csv
import math
import time
import yaml
import argparse
import logging
import numpy as np
from tqdm.auto import tqdm
import tensorflow as tf
from tensorflow.keras.applications import densenet
import tensorflow.keras.backend as K
from datetime import datetime
from functools import partial
import sklearn

import lfw
import losses
from generators import DataGenerator, TFRecordDataGenerator

tf.__version__

'2.1.0'

In [2]:
# --- Model dimensions -------------------------------------------------------
n_classes = 180855          # rows of the class-weight matrix / width of the one-hot labels
embedding_size = 512        # width of the embedding produced by the model head

# --- Training loop ----------------------------------------------------------
initial_epoch, epochs = 0, 1
per_replica_batch_size = 256    # multiplied by the replica count to get the global batch size
steps_per_epoch = 20
n_folds = 10

# --- Margin-loss hyper-parameters (forwarded to arcface_loss) ---------------
m1, m2, m3 = 1.0, 0.5, 0.0
s = 64.0

# --- Learning-rate schedule: boundaries (in steps) and per-interval values --
lr_steps = [5]
lr_values = [0.001] * 2

# --- Training data location -------------------------------------------------
data_dir = '/mnt/disks/data/sirius/datasets/asian_and_msra_mtcnnpy_112_margin32'

In [3]:
def evaluate(model, lfw_paths, actual_issame, batch_size, embedding_size, n_folds):
    """Embed the LFW images with ``model`` and score the pairs with ``lfw.evaluate``.

    NOTE(review): a function with the same name is defined again further down
    the notebook and shadows this one once that cell runs. This definition is
    kept consistent with it (embeddings are L2-normalised before scoring).

    Args:
        model: callable model handed to ``predict_embedding``.
        lfw_paths: sequence of image paths; must hold exactly two paths per
            entry of ``actual_issame``.
        actual_issame: one entry per pair (passed through to ``lfw.evaluate``).
        batch_size: number of images embedded per forward pass.
        embedding_size: width of the embedding vectors produced by ``model``.
        n_folds: forwarded to ``lfw.evaluate``.

    Returns:
        Tuple ``(mean_accuracy, val, far, frr)`` where ``val``, ``far`` and
        ``frr`` are taken as-is from ``lfw.evaluate``.

    Raises:
        AssertionError: if ``len(lfw_paths) != 2 * len(actual_issame)``.
    """
    # Import the submodule explicitly: a bare ``import sklearn`` does not
    # guarantee that ``sklearn.preprocessing`` has been loaded.
    from sklearn.preprocessing import normalize

    n_images = len(actual_issame) * 2
    assert len(lfw_paths) == n_images

    embs_array = np.zeros((n_images, embedding_size))
    for start in tqdm(range(0, n_images, batch_size), 'evaluate on LFW'):
        end = start + batch_size
        # Slicing past the end of the list is safe, so the last batch is simply shorter.
        preprocessed = np.array([preprocess(path, training=False).numpy()
                                 for path in lfw_paths[start:end]])
        # Row-wise L2 normalisation, matching the later definition of evaluate().
        embs_array[start:end] = normalize(predict_embedding(model, preprocessed))

    _, _, accuracy, val, val_std, far, frr = lfw.evaluate(embs_array, actual_issame, n_folds=n_folds)

    print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy), np.std(accuracy)))
    print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f, FRR=%2.5f' % (val, val_std, far, frr))

    return np.mean(accuracy), val, far, frr

def parse_example(proto):
    """Parse one serialized example into a dict of scalar features.

    The returned dict has the keys ``image/encoded`` (string) and
    ``image/label``, ``image/width``, ``image/height`` (int64).
    """
    def scalar(dtype):
        # Every feature in the schema is a single value of the given dtype.
        return tf.io.FixedLenFeature([], dtype)

    schema = {
        'image/encoded': scalar(tf.string),
        'image/label': scalar(tf.int64),
        'image/width': scalar(tf.int64),
        'image/height': scalar(tf.int64),
    }
    return tf.io.parse_single_example(proto, schema)

def _preprocess(image, training):
    """Optionally flip ``image`` at random, then convert it to scaled float32.

    When ``training`` is truthy a random left/right flip is applied first.
    The image is then cast to float32, shifted by 127.5 and multiplied by
    0.0078125 (= 1/128).
    """
    maybe_flipped = tf.image.random_flip_left_right(image) if training else image
    return (tf.cast(maybe_flipped, tf.float32) - 127.5) * 0.0078125

def preprocess_tf_example(example, training=True):
    """Turn a parsed example dict into an ``(image, label)`` training pair.

    Args:
        example: mapping with at least ``image/encoded`` (encoded image bytes)
            and ``image/label`` (integer class id).
        training: forwarded to ``_preprocess``; enables the random flip.

    Returns:
        ``(image, label)`` where ``image`` is the output of ``_preprocess`` and
        ``label`` is the class id cast to int32.
    """
    # The width/height features are not needed here: the image is decoded
    # straight from the encoded bytes, so the former unused lookups were dropped.
    # int32 labels: arcface_loss stacks them with a tf.range index, so the two
    # must share an integer dtype.
    label = tf.cast(example['image/label'], tf.int32)
    image = tf.io.decode_image(example['image/encoded'])
    return _preprocess(image, training=training), label
    

def preprocess(path, training=True):
    """Read the image file at ``path``, decode it and run ``_preprocess`` on it."""
    decoded = tf.image.decode_image(tf.io.read_file(path))
    return _preprocess(decoded, training=training)

In [4]:
def arcface_loss(embeddings, labels, weights, n_classes, m1, m2, m3, s, reduction=tf.keras.losses.Reduction.AUTO):
    """Softmax cross-entropy with an angular margin applied to the target logit.

    Embeddings and class weights are L2-normalised along axis 1, so every logit
    is ``s * cos(theta)``. For each sample, the logit of its own class is
    replaced by ``s * (cos(theta * m1 + m2) - m3)`` before the cross-entropy.

    Args:
        embeddings: 2-D tensor, one row per sample.
        labels: 1-D integer class ids, one per sample.
        weights: 2-D tensor with one row per class (``n_classes`` rows).
        n_classes: depth of the one-hot mask used to patch the target logit.
        m1, m2, m3: margin parameters as used in the formula above.
        s: scale applied to the normalised embeddings.
        reduction: forwarded to ``SparseCategoricalCrossentropy``.

    Returns:
        The value produced by the Keras sparse categorical cross-entropy for
        the margin-adjusted logits.
    """
    scaled_embeddings = tf.nn.l2_normalize(embeddings, axis=1) * s
    unit_weights = tf.nn.l2_normalize(weights, axis=1)
    logits = tf.matmul(scaled_embeddings, tf.transpose(unit_weights), name='cos_t')

    # (row, label) index pairs of shape (batch, 1, 2) -> gathered target logits
    # of shape (batch, 1), which later broadcast against the one-hot mask.
    row_ids = tf.range(tf.shape(scaled_embeddings)[0])[:, None]
    target_index = tf.stack([row_ids, labels[:, None]], axis=-1)
    target_logit = tf.gather_nd(logits, indices=target_index)

    # Keep the cosine strictly inside (-1, 1) so acos does not produce NaNs.
    eps = K.epsilon()
    target_cos = tf.clip_by_value(target_logit / s, -1.0 + eps, 1.0 - eps)
    angle = tf.acos(target_cos)
    margin_logit = (tf.cos(angle * m1 + m2) - m3) * s

    # Only the target-class column of each row receives the margin correction.
    correction = margin_logit - target_logit
    adjusted_logits = logits + tf.one_hot(labels, n_classes) * correction

    xent = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction=reduction)
    return xent(labels, adjusted_logits)

In [5]:
class DistributedTrainer:
    """Custom training loop executed through a ``tf.distribute`` strategy.

    The trainer optimises both the model's trainable variables and a separate
    class-weight matrix (``emb_weights``) that is handed to the loss function.

    Args:
        epochs: number of passes over ``train_ds`` made by ``train``.
        global_step: ``tf.Variable`` counter, incremented once per step.
        model: callable Keras model producing embeddings.
        emb_weights: trainable variable passed as third argument to ``loss_fn``.
        batch_size: global batch size used to average the per-example loss.
        steps_per_epoch: maximum number of steps per epoch; a falsy value
            (``None``/``0``) means "iterate until the dataset is exhausted".
        loss_fn: callable ``(embeddings, labels, emb_weights) -> per-example loss``.
        optimizer: optional optimizer. When omitted, the notebook-level
            ``optimizer`` variable is used, as in the original implementation.
    """

    def __init__(self, epochs, global_step, model, emb_weights,
                 batch_size, steps_per_epoch, loss_fn, optimizer=None):
        self.epochs = epochs
        self.steps_per_epoch = steps_per_epoch
        self.model = model
        self.emb_weights = emb_weights
        self.batch_size = batch_size
        self.global_step = global_step
        self.loss_fn = loss_fn
        self.optimizer = optimizer

    def compute_loss(self, inputs, labels, emb_weights):
        """Return the per-example loss averaged over the *global* batch size."""
        per_example_loss = self.loss_fn(inputs, labels, emb_weights)
        return tf.nn.compute_average_loss(per_example_loss, global_batch_size=self.batch_size)

    @tf.function
    def train_step(self, inputs, labels):
        """Run one forward/backward pass on a replica and apply the gradients.

        Returns the replica's loss (already scaled by the global batch size).
        """
        # Fall back to the module-level `optimizer` when none was injected.
        active_optimizer = self.optimizer if self.optimizer is not None else optimizer

        with tf.GradientTape(persistent=False) as tape:
            embeddings = self.model(inputs, training=True)
            loss = self.compute_loss(embeddings, labels, self.emb_weights)

        # Use the objects this trainer was constructed with, not whatever the
        # notebook globals `model` / `emb_weights` currently point to.
        trainable_vars = self.model.trainable_variables + [self.emb_weights]
        gradients = tape.gradient(loss, trainable_vars)
        active_optimizer.apply_gradients(zip(gradients, trainable_vars))

        return loss

    def train(self, strategy, train_ds, test_ds=None):
        """Train for ``self.epochs`` epochs, printing one log line per step.

        Args:
            strategy: distribution strategy used to run ``train_step``.
            train_ds: iterable yielding ``(inputs, labels)`` batches.
            test_ds: currently unused.
        """
        log_template = 'Epoch: %d[%d/%d]\tStep %d\tTime %.3f\tLoss %2.3f\tlr %.5f'

        # `Strategy.run` replaced `experimental_run_v2` in later TF releases;
        # use whichever this installation provides.
        run_on_replicas = getattr(strategy, 'run', None) or strategy.experimental_run_v2

        for epoch in range(self.epochs):

            for step, (inputs, labels) in enumerate(train_ds):
                t1 = time.time()
                per_replica_loss = run_on_replicas(self.train_step, args=(inputs, labels,))
                # Each replica's loss is already divided by the global batch
                # size, so the replica values are summed rather than averaged.
                loss = strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_loss, axis=None)
                elapsed = time.time() - t1

                # NOTE(review): the logged lr is the literal 0.001, not the
                # value of the optimizer's schedule.
                print(log_template % (epoch+1, step+1, self.steps_per_epoch,
                                      int(self.global_step.numpy()),
                                      elapsed, loss.numpy(), 0.001))

#                 TODO: write to graph
                self.global_step.assign_add(1)

                # Honour steps_per_epoch so an endless dataset cannot trap the
                # loop inside a single epoch.
                if self.steps_per_epoch and step + 1 >= self.steps_per_epoch:
                    break
            

In [6]:
class ArcFaceModel(tf.keras.Model):
    """Backbone followed by a BatchNorm -> Dense -> BatchNorm embedding head.

    Args:
        backbone: callable feature extractor, invoked with the ``training`` flag.
        embedding_size: number of units of the bias-free dense projection.
    """

    def __init__(self, backbone, embedding_size):
        super().__init__()
        self.backbone = backbone
        self.bn1 = tf.keras.layers.BatchNormalization()
        # A Dropout(0.4) layer was declared here at one point and is currently disabled.
        self.dense = tf.keras.layers.Dense(embedding_size, use_bias=False)
        self.bn2 = tf.keras.layers.BatchNormalization(scale=False)

    def call(self, inputs, training=False):
        """Return the embedding for ``inputs``; ``training`` reaches the backbone and both BN layers."""
        features = self.backbone(inputs, training=training)
        normalized_features = self.bn1(features, training=training)
        projected = self.dense(normalized_features)
        return self.bn2(projected, training=training)

In [7]:
# Distribution strategy shared by the model-building and training cells below.
# The log output of this cell lists the devices the strategy picked up.
strategy = tf.distribute.MirroredStrategy()
strategy

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


<tensorflow.python.distribute.mirrored_strategy.MirroredStrategy at 0x7f1f82ad4978>

In [8]:
# Step counter shared with DistributedTrainer, which increments it once per
# training step. It is created once here, so re-running a training cell
# continues counting from the previous value.
global_step = tf.Variable(0, name="global_step", dtype=tf.int64, trainable=False)

# Piecewise-constant learning rate driven by global_step: lr_steps are the
# boundaries and lr_values the per-interval rates.
# NOTE: DistributedTrainer.train_step refers to `optimizer` by this
# module-level name, so this cell must run before training starts.
learning_rate = tf.compat.v1.train.piecewise_constant(global_step, boundaries=lr_steps, values=lr_values, name='lr_schedule')
optimizer = tf.keras.optimizers.Adam(learning_rate)

# Bind the loss hyper-parameters. Reduction.NONE is requested so that
# DistributedTrainer.compute_loss can average over the global batch size itself.
loss_fn = partial(arcface_loss, n_classes=n_classes, m1=m1, m2=m2, m3=m3, s=s, 
              reduction=tf.keras.losses.Reduction.NONE)


In [10]:
# Global batch size = per-replica batch size x number of replicas in the strategy.
batch_size = per_replica_batch_size * strategy.num_replicas_in_sync
batch_size

1024

In [12]:
# Input pipeline built from the records under data_dir, using the global batch size.
# NOTE(review): presumably `generate` returns a dataset of (image, label)
# batches, since the training loop unpacks two elements per item. Confirm in
# generators.TFRecordDataGenerator.
train_gen = TFRecordDataGenerator(data_dir, batch_size=batch_size)
train_gen_it = train_gen.generate(example_parser=parse_example, preprocess_fn=preprocess_tf_example)

In [13]:
# Build the class-weight matrix and the model under the strategy scope, then
# run the custom training loop on the distributed dataset.
with strategy.scope():
    initializer = tf.initializers.VarianceScaling()
    # One row of class weights per identity; trained jointly with the model.
    emb_weights = tf.Variable(initializer(shape=[n_classes, embedding_size]), 
                            name='embedding_weights', dtype=tf.float32)
    # Randomly initialised DenseNet121 (weights=None) with average pooling instead of a classifier top.
    backbone = tf.keras.applications.DenseNet121(weights=None, include_top=False, pooling='avg')
    model = ArcFaceModel(backbone, embedding_size)

    train_dist_dataset = strategy.experimental_distribute_dataset(train_gen_it)
    
    trainer = DistributedTrainer(epochs, global_step, model, emb_weights, 
                             batch_size, steps_per_epoch, loss_fn)
    # The captured output of this cell ends in a KeyboardInterrupt.
    trainer.train(strategy, train_dist_dataset)

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Redu

KeyboardInterrupt: 

In [11]:
# NOTE(review): this cell repeats the previous build-and-train cell verbatim.
# Running it re-creates emb_weights and the model from scratch and rebinds
# `model`, while global_step keeps the value left by earlier runs.
with strategy.scope():
    initializer = tf.initializers.VarianceScaling()
    # One row of class weights per identity; trained jointly with the model.
    emb_weights = tf.Variable(initializer(shape=[n_classes, embedding_size]), 
                            name='embedding_weights', dtype=tf.float32)
    # Randomly initialised DenseNet121 (weights=None) with average pooling instead of a classifier top.
    backbone = tf.keras.applications.DenseNet121(weights=None, include_top=False, pooling='avg')
    model = ArcFaceModel(backbone, embedding_size)

    train_dist_dataset = strategy.experimental_distribute_dataset(train_gen_it)
    
    trainer = DistributedTrainer(epochs, global_step, model, emb_weights, 
                             batch_size, steps_per_epoch, loss_fn)
    # The captured output of this cell ends in a KeyboardInterrupt.
    trainer.train(strategy, train_dist_dataset)

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Redu

KeyboardInterrupt: 

In [12]:
@tf.function
def predict_embedding(model, images):
    """Run ``model`` on ``images`` in inference mode and return its output."""
    return model(images, training=False)

def evaluate(model, lfw_paths, actual_issame, batch_size, embedding_size, n_folds):
    """Embed the LFW images with ``model`` and score the pairs with ``lfw.evaluate``.

    Each batch of embeddings is L2-normalised row-wise before being stored.

    Args:
        model: callable model handed to ``predict_embedding``.
        lfw_paths: sequence of image paths; must hold exactly two paths per
            entry of ``actual_issame``.
        actual_issame: one entry per pair (passed through to ``lfw.evaluate``).
        batch_size: number of images embedded per forward pass.
        embedding_size: width of the embedding vectors produced by ``model``.
        n_folds: forwarded to ``lfw.evaluate``.

    Returns:
        Tuple ``(mean_accuracy, val, far, frr)`` where ``val``, ``far`` and
        ``frr`` are taken as-is from ``lfw.evaluate``.

    Raises:
        AssertionError: if ``len(lfw_paths) != 2 * len(actual_issame)``.
    """
    # `import sklearn` alone does not guarantee that the `preprocessing`
    # submodule is loaded, so import the function explicitly.
    from sklearn.preprocessing import normalize

    n_images = len(actual_issame) * 2
    assert len(lfw_paths) == n_images

    embs_array = np.zeros((n_images, embedding_size))
    it = tqdm(range(0, n_images, batch_size), 'evaluate on LFW')
    for start in it:
        end = start + batch_size
        # Slicing past the end of the list is safe, so the last batch is simply shorter.
        preprocessed = np.array([preprocess(path, training=False).numpy() for path in lfw_paths[start:end]])
        embs_array[start:end] = normalize(predict_embedding(model, preprocessed))

    _, _, accuracy, val, val_std, far, frr = lfw.evaluate(embs_array, actual_issame, n_folds=n_folds)

    print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy), np.std(accuracy)))
    print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f, FRR=%2.5f' % (val, val_std, far, frr))

    return np.mean(accuracy), val, far, frr

In [13]:
# Location of the LFW evaluation images and of the pairs file inside it.
lfw_dir = '/mnt/disks/data/sirius/datasets/lfw-deepfunneled_mtcnnpy_112_margin32'
lfw_pairs = lfw_dir + '/pairs.txt'

# Resolve the pair list into image paths plus one same/different flag per pair.
# evaluate() asserts that there are exactly two paths per flag.
pairs = lfw.read_pairs(lfw_pairs)
lfw_paths, actual_issame = lfw.get_paths(lfw_dir, pairs)

In [18]:
# Score whichever `model` the most recently run build-and-train cell created.
# NOTE(review): batch_size and n_folds are literals here (256 and 10) rather
# than the `batch_size` / `n_folds` values defined earlier in the notebook.
accuracy, val, far, frr = evaluate(model, lfw_paths, actual_issame, batch_size=256, 
                                   embedding_size=embedding_size, n_folds=10)

HBox(children=(FloatProgress(value=0.0, description='evaluate on LFW', max=47.0, style=ProgressStyle(descripti…


Best threshold for fold 0: 0.150000
Best threshold for fold 1: 0.150000
Best threshold for fold 2: 0.150000
Best threshold for fold 3: 0.150000
Best threshold for fold 4: 0.150000
Best threshold for fold 5: 0.150000
Best threshold for fold 6: 0.160000
Best threshold for fold 7: 0.150000
Best threshold for fold 8: 0.150000
Best threshold for fold 9: 0.150000
Accuracy: 0.552+-0.016
Validation rate: 0.15133+-0.02272 @ FAR=0.09533, FRR=0.84867
