# TensorFlow ArcFace

### Ref https://github.com/peteryuX/arcface-tf2

In [1]:
from insightface.recognition.arcface_torch.dataset import MXFaceDataset, DataLoaderX
import torch.utils.data.distributed
import torch.distributed as dist
from easydict import EasyDict as edict
import time
import os

cfg = edict()
cfg.rec = "/raid/workspace/honghee/faces_emore/"
local_rank = 0
cfg.batch_size = 64

try:
    world_size = int(os.environ['WORLD_SIZE'])
    rank = int(os.environ['RANK'])
    dist_url = "tcp://{}:{}".format(os.environ["MASTER_ADDR"], os.environ["MASTER_PORT"])
except KeyError:
    world_size = 1
    rank = 0
    dist_url = "tcp://127.0.0.1:12584"

os.environ["CUDA_VISIBLE_DEVICES"] = "1"
dist.init_process_group(backend='nccl', init_method=dist_url, rank=rank, world_size=world_size)
torch.cuda.set_device(local_rank)    

A100-SXM4-40GB with CUDA capability sm_80 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_70 sm_75.
If you want to use the A100-SXM4-40GB GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



In [2]:
!echo $CUDA_VISIBLE_DEVICES

1


In [3]:
train_set = MXFaceDataset(root_dir=cfg.rec, local_rank=local_rank)
train_sampler = torch.utils.data.distributed.DistributedSampler(
    train_set, shuffle=True)
train_loader = DataLoaderX(
    local_rank=local_rank, dataset=train_set, batch_size=cfg.batch_size,
    sampler=train_sampler, num_workers=2, pin_memory=True, drop_last=True)

In [4]:
# for step, (x_batch_train, y_batch_train) in enumerate(train_loader):
#     x_batch_train = x_batch_train.cpu().numpy()
#     y_batch_train = y_batch_train.cpu().numpy()
#     print(y_batch_train)
#     time.sleep(1)

In [5]:
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from models import ArcFaceModel
# from losses import SoftmaxLoss
from losses import softmax_loss
import dataset
import tensorflow as tf
import os
import logging

tf.get_logger().setLevel(logging.ERROR)

In [6]:
### IJB-C Dataset
# batch_size = 128
# input_size = 112
# embd_shape = 512
# head_type = 'ArcHead'
# backbone_type = 'MobileNetV2'
# w_decay=5e-4
# num_classes = 3584 
# base_lr = 0.01
# dataset_len = 13033 
# epochs = 100
# steps_per_epoch = dataset_len // batch_size

### MS1M dataset
num_classes = 85742 
dataset_len = 5822653
batch_size = 64 # Initially 128
input_size = 112
embd_shape = 512
train_size = int(0.8 * dataset_len)
steps_per_epoch = train_size // batch_size
val_size = dataset_len - train_size
validation_steps = val_size // batch_size

w_decay=5e-4
epochs = 100

save_steps = 1000
steps = 1
is_ccrop=False
binary_img=True

is_Adam = False   # True
projection_head = False  # True
dgx = True

head_type = 'ArcHead' # ''ArcHead', CosHead', 'SphereHead', 'CurHead', 'AdaHead', CadHead'  
# Backbones w/ pretrained weights:
#     MobileNet, MobileNetV2, InceptionResNetV2, InceptionV3, ResNet50, ResNet50V2, ResNet101V2, NASNetLarge, NASNetMobile, Xception, MobileNetV3Large, MobileNetV3Small, EfficientNetLite0~6, EfficientNetB0~7
# Backbones w/o pretrained weights:
#      MnasNetA1, MnasNetB1, MnasNetSmall 
backbone_type = 'ResNet50' 

if head_type == 'SphereHead':
    base_lr = 0.01 
    margin = 1.35
    logist_scale = 30.0 
elif head_type == 'CosHead':
    base_lr = 0.01 
    margin=0.35
    logist_scale=64
elif head_type == 'ArcHead':
    base_lr = 0.1 
    margin=0.5
    logist_scale=64
elif head_type == 'CurHead': 
    base_lr = 0.1 
    margin=0.5
    logist_scale=64
elif head_type == 'CadHead': 
    base_lr = 0.1 
    margin=0.0
    logist_scale=64
elif head_type == 'AdaHead':
    base_lr = 0.1 
    margin=0.0
    logist_scale=64
else:
    base_lr = 0.01 # initially 0.01
    
print(head_type)
print(backbone_type)
print("projection head:", projection_head)
print("Adam:", is_Adam)
print("epoch:", epochs)
print("batch size:", batch_size)

ArcHead
ResNet50
projection head: False
Adam: False
epoch: 100
batch size: 64


In [7]:
model = ArcFaceModel(size=input_size,
                         backbone_type=backbone_type,
                         num_classes=num_classes,
                         margin=margin, 
                         logist_scale=logist_scale,
                         head_type=head_type,
                         embd_shape=embd_shape,
                         w_decay=w_decay,
                         training=True,
                         projection_head=projection_head)
model.summary()

learning_rate = tf.constant(base_lr)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(learning_rate,
                                                             decay_steps=300000, 
                                               decay_rate=0.1,staircase=True)
if is_Adam:
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=lr_schedule)
else:
#         optimizer = tf.keras.optimizers.SGD(
#             learning_rate=learning_rate, momentum=0.9)
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=lr_schedule, momentum=0.9)

# checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
# manager = tf.train.CheckpointManager(
#     checkpoint, directory="tmp/ckpt", max_to_keep=5)
# status = checkpoint.restore(manager.latest_checkpoint)

# Prepare the metrics.
train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()

#     model.compile(optimizer=optimizer, loss=loss_fn)

Model: "arcface_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_image (InputLayer)        [(None, 112, 112, 3) 0                                            
__________________________________________________________________________________________________
resnet50 (Functional)           (None, 4, 4, 2048)   23587712    input_image[0][0]                
__________________________________________________________________________________________________
OutputLayer (Functional)        (None, 512)          16787968    resnet50[0][0]                   
__________________________________________________________________________________________________
label (InputLayer)              [(None,)]            0                                            
______________________________________________________________________________________

In [8]:
import time
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import os

import numpy as np
import tensorflow as tf
from glob import glob

from evaluations import get_val_data, perform_val
from models import ArcFaceModel
from utils import set_memory_growth, load_yaml


lfw, agedb_30, cfp_fp, lfw_issame, agedb_30_issame, cfp_fp_issame = get_val_data("/raid/workspace/honghee/")

loss_fn = softmax_loss
epochs = 25

best_val_acc = 0

for epoch in range(epochs):
    print("======================================================")
    print("================= Start of epoch %d ==================" % (epoch,))
    print("======================================================")
    start_time = time.time()

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_loader):
        x_batch_train = x_batch_train.cpu().numpy()
        y_batch_train = y_batch_train.cpu().numpy()
        with tf.GradientTape() as tape:
            logits = model((x_batch_train, y_batch_train), training=True)
            loss_value = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Update training metric.
        train_acc_metric.update_state(y_batch_train, logits)

        # Log every 5000 batches.
        if step % 5000 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %d samples" % ((step + 1) * batch_size))
            model.save_weights("tmp/ckpt-h5/ckpt-1.h5")
            
            # Run a validation loop at the end of each epoch.
            best_checkpoint = "tmp/ckpt-h5/ckpt-1.h5"
            vf_model = ArcFaceModel(size=input_size,
                                 backbone_type=backbone_type,               
                                 training=False)
            vf_model.load_weights(best_checkpoint, by_name=True, skip_mismatch= True)

            acc_lfw, best_th = perform_val(
                embd_shape, batch_size, vf_model, lfw, lfw_issame,
                is_ccrop=False)
            val_acc = acc_lfw
            print("Validation acc: %.4f" % (float(val_acc),))
            print("Time taken: %.2fs" % (time.time() - start_time))
            if best_val_acc < val_acc:
                best_val_acc = val_acc
                model.save_weights("tmp/ckpt-h5/ckpt-best.h5")               
            
            # Display metrics at the end of each epoch.
            train_acc = train_acc_metric.result()
            print("Training acc over epoch: %.4f" % (float(train_acc),))
            # Reset training metrics at the end of each epoch
            train_acc_metric.reset_states()
    model.save_weights(f'tmp/ckpt-h5/epoch-{epoch}.h5')



ValueError: Input 0 is incompatible with layer arcface_model: expected shape=(None, 112, 112, 3), found shape=(64, 3, 112, 112)