# Обучение сети для извлечения дескрипторов лица
## Реализация InsightFace

In [1]:
import os
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Ask TensorFlow for the list of physical GPU devices it can see.
gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
    # Turn on the memory-growth option for every GPU (per the TensorFlow docs
    # this makes the runtime allocate GPU memory on demand instead of up-front).
    tf.config.experimental.set_memory_growth(gpu, True)

In [2]:
# Set the global Keras dtype policy to "mixed_float16".
# NOTE(review): the warning printed by this cell reports a GeForce GTX 1060
# (compute capability 6.1, below the 7.0 the warning asks for), so this policy
# may run slowly on this machine.
keras.mixed_precision.set_global_policy("mixed_float16")

Your GPU may run slowly with dtype policy mixed_float16 because it does not have compute capability of at least 7.0. Your GPU:
  GeForce GTX 1060 6GB, compute capability 6.1
See https://developer.nvidia.com/cuda-gpus for a list of GPUs and their compute capabilities.


In [3]:
# Подготовим наш датасет для обучения
# Данные в реализации InsightFace хранятся в формате MXNet Record, из которого необходимо извлечь картинки для обучения
def MXnet_record_to_folder(dataset_dir, save_dir=None):
    """Unpack an MXNet indexed record file into one folder of image files per label.

    Reads ``train.idx`` / ``train.rec`` from ``dataset_dir`` and writes the payload
    of every record ``ii`` to ``<save_dir>/<label>/<ii>.jpg``.

    Args:
        dataset_dir: Directory that contains ``train.idx`` and ``train.rec``.
        save_dir: Output directory. When ``None`` it defaults to ``dataset_dir``
            (with one trailing "/" removed) plus the suffix ``"_112x112_folders"``.

    Returns:
        None. If ``save_dir`` already exists the function only prints a message
        and returns without opening the record file.

    NOTE(review): because an existing ``save_dir`` short-circuits the function,
    a previously interrupted extraction is treated as complete; delete the
    partial folder before re-running.
    """
    import os

    if save_dir is None:
        save_dir = (dataset_dir[:-1] if dataset_dir.endswith("/") else dataset_dir) + "_112x112_folders"
    idx_path = os.path.join(dataset_dir, "train.idx")
    bin_path = os.path.join(dataset_dir, "train.rec")

    print("save_dir = %s, idx_path = %s, bin_path = %s" % (save_dir, idx_path, bin_path))
    if os.path.exists(save_dir):
        print("%s already exists." % save_dir)
        return

    # Heavy third-party imports are deferred until there is real work to do.
    import mxnet as mx
    from tqdm import tqdm

    imgrec = mx.recordio.MXIndexedRecordIO(idx_path, bin_path, "r")
    try:
        # Record 0 carries only a header; its label[0] is used as the exclusive
        # upper bound of the record indices that are extracted below.
        rec_header, _ = mx.recordio.unpack(imgrec.read_idx(0))

        for ii in tqdm(range(1, int(rec_header.label[0]))):
            header, img = mx.recordio.unpack(imgrec.read_idx(ii))
            # The label is either a plain float or an indexable sequence whose
            # first element is used as the class id.
            img_idx = str(int(header.label if isinstance(header.label, float) else header.label[0]))
            img_save_dir = os.path.join(save_dir, img_idx)
            os.makedirs(img_save_dir, exist_ok=True)
            with open(os.path.join(img_save_dir, str(ii) + ".jpg"), "wb") as ff:
                ff.write(img)
    finally:
        # Always release the underlying record/index file handles.
        imgrec.close()


def _head_and_tail(items, count):
    """Return the first and last ``count`` items, or a copy of all items.

    All items are returned when ``count`` is not positive or when the head and
    tail slices would touch or overlap (which would otherwise duplicate entries).
    """
    if count <= 0 or 2 * count >= len(items):
        return list(items)
    return items[:count] + items[-count:]


def MXnet_bin_files_to_tf(test_bins, limit=0):
    """Rewrite pickled evaluation ``.bin`` files so every image is a ``bytes`` object.

    Each file is expected to unpickle into a pair ``(bins, issame_list)``. Every
    entry of ``bins`` is converted with ``bytes(...)`` and the result is pickled
    back to the SAME path as ``[bins_as_bytes, issame_list]``, overwriting the
    original file.

    Args:
        test_bins: Iterable of paths to the pickled files.
        limit: When positive, keep only the first and last ``limit`` entries of
            ``issame_list`` and the first and last ``2 * limit`` entries of
            ``bins``. ``0`` (the default) keeps everything. A limit so large
            that head and tail would overlap also keeps everything instead of
            duplicating entries.

    Returns:
        None.
    """
    import pickle

    print("test_bins =", test_bins)
    for test_bin_file in test_bins:
        # NOTE(security): pickle.load can execute arbitrary code; only run this
        # on evaluation files obtained from a trusted source.
        with open(test_bin_file, "rb") as ff:
            bins, issame_list = pickle.load(ff, encoding="bytes")

        bb = [bytes(ii) for ii in _head_and_tail(bins, limit * 2)]
        print("Saving to %s" % test_bin_file)
        with open(test_bin_file, "wb") as ff:
            pickle.dump([bb, _head_and_tail(issame_list, limit)], ff)

In [4]:
# Dataset paths
DATASET = 'ms1m-retinaface-t1'
DATASET_DIR = f'/app/data/{DATASET}'

# Evaluation pair files, expected inside the dataset directory
EVAL_BINS = [DATASET_DIR+'/lfw.bin', DATASET_DIR+'/cfp_fp.bin', DATASET_DIR+'/agedb_30.bin']

# Extract the training images from the MXNet record into per-label folders
MXnet_record_to_folder(DATASET_DIR)

# Convert the .bin files for use with TF (each file is rewritten in place)
MXnet_bin_files_to_tf(EVAL_BINS)

save_dir = /app/data/ms1m-retinaface-t1_112x112_folders, idx_path = /app/data/ms1m-retinaface-t1/train.idx, bin_path = /app/data/ms1m-retinaface-t1/train.rec


MXNetError: Traceback (most recent call last):
  File "../3rdparty/dmlc-core/src/io/local_filesys.cc", line 209
LocalFileSystem: Check failed: allow_null: :Open "/app/data/ms1m-retinaface-t1/train.rec": No such file or directory

In [11]:
from tensorflow import keras
import losses, train, models
import tensorflow_addons as tfa

# ResNet100 MXNet backbone + DepthWise Conv
# NOTE(review): this freshly built model is not passed to train.Train below
# (basic_model=None is given there), so it appears to be unused in this cell.
basic_model = models.buildin_models("r100", dropout=0, emb_shape=512, output_layer="GDC")

# Folder produced by MXnet_record_to_folder (its default "_112x112_folders" suffix)
data_path = f'/app/data/{DATASET}_112x112_folders'

# Training resumes from a saved checkpoint given via `model=`; the cell output
# confirms the model is loaded from that h5 file.
# NOTE(review): lr_base=0.1 here differs from the optimizer's learning_rate=0.01
# below, and the log prints "Learning rate for iter 1 is 0.1" before the loss
# becomes NaN at batch 245. The saved history shows the rate had already decayed
# to about 0.085; restarting at epoch 0 presumably resets the schedule to
# lr_base. TODO confirm against train.Train.
tt = train.Train(data_path, save_path='keras_resnet100_emore_add5epoch.h5', eval_paths=EVAL_BINS,
                basic_model=None, model='checkpoints/keras_resnet100_emore_add5epoch_basic_agedb_30_epoch_3_0.967333.h5', batch_size=100, random_status=0,
                lr_base=0.1, lr_decay=0.5, lr_decay_steps=16, lr_min=1e-5)

# SGD with decoupled weight decay from tensorflow_addons
optimizer = tfa.optimizers.SGDW(learning_rate=0.01, momentum=0.9, weight_decay=5e-5)

# Training schedule: one entry per stage; only the scale=64 ArcFace stage is active.
sch = [
  #{"loss": losses.ArcfaceLoss(scale=16), "epoch": 5, "optimizer": optimizer},
  #{"loss": losses.ArcfaceLoss(scale=32), "epoch": 5, "optimizer": optimizer},
  {"loss": losses.ArcfaceLoss(scale=64), "epoch": 5, "optimizer": optimizer},
]

# Second positional argument is presumably the initial epoch (compare
# `initial_epoch` in the commented-out call below) - TODO confirm.
tt.train(sch, 0)
#tt.train_single_scheduler(loss=losses.ArcfaceLoss(scale=64), epoch=5, optimizer=optimizer, initial_epoch=5)


>>>> Load model from h5 file: checkpoints/keras_resnet100_emore_add5epoch_basic_agedb_30_epoch_3_0.967333.h5...
>>>> L2 regularizer value from basic_model: 0
>>>> Init type by loss function name...
>>>> Train arcface...
>>>> Init softmax dataset...
>>>> reloaded from dataset backup: ms1m-retinaface-t1_112x112_folders_shuffle.npz
>>>> Image length: 5179510, Image class length: 5179510, classes: 93431
>>>> Use specified optimizer: <tensorflow_addons.optimizers.weight_decay_optimizers.SGDW object at 0x7fa22c147fd0>
>>>> Insert weight decay callback...
>>>> Add arcface layer, loss_top_k=1...
>>>> loss_weights: {'arcface': 1}
Epoch 1/5

Learning rate for iter 1 is 0.10000000149011612
Weight decay is 0.0005000000237487257
  246/51795 [..............................] - ETA: 5:46:18 - loss: nan - accuracy: 0.0016    Batch 245: Invalid loss, terminating training



Evaluating lfw: 100%|██████████| 120/120 [00:33<00:00,  3.59it/s]

NAN in embs, not a good one




Evaluating cfp_fp: 100%|██████████| 140/140 [00:38<00:00,  3.60it/s]

NAN in embs, not a good one




Evaluating agedb_30: 100%|██████████| 120/120 [00:33<00:00,  3.61it/s]

NAN in embs, not a good one






Epoch 00001: saving model to checkpoints/keras_resnet100_emore_add5epoch.h5

Continue? ([Y]/n): 
>>>> Train arcface DONE!!! epochs = [0], model.stop_training = True
>>>> My history:
{
  'lr': [0.09903939813375473, 0.09619443863630295, 0.09157442301511765, 0.0853569433093071, 0.09999997913837433],
  'loss': [15.516167640686035, 8.281216621398926, 7.494349002838135, 7.060657024383545, nan],
  'accuracy': [0.6002210378646851, 0.9365062117576599, 0.9491091966629028, 0.9548008441925049, 0.00162601622287184],
  'lfw': [0.996, 0.9956666666666667, 0.9968333333333333, 0.997, 0.0],
  'lfw_thresh': [0.3250987231731415, 0.33404529094696045, 0.3266749680042267, 0.32773804664611816, 0.0],
  'cfp_fp': [0.9505714285714286, 0.9644285714285714, 0.9687142857142857, 0.9708571428571429, 0.0],
  'cfp_fp_thresh': [0.21803595125675201, 0.22251766920089722, 0.21157196164131165, 0.22387471795082092, 0.0],
  'agedb_30': [0.9443333333333334, 0.9616666666666667, 0.9673333333333334, 0.9665, 0.0],
  'agedb_30_thres

# Проверим точность на LFW

In [6]:
import evals
# Directory holding the evaluation .bin file used in this cell.
# NOTE(review): this differs from DATASET_DIR used for EVAL_BINS in the training
# cell, and the f-string has no placeholders.
EVALSET_DIR = f'/app/data/faces_emore'
# Load the saved checkpoint without compiling it (compile=False).
basic_model = keras.models.load_model('checkpoints/keras_resnet100_emore_add5epoch_basic_agedb_30_epoch_3_0.967333.h5', compile=False)
# Build the evaluation callback for the LFW pairs and trigger it once by hand;
# the cell output reports the resulting accuracy and threshold.
ee = evals.eval_callback(basic_model, f'{EVALSET_DIR}/lfw.bin')
ee.on_epoch_end(0)




Evaluating lfw: 100%|██████████| 94/94 [02:26<00:00,  1.56s/it]


>>>> lfw evaluation max accuracy: 0.996333, thresh: 0.350268, previous max accuracy: 0.000000
>>>> Improved = 0.996333



