# Face Identification Classifier

### Ref https://github.com/peteryuX/arcface-tf2

In [3]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from models import ArcFaceModel
# from losses import SoftmaxLoss
from losses import softmax_loss
import dataset
import tensorflow as tf
import os
import logging

tf.get_logger().setLevel(logging.ERROR)
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [4]:
!echo $CUDA_VISIBLE_DEVICES

0


In [5]:
### IJB-C Dataset

# batch_size = 128
# input_size = 112
# embd_shape = 512
# head_type = 'ArcHead'
# backbone_type = 'MobileNetV2'
# w_decay=5e-4
# num_classes = 3584 
# base_lr = 0.01
# dataset_len = 13033 
# epochs = 100
# steps_per_epoch = dataset_len // batch_size

### MS1M dataset

batch_size = 128 # Initially 128
input_size = 112
embd_shape = 512
head_type = 'ArcHead'
backbone_type = 'MobileNetV2'
w_decay=5e-4
num_classes = 85742 
dataset_len = 5822653 
base_lr = 0.01 # initially 0.01
epochs = 20
save_steps = 1000
steps_per_epoch = dataset_len // batch_size
steps = 1
is_ccrop=False
binary_img=True
is_Adam = False

### Find latest checkpoint

In [6]:
from glob import glob
base_dir = "/hd/jbpark/models/checkpoints/wo_tfidentity/"
save_name = "e*"
file_list = []
for files in glob(base_dir+save_name):
    file_list.append(files.split('/')[-1].split('l_')[-1])
file_list.sort()

load_file_name = []
for files in glob(base_dir+save_name):
    if file_list[0] == files.split('/')[-1].split('l_')[-1]:
        load_file_name = files
best_checkpoint = load_file_name.split('.data')[0]
best_checkpoint

'/hd/jbpark/models/checkpoints/wo_tfidentity/e_5_l_16.65623664855957.ckpt'

# Training Face Identification Classifier

In [7]:
import numpy as np
import tensorflow as tf
from pathlib import Path
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model, load_model
from layers import ArcMarginPenaltyLogists
from losses import softmax_loss
from models import ArcFaceModel
import os

os.environ["CUDA_VISIBLE_DEVICES"]="0"

weight_file = best_checkpoint

model = ArcFaceModel(size=input_size,
                         backbone_type=backbone_type,
                         training=False)
model.load_weights(weight_file)
model.summary()

Model: "arcface_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_image (InputLayer)     [(None, 112, 112, 3)]     0         
_________________________________________________________________
mobilenetv2_1.00_224 (Functi (None, 4, 4, 1280)        2257984   
_________________________________________________________________
OutputLayer (Functional)     (None, 512)               10493440  
Total params: 12,751,424
Trainable params: 12,713,728
Non-trainable params: 37,696
_________________________________________________________________


### Extract Embedding Vectors & Create Classifier Training Dataset

In [None]:
import cv2
import numpy as np
from utils import l2_norm
from glob import glob
from tqdm import tqdm

dataset_path = "/hd/jbpark/IJB-C_Asian/Aligned/"

In [32]:
import os 
from tqdm import tqdm
id_list = os.listdir(dataset_path)
id_list.sort()
source_id = []
for id_name in tqdm(id_list):
    source_id.append(int(id_list.index(id_name)))
len(np.unique(np.array(source_id)))

100%|██████████| 3584/3584 [00:00<00:00, 66627.60it/s]


3584

In [43]:
subjects = id_list
label_int = source_id
embed_list = []
label_list = []
for subject in tqdm(subjects):
#     print("[*] Encode {} to Embedding Vector ({})".format(subject,embd_shape))
    img_paths = glob(dataset_path+subject+"/*")
    for img_path in img_paths:
#         print("[*] Encode {} to ./output_embeds.npy".format(img))
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (112,112))
        img = img.astype(np.float32) / 255.
        # print("len(img.shape): "+str(len(img.shape))+ ", img.shape = "+str(img.shape))
        if len(img.shape) == 3:
            img = np.expand_dims(img, 0)

    #     print("len(img.shape): "+str(len(img.shape))+ ", img.shape = "+str(img.shape))
        embeds = l2_norm(model(img, training=False))
    #     embeds.shape
    #     embeds
        embed_list.append(embeds[0].numpy())
        label_list.append(label_int[subjects.index(subject)])
embed_list = np.asarray(embed_list)
label_list = np.asarray(label_list)
# embed_list

100%|██████████| 3584/3584 [05:57<00:00, 10.02it/s]


In [44]:
save_path = "/hd/jbpark/IJB-C_Asian/"
np.save(save_path+'ijbc_embed_vectors.npy', embed_list)
np.save(save_path+'ijbc_labels.npy', label_list)

### Create Face Identification Classifer with Dot product & CMC

##### CMC with Top 1 Value

In [6]:
import numpy as np
import os 
from tqdm import tqdm
from glob import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
from utils import l2_norm
%matplotlib inline

save_path = "/hd/jbpark/IJB-C_Asian/"
embed_list = np.load(save_path+'ijbc_embed_vectors.npy')
label_list = np.load(save_path+'ijbc_labels.npy')

dataset_path = "/hd/jbpark/IJB-C_Asian/Aligned/"
id_list = os.listdir(dataset_path)
id_list.sort()

unique_labels = np.unique(label_list)

In [58]:
Rank = 5

# org_img_path = 'Test/Kelly Clarkson.jpg'
# org_img_path = 'Test/Baby Margaretha.jpg' # Good
# org_img_path = 'Test/Gareth Bale.jpg' 
# org_img_path = 'Test/Kelly Holmes.jpg'
# org_img_path = 'Test/Daniele Suzuki.jpg'
org_img_path = 'Test/Ayu Ting Ting.jpg'

img = cv2.imread(org_img_path)
convert_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

img = cv2.resize(convert_img, (112,112))
img = img.astype(np.float32) / 255.

if len(img.shape) == 3:
    img = np.expand_dims(img, 0)

#     print("len(img.shape): "+str(len(img.shape))+ ", img.shape = "+str(img.shape))
embeds = l2_norm(model(img, training=False))


dot_sim_list = []
temp_sim_list= []
for i in unique_labels:
    temp_sim_list= []
    temp_list = np.where(label_list==i)[0]
    for embeding in embed_list[temp_list]:
        dot_sim = np.dot(embeds,embeding)
        temp_sim_list.append(dot_sim)
    dot_sim_list.append(temp_sim_list[np.argmax(temp_sim_list)][0])
est = np.argmax(dot_sim_list)

df = pd.DataFrame(dot_sim_list)
sorted_df = df.sort_values(by=0,ascending=False)
estimated_label = np.asarray(sorted_df[:Rank].index)
id_list = np.array(id_list)
print(id_list[estimated_label])

['Tata Young' 'Kim Tae-hee' 'Ayu Ting Ting' 'Chae Jung-an' 'Nam Bo-ra']


In [31]:
sorted_df[:Rank]

Unnamed: 0,0
3252,0.779108
1829,0.758277
425,0.724424
631,0.715903
2421,0.696663


##### CMC with average 

In [None]:
import numpy as np
import os 
from tqdm import tqdm
from glob import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
from utils import l2_norm
%matplotlib inline

save_path = "/hd/jbpark/IJB-C_Asian/"
embed_list = np.load(save_path+'ijbc_embed_vectors.npy')
label_list = np.load(save_path+'ijbc_labels.npy')

dataset_path = "/hd/jbpark/IJB-C_Asian/Aligned/"
id_list = os.listdir(dataset_path)
id_list.sort()

unique_labels = np.unique(label_list)

In [64]:
Rank = 5

# org_img_path = 'Test/Kelly Clarkson.jpg'
# org_img_path = 'Test/Baby Margaretha.jpg' # Good
# org_img_path = 'Test/Gareth Bale.jpg' 
# org_img_path = 'Test/Kelly Holmes.jpg'
# org_img_path = 'Test/Daniele Suzuki.jpg'
org_img_path = 'Test/Ayu Ting Ting.jpg'

img = cv2.imread(org_img_path)
convert_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

img = cv2.resize(convert_img, (112,112))
img = img.astype(np.float32) / 255.

if len(img.shape) == 3:
    img = np.expand_dims(img, 0)

#     print("len(img.shape): "+str(len(img.shape))+ ", img.shape = "+str(img.shape))
embeds = l2_norm(model(img, training=False))


dot_sim_list = []
temp_sim_list= []
for i in unique_labels:
    temp_sim_list= []
    temp_list = np.where(label_list==i)[0]
    for embeding in embed_list[temp_list]:
        dot_sim = np.dot(embeds,embeding)
        temp_sim_list.append(dot_sim)
    dot_sim_list.append(np.average(temp_sim_list,axis=0))
est = np.argmax(dot_sim_list)

df = pd.DataFrame(dot_sim_list)
sorted_df = df.sort_values(by=0,ascending=False)
estimated_label = np.asarray(sorted_df[:Rank].index)
id_list = np.array(id_list)
print(id_list[estimated_label])

['Nam Bo-ra' 'Siti Nurhaliza' 'Nguyễn Thị Kim Ngân' 'Yuni Shara'
 'Sameera Reddy']


In [52]:
sorted_df[:Rank]

Unnamed: 0,0
1680,0.703205
199,0.653246
575,0.645726
3134,0.633721
1191,0.605781


### Create Face Identification Classifer with Euclidean distance & CMC

##### CMC with Top 1 Value

In [65]:
import numpy as np
import os 
from tqdm import tqdm
from glob import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
from utils import l2_norm
%matplotlib inline

save_path = "/hd/jbpark/IJB-C_Asian/"
embed_list = np.load(save_path+'ijbc_embed_vectors.npy')
label_list = np.load(save_path+'ijbc_labels.npy')

dataset_path = "/hd/jbpark/IJB-C_Asian/Aligned/"
id_list = os.listdir(dataset_path)
id_list.sort()

unique_labels = np.unique(label_list)

In [97]:
Rank = 5

# org_img_path = 'Test/Kelly Clarkson.jpg'
# org_img_path = 'Test/Baby Margaretha.jpg' # Good
# org_img_path = 'Test/Gareth Bale.jpg' 
# org_img_path = 'Test/Kelly Holmes.jpg'
# org_img_path = 'Test/Daniele Suzuki.jpg'
org_img_path = 'Test/Ayu Ting Ting.jpg'

img = cv2.imread(org_img_path)
convert_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

img = cv2.resize(convert_img, (112,112))
img = img.astype(np.float32) / 255.

if len(img.shape) == 3:
    img = np.expand_dims(img, 0)

#     print("len(img.shape): "+str(len(img.shape))+ ", img.shape = "+str(img.shape))
embeds = l2_norm(model(img, training=False))


dot_sim_list = []
temp_sim_list= []
for i in unique_labels:
    temp_sim_list= []
    temp_list = np.where(label_list==i)[0]
    for embeding in embed_list[temp_list]:
        temp_sim_list.append(np.linalg.norm(embeds-embeding))
    dot_sim_list.append(temp_sim_list[np.argmax(temp_sim_list)])
est = np.argmax(dot_sim_list)

df = pd.DataFrame(dot_sim_list)
sorted_df = df.sort_values(by=0,ascending=True)
estimated_label = np.asarray(sorted_df[:Rank].index)
id_list = np.array(id_list)
print(id_list[estimated_label])

['Siti Nurhaliza' 'Nguyễn Thị Kim Ngân' 'Yuni Shara' 'Sameera Reddy'
 'Burcu Esmersoy']


In [77]:
sorted_df[:Rank]

Unnamed: 0,0
1680,0.770448
199,0.831066
575,0.841753
3134,0.855896
1191,0.883148


##### CMC with average

In [78]:
import numpy as np
import os 
from tqdm import tqdm
from glob import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
from utils import l2_norm
%matplotlib inline

save_path = "/hd/jbpark/IJB-C_Asian/"
embed_list = np.load(save_path+'ijbc_embed_vectors.npy')
label_list = np.load(save_path+'ijbc_labels.npy')

dataset_path = "/hd/jbpark/IJB-C_Asian/Aligned/"
id_list = os.listdir(dataset_path)
id_list.sort()

unique_labels = np.unique(label_list)

In [103]:
Rank = 5

# org_img_path = 'Test/Kelly Clarkson.jpg'
# org_img_path = 'Test/Baby Margaretha.jpg' # Good
# org_img_path = 'Test/Gareth Bale.jpg' 
# org_img_path = 'Test/Kelly Holmes.jpg'
# org_img_path = 'Test/Daniele Suzuki.jpg'
org_img_path = 'Test/Ayu Ting Ting.jpg'

img = cv2.imread(org_img_path)
convert_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

img = cv2.resize(convert_img, (112,112))
img = img.astype(np.float32) / 255.

if len(img.shape) == 3:
    img = np.expand_dims(img, 0)

#     print("len(img.shape): "+str(len(img.shape))+ ", img.shape = "+str(img.shape))
embeds = l2_norm(model(img, training=False))


dot_sim_list = []
temp_sim_list= []
for i in unique_labels:
    temp_sim_list= []
    temp_list = np.where(label_list==i)[0]
    for embeding in embed_list[temp_list]:
        temp_sim_list.append(np.linalg.norm(embeds-embeding))
    dot_sim_list.append(np.average(temp_sim_list,axis=0))
est = np.argmax(dot_sim_list)

df = pd.DataFrame(dot_sim_list)
sorted_df = df.sort_values(by=0,ascending=True)
estimated_label = np.asarray(sorted_df[:Rank].index)
id_list = np.array(id_list)
print(id_list[estimated_label])

['Nam Bo-ra' 'Siti Nurhaliza' 'Nguyễn Thị Kim Ngân' 'Yuni Shara'
 'Sameera Reddy']


In [91]:
sorted_df[:Rank]

Unnamed: 0,0
3109,0.862392
2492,0.870074
3534,0.873376
2963,0.878371
561,0.880157


### Create Face Identification Classifer with Cosine Similarity & CMC

##### CMC with Top 1 Value

In [15]:
import numpy as np
import os 
from tqdm import tqdm
from glob import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
from utils import l2_norm
import pandas as pd
%matplotlib inline

save_path = "/hd/jbpark/dataset/IJB-C_Asian/"
embed_list = np.load(save_path+'ijbc_embed_vectors.npy')
label_list = np.load(save_path+'ijbc_labels.npy')

dataset_path = "/hd/jbpark/dataset/IJB-C_Asian/Aligned/"
id_list = os.listdir(dataset_path)
id_list.sort()

unique_labels = np.unique(label_list)

In [24]:
Rank = 5

# org_img_path = 'Test/Kelly Clarkson.jpg'
# org_img_path = 'Test/Baby Margaretha.jpg' # Good
# org_img_path = 'Test/Gareth Bale.jpg' 
# org_img_path = 'Test/Kelly Holmes.jpg'
# org_img_path = 'Test/Daniele Suzuki.jpg'
org_img_path = 'Test/Ayu Ting Ting.jpg'

img = cv2.imread(org_img_path)
convert_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

img = cv2.resize(convert_img, (112,112))
img = img.astype(np.float32) / 255.

if len(img.shape) == 3:
    img = np.expand_dims(img, 0)

#     print("len(img.shape): "+str(len(img.shape))+ ", img.shape = "+str(img.shape))
embeds = l2_norm(model(img, training=False))


dot_sim_list = []
temp_sim_list= []
for i in unique_labels:
    temp_sim_list= []
    temp_list = np.where(label_list==i)[0]
    for embeding in embed_list[temp_list]:
        temp_sim_list.append(np.dot(embeds,embeding)/(np.linalg.norm(embeds)*np.linalg.norm(embeding)))
    dot_sim_list.append(temp_sim_list[np.argmax(temp_sim_list)])
est = np.argmax(dot_sim_list)

df = pd.DataFrame(dot_sim_list)
sorted_df = df.sort_values(by=0,ascending=False)
estimated_label = np.asarray(sorted_df[:Rank].index)
id_list = np.array(id_list)
print(id_list[estimated_label])

['Tata Young' 'Kim Tae-hee' 'Ayu Ting Ting' 'Chae Jung-an' 'Nam Bo-ra']


##### CMC with average

In [17]:
import numpy as np
import os 
from tqdm import tqdm
from glob import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
from utils import l2_norm
import pandas as pd
%matplotlib inline

save_path = "/hd/jbpark/dataset/IJB-C_Asian/"
embed_list = np.load(save_path+'ijbc_embed_vectors.npy')
label_list = np.load(save_path+'ijbc_labels.npy')

dataset_path = "/hd/jbpark/dataset/IJB-C_Asian/Aligned/"
id_list = os.listdir(dataset_path)
id_list.sort()

unique_labels = np.unique(label_list)

In [27]:
Rank = 5

# org_img_path = 'Test/Kelly Clarkson.jpg'
# org_img_path = 'Test/Baby Margaretha.jpg' # Good
org_img_path = 'Test/Gareth Bale.jpg' 
# org_img_path = 'Test/Kelly Holmes.jpg'
# org_img_path = 'Test/Daniele Suzuki.jpg'
# org_img_path = 'Test/Ayu Ting Ting.jpg'

img = cv2.imread(org_img_path)
convert_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

img = cv2.resize(convert_img, (112,112))
img = img.astype(np.float32) / 255.

if len(img.shape) == 3:
    img = np.expand_dims(img, 0)

#     print("len(img.shape): "+str(len(img.shape))+ ", img.shape = "+str(img.shape))
embeds = l2_norm(model(img, training=False))


dot_sim_list = []
temp_sim_list= []
for i in unique_labels:
    temp_sim_list= []
    temp_list = np.where(label_list==i)[0]
    for embeding in embed_list[temp_list]:
        temp_sim_list.append(np.dot(embeds,embeding)/(np.linalg.norm(embeds)*np.linalg.norm(embeding)))
    dot_sim_list.append(np.average(temp_sim_list,axis=0))
est = np.argmax(dot_sim_list)

df = pd.DataFrame(dot_sim_list)
sorted_df = df.sort_values(by=0,ascending=False)
estimated_label = np.asarray(sorted_df[:Rank].index)
id_list = np.array(id_list)
print(id_list[estimated_label])

['Pedro Rodríguez' 'Gareth Bale' 'Carl Hester' 'Islam Slimani'
 'Jorge Linares']
