In [None]:
import cv2
import os
import numpy as np
import insightface
from insightface.app import FaceAnalysis
from PIL import Image, ImageOps

# Show the installed insightface version; the original author noted 0.7.3.
print(insightface.__version__)

# Name of the insightface model pack to load.
model_pack_name = 'antelopev2'

# Only the detection and recognition modules are enabled. The CUDA provider
# is listed ahead of the CPU provider (presumably so the CPU acts as a
# fallback when CUDA is unavailable - confirm against ONNX Runtime docs).
app = FaceAnalysis(
    name=model_pack_name,
    allowed_modules=['detection', 'recognition'],
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
)

# NOTE(review): ctx_id=0 presumably selects the first GPU - confirm.
app.prepare(ctx_id=0, det_size=(640, 640))

In [None]:
# Directory holding the reference photos, named <show>_<identity>_<number>.jpg
# (for example bbt_Alex_0.jpg).
folder_path = '../photos'

# Nested result: faces[show][identity][number] -> first entry returned by app.get().
faces = {}

# os.listdir returns entries in arbitrary order; sorting keeps the run (and the
# first failure, if any) reproducible.
for filename in sorted(os.listdir(folder_path)):
    stem, ext = os.path.splitext(filename)
    # Skip anything that is not a .jpg photo (hidden files, checkpoint
    # directories, ...) instead of crashing while parsing its name.
    if ext.lower() != '.jpg':
        continue

    name_parts = stem.split('_')
    if len(name_parts) < 3:
        raise ValueError(
            f"Unexpected photo name {filename!r}; "
            "expected <show>_<identity>_<number>.jpg"
        )
    show = name_parts[0]  # bbt
    identity = name_parts[1]  # Alex
    number = name_parts[2]  # 0

    image_path = os.path.join(folder_path, filename)
    # The context manager closes the underlying file once the RGB copy exists.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    # The PIL image is RGB; it is converted to BGR before being handed to app.get().
    face_info = app.get(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR))
    if len(face_info) != 1:
        # Retry once with a 100 px black border around the photo; give up if
        # the padded image still does not yield exactly one face.
        image = ImageOps.expand(image, border=100, fill='black')
        face_info = app.get(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR))
        assert len(face_info) == 1, f"Show: {show}, Identity: {identity}, Number: {number}"

    # Register the face only after detection succeeded, so no empty
    # show/identity entries are left behind by a failing photo.
    faces.setdefault(show, {}).setdefault(identity, {})[number] = face_info[0]

In [None]:
def sim(x, y):
    """Return the cosine similarity of two vectors.

    Computes ``x . y / sqrt((x . x) * (y . y))`` using the ``dot`` method of
    the inputs, so both arguments must provide ``.dot`` (e.g. numpy arrays).
    No special handling is done for zero-length vectors.
    """
    inner_product = x.dot(y)
    squared_norm_product = x.dot(x) * y.dot(y)
    return inner_product / np.sqrt(squared_norm_product)

import h5py

# Write one reference embedding per identity to face_feat.h5, stored as
# float16 under the dataset name "<show>_<identity>". Mode 'w' replaces any
# existing file.
with h5py.File('face_feat.h5', 'w') as hf:
    for show, identities in faces.items():
        for name, info in identities.items():
            # Prefer photo '0', then photo '1' (the original behaviour); if an
            # identity has neither, fall back to its lowest remaining key
            # instead of raising KeyError.
            photo_key = next((key for key in ('0', '1') if key in info), None)
            if photo_key is None:
                photo_key = min(info)
            embedding = info[photo_key]['embedding']

            dataset_name = f"{show}_{name}"
            hf.create_dataset(dataset_name, data=embedding, dtype='float16')

            # Optional diagnostic, kept disabled: flag identities whose two
            # photos have a cosine similarity below 0.4.
            # if '0' in info and '1' in info:
            #     s = sim(info['0']['embedding'], info['1']['embedding'])
            #     if s < 0.4:
            #         print(show, name, s)

    # Sanity check: print shape and dtype of one stored embedding. Use the
    # original sample identity when it exists, otherwise the first dataset in
    # the file, so other photo sets do not fail here with KeyError.
    sample_name = "castle_Scarlett"
    if sample_name not in hf:
        sample_name = next(iter(hf), None)
    if sample_name is not None:
        sample = hf[sample_name][:]  # read the dataset once
        print(sample.shape, sample.dtype)