In [3]:
from deepface import DeepFace
import cv2
import os
import shutil
from collections import defaultdict
import pandas as pd
import matplotlib.pyplot as plt
from environs import Env
import numpy as np

env = Env()
env.read_env('../.env', recurse=False)

True

### 1. Создадим датафрейм для базы изображений

In [4]:
img_path = '../datasets/facescrub_images/'

In [5]:
def list_images(path):
    """Create default dict with img paths"""
    imgs = defaultdict(list)
    imgs_list = []
    for dir in os.listdir(path):
        full_path = os.path.join(path, dir)
        for img in os.listdir(full_path):
            img_path = os.path.join(full_path, img)         
            if img_path.endswith('.png'):
                imgs[dir].append(img_path)
                imgs_list.append((img_path, dir))
    return imgs, imgs_list

In [6]:
imgs_dict, imgs_list = list_images(img_path)

In [7]:
imgs_df = pd.DataFrame(imgs_list, columns=['img_path', 'name'])
imgs_df.head()

Unnamed: 0,img_path,name
0,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner
1,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner
2,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner
3,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner
4,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner


In [133]:
imgs_df['name'].value_counts()

name
James_Remar        50
Lindsay_Hartley    49
Kimberlin_Brown    49
Sam_Rockwell       49
Christel_Khalil    49
                   ..
Emily_Deschanel    37
Brad_Garrett       37
Brendan_Fraser     30
Adam_Brody         28
Bill_Cosby         27
Name: count, Length: 80, dtype: int64

In [8]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
imgs_df['label'] = encoder.fit_transform(imgs_df['name'])
imgs_df.head()

Unnamed: 0,img_path,name,label
0,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner,49
1,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner,49
2,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner,49
3,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner,49
4,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner,49


### 2. Извлечение по несколько фото для каждого объекта для загрузки в базу данных.

In [134]:
IMGS_IN_DB_COUNT = 40

imgs_df['obj_row'] = imgs_df.groupby(['label']).cumcount() + 1
imgs_df.head()

Unnamed: 0,img_path,name,label,obj_row
0,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner,49,1
1,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner,49,2
2,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner,49,3
3,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner,49,4
4,../datasets/facescrub_images/Marilu_Henner/Mar...,Marilu_Henner,49,5


In [135]:
imgs_db_df = imgs_df[imgs_df['obj_row'] < IMGS_IN_DB_COUNT]
imgs_test_df = imgs_df[imgs_df['obj_row'] >= IMGS_IN_DB_COUNT]

### 3. Загрузка эмбеддингов изображений в БД

In [136]:
import psycopg2 as pg

conn=pg.connect(
    dbname='maska',
      user='postgres',
        password= env.str('PSQL_PASSWORD'),
          host='localhost',
          port='5432' )
cursor = conn.cursor()

Создадим таблицу в БД

In [137]:
RECOGNITION_MODEL = 'ArcFace'
DETECT_MODEL = 'opencv'

In [138]:
cursor.execute(f'DROP TABLE IF EXISTS faces_{RECOGNITION_MODEL}')
cursor.execute(f'CREATE TABLE IF NOT EXISTS faces_{RECOGNITION_MODEL} (id bigserial PRIMARY KEY, name varchar(30), label int4, row int4, embedding vector(512))')
conn.commit()

Сформируем эмбеддинги изображений:

In [146]:
def get_emb_list_from_df(img_df: pd.DataFrame, embedding_model, backend_model) -> list:
    emb_list = []
    errors_list = []
    for path, name, label, row in img_df.values:
        try:
            faces_obj = DeepFace.represent(cv2.imread(path), model_name=embedding_model, detector_backend=backend_model)
            embedding = np.array(faces_obj[0]['embedding'])
            emb_list.append((name, label, row, embedding))
        except:
            print("Can't detect face", name, row)
            errors_list.append((name, path, row))
        
    return emb_list, errors_list

In [140]:
imgs_db_embeddings, errors_list = get_emb_list_from_df(imgs_db_df, embedding_model=RECOGNITION_MODEL, backend_model=DETECT_MODEL)
print(f'На векторизацию отправлено {imgs_db_df.shape[0]} изображений. Изображений, где не обнаружены лица: {len(errors_list)} ')

Can't detect face Brianna_Brown 17
Can't detect face Adrienne_Barbeau 1
Can't detect face Adrienne_Barbeau 4
Can't detect face Adrienne_Barbeau 5
Can't detect face Adrienne_Barbeau 7
Can't detect face Adrienne_Barbeau 10
Can't detect face Adrienne_Barbeau 12
Can't detect face Adrienne_Barbeau 17
Can't detect face Adrienne_Barbeau 18
Can't detect face Adrienne_Barbeau 23
Can't detect face Adrienne_Barbeau 30
Can't detect face Adrienne_Barbeau 32
Can't detect face Adrienne_Barbeau 37
Can't detect face J.K._Simmons 1
Can't detect face J.K._Simmons 20
Can't detect face J.K._Simmons 21
Can't detect face J.K._Simmons 23
Can't detect face J.K._Simmons 28
Can't detect face J.K._Simmons 29
Can't detect face J.K._Simmons 30
Can't detect face Geoffrey_Rush 3
Can't detect face Geoffrey_Rush 7
Can't detect face Geoffrey_Rush 9
Can't detect face Geoffrey_Rush 11
Can't detect face Geoffrey_Rush 14
Can't detect face Geoffrey_Rush 17
Can't detect face Geoffrey_Rush 18
Can't detect face Geoffrey_Rush 21

Количество изображений, которые не попали в БД по каждому объекту:

In [147]:
from collections import Counter

names = [name for name, path, row in errors_list]

counts = Counter(names)
counts
    

ValueError: too many values to unpack (expected 3)

Запись значений в БД

In [142]:
for name, label, row, embedding in imgs_db_embeddings:
    embedding = list(embedding)
    cursor.execute(f'INSERT INTO faces_{RECOGNITION_MODEL} (name, label, row, embedding) VALUES (%s, %s, %s, %s)', (name, label, row, embedding))
    conn.commit()

## Предсказываем людей по тестовым фото, сравнивая фото с изображениями в БД

Сохраним эмбеддинги тестовых изображений:

In [148]:
imgs_test_embeddings, errors_test_list = get_emb_list_from_df(imgs_test_df, embedding_model=RECOGNITION_MODEL, backend_model=DETECT_MODEL)
print(f'На векторизацию отправлено {imgs_test_df.shape[0]} изображений. Изображений, где не обнаружены лица: {len(errors_test_list)} ')

Can't detect face Marilu_Henner 40
Can't detect face Adrienne_Barbeau 45
Can't detect face Geoffrey_Rush 41
Can't detect face Geoffrey_Rush 45
Can't detect face Melina_Kanakaredes 44
Can't detect face Chris_Evans 42
Can't detect face Karl_Urban 42
Can't detect face Robert_Knepper 44
Can't detect face Patrick_Swayze 41
Can't detect face Paul_Bettany 42
Can't detect face Jackie_Chan 41
Can't detect face Jackie_Chan 47
Can't detect face Bobbie_Eakes 42
Can't detect face Heath_Ledger 40
Can't detect face Heath_Ledger 43
Can't detect face Heath_Ledger 45
Can't detect face Didi_Conn 42
Can't detect face James_Remar 44
Can't detect face James_Remar 48
Can't detect face Shia_LaBeouf 44
Can't detect face Joanna_Kerns 42
Can't detect face Joanna_Kerns 43
Can't detect face Ilene_Kristen 43
Can't detect face Christian_Bale 41
Can't detect face Robert_Redford 41
Can't detect face Robert_Redford 42
Can't detect face Christine_Lakin 42
Can't detect face James_Marsden 42
Can't detect face James_Marsde

Количество изображений из тестовой выборки, где не удалось обнаружить лицо

In [149]:
imgs_test_df.shape[0] - len(imgs_test_embeddings)

32

Найдем в БД ближайшего человека

In [150]:
predictions = []
for name, label, row, embedding in imgs_test_embeddings:
    embedding = list(embedding)
    cursor.execute(f'SELECT * FROM faces_{RECOGNITION_MODEL} ORDER BY embedding <=> %s::vector LIMIT 1', (embedding,))
    predictions.append(cursor.fetchone())
len(predictions)

418

In [151]:
predictions_labels_names = [(label, name) for id, name, label, row, embedding in predictions]
predictions_labels_names[0]

(49, 'Marilu_Henner')

In [152]:
final_df = pd.concat((pd.DataFrame([(name, label) for name, label, row, emb in imgs_test_embeddings], columns=['name', 'label']), pd.DataFrame(predictions_labels_names, columns=['label_pred', 'name_pred'])), axis=1)

In [153]:
final_df.head()

Unnamed: 0,name,label,label_pred,name_pred
0,Marilu_Henner,49,49,Marilu_Henner
1,Marilu_Henner,49,49,Marilu_Henner
2,Marilu_Henner,49,49,Marilu_Henner
3,Marilu_Henner,49,49,Marilu_Henner
4,Marilu_Henner,49,49,Marilu_Henner


### Измерим качество предсказания

In [154]:
from sklearn.metrics imporst accuracy_score, precision_score, recall_score, f1_score


actuals = final_df['label'].tolist()
predictions = final_df['label_pred'].tolist()
accuracy = 100*accuracy_score(actuals, predictions)
precision = 100*precision_score(actuals, predictions, average='weighted')
recall = 100*recall_score(actuals, predictions, average='weighted')
f1 = 100*f1_score(actuals, predictions, average='weighted')
print(f'Accuracy_DetectModel:{DETECT_MODEL}, RecModel: {RECOGNITION_MODEL}:, images in DB: {IMGS_IN_DB_COUNT} ', accuracy)
print(f'Precision_DetectModel:{DETECT_MODEL}, RecModel: {RECOGNITION_MODEL}: :', precision)
print(f'recall_DetectModel:{DETECT_MODEL}, RecModel: {RECOGNITION_MODEL}: :', recall)
print(f'f1_DetectModel:{DETECT_MODEL}, RecModel: {RECOGNITION_MODEL}: :', f1)

Accuracy_DetectModel:opencv, RecModel: ArcFace:, images in DB: 40  99.28229665071771
Precision_DetectModel:opencv, RecModel: ArcFace: : 99.58798511430092
recall_DetectModel:opencv, RecModel: ArcFace: : 99.28229665071771
f1_DetectModel:opencv, RecModel: ArcFace: : 99.38024417337672


In [None]:
from typing import List

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.python.keras.engine import training
from tensorflow.keras.layers import (
    ZeroPadding2D,
    Input,
    Conv2D,
    BatchNormalization,
    PReLU,
    Add,
    Dropout,
    Flatten,
    Dense,
)

In [None]:
def load_model(
    url="https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5",
) -> Model:
    """
    Construct ArcFace model, download its weights and load
    Returns:
        model (Model)
    """
    base_model = ResNet34()
    inputs = base_model.inputs[0]
    arcface_model = base_model.outputs[0]
    arcface_model = BatchNormalization(momentum=0.9, epsilon=2e-5)(arcface_model)
    arcface_model = Dropout(0.4)(arcface_model)
    arcface_model = Flatten()(arcface_model)
    arcface_model = Dense(512, activation=None, use_bias=True, kernel_initializer="glorot_normal")(
        arcface_model
    )
    embedding = BatchNormalization(momentum=0.9, epsilon=2e-5, name="embedding", scale=True)(
        arcface_model
    )
    model = Model(inputs, embedding, name=base_model.name)

    # ---------------------------------------
    # check the availability of pre-trained weights

    home = folder_utils.get_deepface_home()

    file_name = "../models/arcface_weights.h5"

    # if os.path.isfile(file_name) != True:

    #     logger.info(f"{file_name} will be downloaded to {file_name}")
    #     gdown.download(url, output, quiet=False)

    # ---------------------------------------

    model.load_weights(file_name)

    return model

In [None]:
def ResNet34() -> Model:
    """
    ResNet34 model
    Returns:
        model (Model)
    """
    img_input = Input(shape=(112, 112, 3))

    x = ZeroPadding2D(padding=1, name="conv1_pad")(img_input)
    x = Conv2D(
        64, 3, strides=1, use_bias=False, kernel_initializer="glorot_normal", name="conv1_conv"
    )(x)
    x = BatchNormalization(axis=3, epsilon=2e-5, momentum=0.9, name="conv1_bn")(x)
    x = PReLU(shared_axes=[1, 2], name="conv1_prelu")(x)
    x = stack_fn(x)

    model = training.Model(img_input, x, name="ResNet34")

    return model

In [None]:
def block1(x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None):
    bn_axis = 3

    if conv_shortcut:
        shortcut = Conv2D(
            filters,
            1,
            strides=stride,
            use_bias=False,
            kernel_initializer="glorot_normal",
            name=name + "_0_conv",
        )(x)
        shortcut = BatchNormalization(
            axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + "_0_bn"
        )(shortcut)
    else:
        shortcut = x

    x = BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + "_1_bn")(x)
    x = ZeroPadding2D(padding=1, name=name + "_1_pad")(x)
    x = Conv2D(
        filters,
        3,
        strides=1,
        kernel_initializer="glorot_normal",
        use_bias=False,
        name=name + "_1_conv",
    )(x)
    x = BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + "_2_bn")(x)
    x = PReLU(shared_axes=[1, 2], name=name + "_1_prelu")(x)

    x = ZeroPadding2D(padding=1, name=name + "_2_pad")(x)
    x = Conv2D(
        filters,
        kernel_size,
        strides=stride,
        kernel_initializer="glorot_normal",
        use_bias=False,
        name=name + "_2_conv",
    )(x)
    x = BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + "_3_bn")(x)

    x = Add(name=name + "_add")([shortcut, x])
    return x


def stack1(x, filters, blocks, stride1=2, name=None):
    x = block1(x, filters, stride=stride1, name=name + "_block1")
    for i in range(2, blocks + 1):
        x = block1(x, filters, conv_shortcut=False, name=name + "_block" + str(i))
    return x


def stack_fn(x):
    x = stack1(x, 64, 3, name="conv2")
    x = stack1(x, 128, 4, name="conv3")
    x = stack1(x, 256, 6, name="conv4")
    return stack1(x, 512, 3, name="conv5")

In [None]:
from abc import ABC, abstractmethod
from typing import Any, Union, List, Tuple

class FacialRecognition(ABC):
    model: Union[Model, Any]
    model_name: str
    input_shape: Tuple[int, int]
    output_shape: int


    @abstractmethod
    def find_embeddings(self, img: np.ndarray) -> List[float]:
        pass

In [None]:
class ArcFaceClient(FacialRecognition):
    """
    ArcFace model class
    """

    def __init__(self):
        self.model = load_model()
        self.model_name = "ArcFace"
        self.input_shape = (112, 112)
        self.output_shape = 512

    def find_embeddings(self, img: np.ndarray) -> List[float]:
        """
        find embeddings with ArcFace model
        Args:
            img (np.ndarray): pre-loaded image in BGR
        Returns
            embeddings (list): multi-dimensional vector
        """
        # model.predict causes memory issue when it is called in a for loop
        # embedding = model.predict(img, verbose=0)[0].tolist()
        return self.model(img, training=False).numpy()[0].tolist()

In [None]:
model = tf.keras.models.load_model("../models/arcface_weights.h5")

ValueError: No model config found in the file at <tensorflow.python.platform.gfile.GFile object at 0x7f977f935120>.

Находим в базе наибольшего похожего человека:

In [None]:
actuals = []; predictions = []
for i in range(0, pairs.shape[0]):
    pair = pairs[i]
    img1 = pair[0]
    img2 = pair[1]
    img1 = img1[:,:,::-1]
    img2 = img2[:,:,::-1]
    obj = DeepFace.verify(img1, img2
    , model_name = 'ArcFace', distance_metric = 'euclidean', enforce_detection=False)
    prediction = obj["verified"]
    predictions.append(prediction)

    actual = True if labels[i] == 1 else False
    actuals.append(actual)

KeyboardInterrupt: 

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
accuracy = 100*accuracy_score(actuals, predictions)
precision = 100*precision_score(actuals, predictions)
recall = 100*recall_score(actuals, predictions)
f1 = 100*f1_score(actuals, predictions)
print(precision)


50.0


In [None]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(actuals, predictions)
print(cm)
 
tn, fp, fn, tp = cm.ravel()
print(tn, fp, fn, tp)

[[  0 500]
 [  0 500]]
0 500 0 500
