In [2]:
import os
from pathlib import Path
from deepface import DeepFace
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

Directory  C:\Users\yaroslav /.deepface created
Directory  C:\Users\yaroslav /.deepface/weights created


In [3]:
from IPython.utils import io

In [4]:
# Create the dataset folder layout (dataset/train and dataset/test).
# makedirs(exist_ok=True) creates missing parents and is safe to re-run; the
# previous single try/except skipped creating train/ and test/ whenever
# "dataset" already existed.
for split_dir in ("train", "test"):
    os.makedirs(os.path.join("dataset", split_dir), exist_ok=True)

In [None]:
# Fraction of each person's photos that is copied into the test split.
TEST_SIZE = 0.333
# Names of the people (lfw/ sub-folders) that were included in the dataset.
persons = []

# The destination folders must exist before files are written into them.
for split_dir in ('dataset/train', 'dataset/test'):
    os.makedirs(split_dir, exist_ok=True)

# sorted() makes the split reproducible: os.listdir() order is arbitrary.
for dir_ in sorted(os.listdir('lfw/')):
    folder = Path('lfw/' + dir_)

    # Skip stray files lying next to the per-person folders.
    if not folder.is_dir():
        continue

    photos = sorted(p for p in folder.iterdir() if p.is_file())
    num_photos = len(photos)

    # Only people with more than 2 images go into the train/test sets.
    if num_photos > 2:
        # How many images to take for the test split (at least one).
        n_tests_images = max(1, int(num_photos * TEST_SIZE))

        persons.append(dir_)

        # The first n_tests_images photos go to test, the rest to train.
        for idx, src in enumerate(photos):
            split = 'test' if idx < n_tests_images else 'train'
            dest = Path(f'dataset/{split}/{src.name}')
            dest.write_bytes(src.read_bytes())

## Сохранение массивов в файл

In [75]:
def save_emb2arr(mode='train'):
    """Compute face embeddings for one dataset split and save them to disk.

    Every file in ``dataset/<mode>`` is passed to ``DeepFace.represent``.
    The results are written to two files in the working directory:

    * ``X_<mode>.npy`` -- float array of shape (n_files + 1, 2622)
    * ``y_<mode>.npy`` -- string array of shape (n_files + 1,) with filenames

    Row 0 of both arrays is a placeholder (NaN row / empty string). It is
    kept because the loading code drops the first element with ``[1:]``.
    Images for which DeepFace raises ``ValueError`` are stored as an all-NaN
    row so that X and y stay aligned.

    Parameters
    ----------
    mode : str
        Name of the split; selects both the input folder and the output
        file names (e.g. 'train' or 'test').

    Returns
    -------
    None
    """
    emb_dim = 2622
    src_dir = f'dataset/{mode}'

    # Collect rows in lists and build the arrays once at the end;
    # np.append inside the loop copies the whole array on every iteration.
    rows = [np.full((1, emb_dim), np.nan)]   # placeholder row 0
    labels = ['']                            # placeholder label 0

    for filename in tqdm(sorted(os.listdir(src_dir))):
        path = f'{src_dir}/{filename}'
        try:
            # Capture (and discard) whatever DeepFace prints while running
            with io.capture_output():
                embedding = np.array(DeepFace.represent(img_path = path)).reshape(1, -1)

        # If no embedding could be computed, store a NaN row instead
        except ValueError:
            embedding = np.full((1, emb_dim), np.nan)

        rows.append(embedding)
        labels.append(filename)

    X = np.concatenate(rows, axis=0)
    y = np.array(labels)

    with open(f'X_{mode}.npy', 'wb') as f:
        np.save(f, X)
    with open(f'y_{mode}.npy', 'wb') as f:
        np.save(f, y)
    return None

In [None]:
# Compute and store the embeddings for both splits, train first.
for split_name in ('train', 'test'):
    save_emb2arr(mode=split_name)

## Чтение записанных массивов

In [35]:
# The element at index 0 of every saved array is dropped,
# because it is the np.empty() placeholder written by the saving step.
X_train = np.load('X_train.npy')[1:]
y_train = np.load('y_train.npy')[1:]

X_test = np.load('X_test.npy')[1:]
y_test = np.load('y_test.npy')[1:]

## Уберём лишнее в лейблах

In [36]:
def _person_name(filename):
    """Turn a photo filename such as 'Some_Name_0001.jpg' into 'Some_Name'.

    The extension and the trailing '_<number>' part are removed. Strings
    that do not end with an image extension are returned unchanged, so
    re-running this cell does not truncate labels that were already cleaned
    (a fixed ``[:-9]`` slice would cut them again).
    """
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        return filename
    stem = filename.rsplit('.', 1)[0]
    return stem.rsplit('_', 1)[0]


y_train = np.array([_person_name(str_) for str_ in y_train])
y_test = np.array([_person_name(str_) for str_ in y_test])

## Избавимся от пропущенных значений

## Инициализируем модель

In [53]:
from tensorflow.keras.applications.resnet50 import ResNet50

In [66]:
# Build a ResNet50 network without its top (classification) layers.
# NOTE(review): per the Keras docs `classes` is only used when
# include_top=True, so classes=901 presumably has no effect here -- confirm,
# and add a custom head if a 901-way classifier is intended.
model = ResNet50(include_top=False, classes=901)

In [67]:
# Print the layer table of the model (layer type, output shape, parameter count).
model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, None, None,  0           []                               
                                 3)]                                                              
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, None, None,   0           ['input_2[0][0]']                
                                3)                                                                
                                                                                                  
 conv1_conv (Conv2D)            (None, None, None,   9472        ['conv1_pad[0][0]']              
                                64)                                                        

                                                                                                  
 conv2_block2_add (Add)         (None, None, None,   0           ['conv2_block1_out[0][0]',       
                                256)                              'conv2_block2_3_bn[0][0]']      
                                                                                                  
 conv2_block2_out (Activation)  (None, None, None,   0           ['conv2_block2_add[0][0]']       
                                256)                                                              
                                                                                                  
 conv2_block3_1_conv (Conv2D)   (None, None, None,   16448       ['conv2_block2_out[0][0]']       
                                64)                                                               
                                                                                                  
 conv2_blo

                                128)                                                              
                                                                                                  
 conv3_block2_2_bn (BatchNormal  (None, None, None,   512        ['conv3_block2_2_conv[0][0]']    
 ization)                       128)                                                              
                                                                                                  
 conv3_block2_2_relu (Activatio  (None, None, None,   0          ['conv3_block2_2_bn[0][0]']      
 n)                             128)                                                              
                                                                                                  
 conv3_block2_3_conv (Conv2D)   (None, None, None,   66048       ['conv3_block2_2_relu[0][0]']    
                                512)                                                              
          

 conv4_block1_1_bn (BatchNormal  (None, None, None,   1024       ['conv4_block1_1_conv[0][0]']    
 ization)                       256)                                                              
                                                                                                  
 conv4_block1_1_relu (Activatio  (None, None, None,   0          ['conv4_block1_1_bn[0][0]']      
 n)                             256)                                                              
                                                                                                  
 conv4_block1_2_conv (Conv2D)   (None, None, None,   590080      ['conv4_block1_1_relu[0][0]']    
                                256)                                                              
                                                                                                  
 conv4_block1_2_bn (BatchNormal  (None, None, None,   1024       ['conv4_block1_2_conv[0][0]']    
 ization) 

                                                                                                  
 conv4_block3_3_bn (BatchNormal  (None, None, None,   4096       ['conv4_block3_3_conv[0][0]']    
 ization)                       1024)                                                             
                                                                                                  
 conv4_block3_add (Add)         (None, None, None,   0           ['conv4_block2_out[0][0]',       
                                1024)                             'conv4_block3_3_bn[0][0]']      
                                                                                                  
 conv4_block3_out (Activation)  (None, None, None,   0           ['conv4_block3_add[0][0]']       
                                1024)                                                             
                                                                                                  
 conv4_blo

 ization)                       256)                                                              
                                                                                                  
 conv4_block6_2_relu (Activatio  (None, None, None,   0          ['conv4_block6_2_bn[0][0]']      
 n)                             256)                                                              
                                                                                                  
 conv4_block6_3_conv (Conv2D)   (None, None, None,   263168      ['conv4_block6_2_relu[0][0]']    
                                1024)                                                             
                                                                                                  
 conv4_block6_3_bn (BatchNormal  (None, None, None,   4096       ['conv4_block6_3_conv[0][0]']    
 ization)                       1024)                                                             
          

 conv5_block3_1_conv (Conv2D)   (None, None, None,   1049088     ['conv5_block2_out[0][0]']       
                                512)                                                              
                                                                                                  
 conv5_block3_1_bn (BatchNormal  (None, None, None,   2048       ['conv5_block3_1_conv[0][0]']    
 ization)                       512)                                                              
                                                                                                  
 conv5_block3_1_relu (Activatio  (None, None, None,   0          ['conv5_block3_1_bn[0][0]']      
 n)                             512)                                                              
                                                                                                  
 conv5_block3_2_conv (Conv2D)   (None, None, None,   2359808     ['conv5_block3_1_relu[0][0]']    
          