In [164]:
import os
from pathlib import Path
from deepface import DeepFace
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
from sklearn import preprocessing

In [165]:
from IPython.utils import io

In [4]:
# Create the dataset directory tree used by the train/test split below.
# makedirs(exist_ok=True) is applied to each leaf folder separately: with one
# try/except around three mkdir calls, an already existing "dataset" folder
# raised FileExistsError on the first call and the train/test sub-folders
# were silently never created.
for split_dir in ("dataset/train", "dataset/test"):
    os.makedirs(split_dir, exist_ok=True)

In [None]:
TEST_SIZE = 0.333  # fraction of a person's usable photos that goes to the test split
MIN_PHOTOS = 2     # a person is kept only when they have MORE than this many photos


def _has_detectable_face(img_path):
    """Return True when DeepFace can compute an embedding for ``img_path``.

    DeepFace.represent raises ValueError for images it cannot process; that is
    treated as "not usable". Console output of the call is captured so the cell
    output is not flooded.
    """
    with io.capture_output():
        try:
            # A str path is passed, matching how save_emb2arr calls DeepFace.
            DeepFace.represent(img_path=str(img_path))
        except ValueError:
            return False
    return True


# Sorted iteration makes the split reproducible (os.listdir order is arbitrary).
for person_dir in sorted(Path('lfw').iterdir()):
    # Ignore stray files lying next to the per-person folders.
    if not person_dir.is_dir():
        continue

    # Cheap pre-filter before running the (slow) face model on every photo.
    photos = sorted(person_dir.iterdir())
    if len(photos) <= MIN_PHOTOS:
        continue

    # Keep only photos that yield an embedding. Previously a failing photo was
    # still copied whenever enough photos remained, and the test quota was
    # computed from the unfiltered count.
    usable_photos = [photo for photo in photos if _has_detectable_face(photo)]
    if len(usable_photos) <= MIN_PHOTOS:
        continue

    # How many photos go to the test split (at least one per person).
    n_test_images = max(1, int(len(usable_photos) * TEST_SIZE))

    # The first n_test_images photos go to test, the remainder to train.
    for idx, src in enumerate(usable_photos):
        split = 'test' if idx < n_test_images else 'train'
        Path(f'dataset/{split}/{src.name}').write_bytes(src.read_bytes())

## Сохранение массивов в файл

In [92]:
def save_emb2arr(mode, emb_dim=2622):
    """Compute DeepFace embeddings for all images in ``dataset/<mode>`` and save them.

    Two files are written to the current working directory:

    * ``X_<mode>.npy`` - float array of shape ``(n_images + 1, emb_dim)``
    * ``y_<mode>.npy`` - array with ``n_images + 1`` file names

    Index 0 of both arrays is a placeholder (zeros / empty string). It is kept
    so that readers which drop the first entry with ``[1:]`` keep working.

    Parameters
    ----------
    mode : str
        Name of the sub-folder of ``dataset`` to process (e.g. ``'train'``).
    emb_dim : int, optional
        Length of one embedding vector; used for the placeholder row and for
        the NaN rows of images that could not be embedded. Defaults to 2622.

    Returns
    -------
    None

    Notes
    -----
    When ``DeepFace.represent`` raises ``ValueError`` for an image, a row of
    NaN is stored for it so that X and y stay aligned.
    """
    image_dir = f'dataset/{mode}'

    # Collect rows in lists and stack once at the end; np.append inside the
    # loop re-copies the whole array on every iteration.
    rows = [np.zeros((1, emb_dim))]
    names = ['']

    for filename in tqdm(os.listdir(image_dir)):
        path = f'{image_dir}/{filename}'
        try:
            # Capture whatever the call prints so the progress bar stays readable.
            with io.capture_output():
                embedding = np.array(DeepFace.represent(img_path=path)).reshape(1, -1)
        except ValueError:
            # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
            embedding = np.full((1, emb_dim), np.nan)

        rows.append(embedding)
        names.append(filename)

    X = np.concatenate(rows, axis=0)
    y = np.array(names)

    with open(f'X_{mode}.npy', 'wb') as f:
        np.save(f, X)
    with open(f'y_{mode}.npy', 'wb') as f:
        np.save(f, y)
    return None

In [93]:
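# Embedding extraction is slow (roughly 45 minutes in total, per the progress
# bars below), so the calls are left commented out; uncomment them to rebuild
# the .npy files.
# save_emb2arr(mode='train')
# save_emb2arr(mode='test')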

100%|██████████| 5427/5427 [32:34<00:00,  2.78it/s]
100%|██████████| 2179/2179 [13:06<00:00,  2.77it/s]


## Чтение записанных массивов

In [330]:
def _load_without_placeholder(npy_path):
    """Load a .npy file and return it without its first entry.

    The entry at index 0 is the placeholder that save_emb2arr writes in front
    of the real data, so it is dropped here.
    """
    with open(npy_path, 'rb') as f:
        return np.load(f)[1:]


# Note: rows of X_* are all-NaN for images where save_emb2arr could not
# compute an embedding; they are not filtered at this point.
X_train = _load_without_placeholder('X_train.npy')
y_train = _load_without_placeholder('y_train.npy')

X_test = _load_without_placeholder('X_test.npy')
y_test = _load_without_placeholder('y_test.npy')

## Уберём лишнее в лейблах и применим Encoder

In [331]:
def _drop_trailing_chars(filenames, n_chars=9):
    """Return an array with the last ``n_chars`` characters cut off every name.

    The default of 9 presumably matches a "_NNNN.jpg" tail on the image file
    names - TODO confirm against the files in dataset/.
    """
    return np.array([name[:-n_chars] for name in filenames])


y_train = _drop_trailing_chars(y_train)
y_test = _drop_trailing_chars(y_test)

In [332]:
# Learn the label -> integer mapping from the training labels only, then apply
# the same mapping to both splits. transform() on the test labels fails for any
# label that did not occur in y_train.
le = preprocessing.LabelEncoder().fit(y_train)
y_train, y_test = le.transform(y_train), le.transform(y_test)

In [333]:
# The softmax layer needs exactly one unit per encoded class. The width is
# taken from the fitted LabelEncoder instead of a hard-coded 901, so the model
# stays valid when the set of identities in dataset/train changes.
n_classes = len(le.classes_)

# Fully connected classifier on top of the precomputed face embeddings.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.Dense(n_classes, activation='softmax'),
])

In [334]:
# Integer class ids are used as targets, hence the sparse variant of
# categorical cross-entropy; accuracy is tracked during training.
adam = tf.keras.optimizers.Adam(learning_rate=1e-3)
sparse_ce = tf.keras.losses.SparseCategoricalCrossentropy()
model.compile(optimizer=adam, loss=sparse_ce, metrics=['accuracy'])

In [335]:
# save_emb2arr stores an all-NaN row for every image whose embedding could not
# be computed. A single such row in a batch turns the loss into NaN, so only
# rows without NaN are used for training.
finite_rows = ~np.isnan(X_train).any(axis=1)

# epochs=1000 is only an upper bound: stop once the training loss has not
# improved for 10 epochs instead of interrupting the kernel by hand.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='loss', patience=10, restore_best_weights=True
)

history = model.fit(
    X_train[finite_rows],
    y_train[finite_rows],
    batch_size=32,
    epochs=1000,
    callbacks=[early_stop],
)

Epoch 1/1000
Epoch 2/1000

KeyboardInterrupt: 