In [11]:
# Imports
import os
import face_recognition
import pandas as pd

In [12]:
# Load images and create encodings
def _load_reference(image_path):
    """Load a reference photo and return ``(image, encoding)`` for its first face.

    Parameters
    ----------
    image_path : str
        Path to the reference image file.

    Returns
    -------
    tuple
        The loaded image and the first entry of the list returned by
        ``face_recognition.face_encodings`` for that image.

    Raises
    ------
    ValueError
        If no face encoding could be produced for the image. Without this
        check, indexing the empty result with ``[0]`` fails with an
        ``IndexError`` that does not say which file was the problem.
    """
    image = face_recognition.load_image_file(image_path)
    encodings = face_recognition.face_encodings(image)
    if len(encodings) == 0:
        raise ValueError(f"No face found in reference image: {image_path}")
    # If several faces are present, only the first encoding is used.
    return image, encodings[0]


baerbock_image, baerbock_encoding = _load_reference("../data/training/annalena_baerbock.jpg")
laschet_image, laschet_encoding = _load_reference("../data/training/armin_laschet.jpg")
scholz_image, scholz_encoding = _load_reference("../data/training/olaf_scholz.jpg")

In [13]:
# Get the list of all files and directories
test_data_path = "../data/raw/"

# Drop the macOS .DS_Store metadata entry if it is present. A filter is used
# instead of list.remove(), which raises ValueError when the entry is missing
# and would abort the cell on directories that do not contain the file.
dir_list = [entry for entry in os.listdir(test_data_path) if entry != '.DS_Store']

# Initialize dictionary of recognized faces: one independent empty list per
# column, in the order the columns should appear in the resulting table
result_columns = (
    'filename',
    'is_baerbock',
    'is_laschet',
    'is_scholz',
    'no_matches',
    'no_face_found',
    'multiple_faces_found',
)
face_dict = {column: [] for column in result_columns}

# Loop through list of images and create dictionary of recognized faces.
# Every processed file gets exactly one True flag: the matched person,
# 'no_matches', 'no_face_found' or 'multiple_faces_found'.

# Reference faces are checked in this fixed order and the first match wins.
known_faces = [
    ('is_baerbock', baerbock_encoding),
    ('is_laschet', laschet_encoding),
    ('is_scholz', scholz_encoding),
]

# All boolean columns of face_dict (everything except the filename column)
flag_columns = [column for column in face_dict if column != 'filename']

# Entries that could not be loaded or encoded, kept with the reason so that
# skipped files are visible instead of silently disappearing from the results
skipped_files = []

for image in dir_list:
    try:
        unknown_image = face_recognition.load_image_file(os.path.join(test_data_path, image))
        unknown_encoding = face_recognition.face_encodings(unknown_image)
    except Exception as error:
        # Best-effort: skip unreadable entries, but catch Exception rather than
        # using a bare except so KeyboardInterrupt/SystemExit still propagate.
        skipped_files.append((image, repr(error)))
        continue

    # Decide the single outcome for this image
    if len(unknown_encoding) == 0:
        outcome = 'no_face_found'
    elif len(unknown_encoding) > 1:
        outcome = 'multiple_faces_found'
    else:
        outcome = 'no_matches'
        for column, known_encoding in known_faces:
            if face_recognition.compare_faces([known_encoding], unknown_encoding[0])[0]:
                outcome = column
                break

    # Append to every column in one place so all lists stay the same length
    face_dict['filename'].append(image)
    for column in flag_columns:
        face_dict[column].append(column == outcome)

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) that could not be processed; see skipped_files")

In [16]:
# Turn the collected columns into a table, then order the rows by filename
results_unsorted = pd.DataFrame(face_dict)
df = results_unsorted.sort_values(by=["filename"])
df

Unnamed: 0,filename,is_baerbock,is_laschet,is_scholz,no_matches,no_face_found,multiple_faces_found
215,annalena-baerbock_faz.net_1675006669632091635,False,False,False,True,False,False
274,annalena-baerbock_faz.net_1675006669641636596,False,False,False,True,False,False
14,annalena-baerbock_faz.net_1675006669643408521,False,False,False,True,False,False
385,annalena-baerbock_faz.net_1675006669745051775,False,True,False,False,False,False
331,annalena-baerbock_faz.net_1675006669865329219,True,False,False,False,False,False
...,...,...,...,...,...,...,...
251,olaf-scholz_tagesschau.de_1675006390193810137,False,False,False,False,True,False
377,olaf-scholz_tagesschau.de_1675006390205161204,False,False,False,True,False,False
64,olaf-scholz_tagesschau.de_1675006390225434741,False,False,False,True,False,False
281,olaf-scholz_tagesschau.de_1675006390231079905,False,False,False,True,False,False


In [21]:
# Column-wise totals of the boolean flags; 'filename' is dropped first because
# it is not a flag column
df.drop('filename', axis='columns').sum(axis='index')

is_baerbock              73
is_laschet               91
is_scholz                98
no_matches              207
no_face_found            98
multiple_faces_found     54
dtype: int64