In [None]:
from pathlib import Path
import json

In [None]:
from facenet_pytorch import MTCNN
from PIL import Image
import numpy as np
from tqdm import tqdm

In [None]:
# Integer-degree windows used by pred_face_pose to decide whether a face is frontal.
# The eye angles are truncated with int() before the membership test, and range()
# excludes its upper bound, so the effective windows are 35 <= angle < 57 (right)
# and 35 <= angle < 58 (left).
frontal_angle_r_range = range(35, 57)  # Right eye angle range for frontal face (35..56 after truncation)
frontal_angle_l_range = range(35, 58)  # Left eye angle range for frontal face (35..57 after truncation)

In [None]:
# Shared face detector instance; pred_face_pose only calls its .detect() method.
# NOTE(review): the per-argument notes below follow the facenet-pytorch MTCNN
# documentation — confirm against the installed library version.
mtcnn = MTCNN(
    image_size=160,  # presumably the size of cropped face outputs; not used by .detect() here — TODO confirm
    margin=0,  # presumably extra margin around the cropped face — TODO confirm
    min_face_size=20,  # presumably the smallest face size (pixels) the detector searches for
    thresholds=[0.6, 0.7, 0.7], # MTCNN thresholds (presumably one per detection stage)
    factor=0.709,  # presumably the scale factor of the image pyramid
    post_process=True,  # presumably affects returned face crops only — TODO confirm
    device='cpu'  # run the detector on the CPU
)

In [None]:
def angle(a, b, c):
    """Return the angle at vertex ``b``, in degrees, between the rays b->a and b->c.

    Parameters
    ----------
    a, b, c : array-like
        Point coordinates of equal length (e.g. ``(x, y)`` landmark positions).

    Returns
    -------
    float
        Angle in degrees in the range [0, 180] as a plain Python float (so it is
        always JSON-serialisable). ``nan`` is returned when ``a`` or ``c``
        coincides with ``b``, because the angle is undefined in that case.
    """
    # Work in float64 regardless of the input dtype (float32 / object arrays
    # would otherwise propagate into the result).
    ba = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    bc = np.asarray(c, dtype=float) - np.asarray(b, dtype=float)

    norm_product = np.linalg.norm(ba) * np.linalg.norm(bc)
    if norm_product == 0:
        # Degenerate triangle: avoid the 0/0 division and its RuntimeWarning.
        return float('nan')

    # Rounding can push the ratio marginally outside [-1, 1] for (anti)parallel
    # vectors, which would make arccos return nan; clip it back into the domain.
    cosine_angle = np.clip(np.dot(ba, bc) / norm_product, -1.0, 1.0)

    return float(np.degrees(np.arccos(cosine_angle)))

In [None]:
def pred_face_pose(file_path):
    """Detect faces in an image and classify each face's pose.

    The pose is derived from the triangle formed by the first three landmarks
    (both eyes and the nose): when both eye angles fall inside the frontal
    ranges the face is labelled ``'frontal'``; otherwise it is ``'left'`` when
    the right-eye angle is smaller than the left-eye angle, else ``'right'``.

    Parameters
    ----------
    file_path : str or path-like
        Path of the image file to analyse.

    Returns
    -------
    list of dict
        One entry per detected face. A successful entry has ``'error': None``
        plus ``bbox``, ``landmarks``, ``angle_r``, ``angle_l``, ``pred_prob``
        and ``pred_label``. A failed entry has ``'error'`` set to an error code
        and a human-readable ``'error_detail'``. When the image cannot be read,
        the detector fails, or no face is found, the list contains a single
        error entry. All values are plain Python types so the result can be
        passed to ``json.dump``.
    """
    try:
        # The context manager releases the file handle; convert() loads the pixel
        # data and returns an independent image that stays valid after closing.
        with Image.open(file_path) as source_image:
            image = source_image.convert('RGB')
    except OSError as e:
        # Unreadable / corrupt / truncated file: report it like the other
        # failures instead of aborting a whole batch run.
        return [{'error': 'error_image_read', 'error_detail': str(e)}]

    # Detection part producing bounding box, probability of the detected face, and the facial landmarks
    try:
        bbox_, prob_, landmarks_ = mtcnn.detect(image, landmarks=True)
    except RuntimeError as e:
        return [{'error': 'error_runtime', 'error_detail': str(e)}]

    if bbox_ is None:
        return [{'error': 'error_no_face_detected', 'error_detail': 'Bounding box is None, no face detected'}]

    faces = []
    for bbox, landmarks, prob in zip(bbox_, landmarks_, prob_):
        # Check if we detect a face in the image
        if bbox is None:
            faces.append({'error': 'error_no_face_detected', 'error_detail': 'Bounding box is None, no face detected'})
            continue

        # float() guards against numpy scalar results, which json.dump may reject
        angle_r = float(angle(landmarks[0], landmarks[1], landmarks[2]))  # Calculate the right eye angle
        angle_l = float(angle(landmarks[1], landmarks[0], landmarks[2]))  # Calculate the left eye angle

        if np.isnan(angle_r) or np.isnan(angle_l):
            # int(nan) below would raise ValueError; coincident landmarks carry
            # no pose information, so record the face as an error instead.
            faces.append({'error': 'error_invalid_landmarks', 'error_detail': 'Eye/nose landmarks are degenerate, angle is undefined'})
            continue

        if (int(angle_r) in frontal_angle_r_range) and (int(angle_l) in frontal_angle_l_range):
            pred_label = 'frontal'
        elif angle_r < angle_l:
            pred_label = 'left'
        else:
            pred_label = 'right'

        faces.append({
            'error': None,
            'bbox': bbox.tolist(),
            'landmarks': {
                'left_eye': landmarks[0].tolist(),
                'right_eye': landmarks[1].tolist(),
                'nose': landmarks[2].tolist(),
                'left_mouth': landmarks[3].tolist(),
                'right_mouth': landmarks[4].tolist()
            },
            'angle_r': angle_r,
            'angle_l': angle_l,
            # Detector probabilities may be numpy scalars (e.g. float32), which
            # json.dump cannot serialise; store a plain Python float.
            'pred_prob': float(prob),
            'pred_label': pred_label
        })

    return faces

# Process all images

In [None]:
# Input root: entries directly under this directory are scanned recursively for *.jpg images.
sample_parent = '../data/wiki_crop/'
# Output root: detection results are written as JSON files into its 'json' sub-directory.
sample_output = '../data/wiki_crop_mtcnn_pose_detection/'

In [None]:
# Run pose detection over every dataset directory and write one JSON file per dataset.
(Path(sample_output) / 'json').mkdir(parents=True, exist_ok=True)

# Only directories are datasets: plain files sitting next to them (e.g. metadata
# files) would otherwise each produce an empty '<file name>.json'.
# Sorted so the processing order does not depend on the filesystem.
datasets = sorted(entry for entry in Path(sample_parent).iterdir() if entry.is_dir())

# Glob every dataset once and reuse the listing for both the progress total and the loop.
dataset_images = {dataset: sorted(dataset.glob('**/*.jpg')) for dataset in datasets}
total_images = sum(len(images) for images in dataset_images.values())

# The context manager closes the progress bar even if processing is interrupted.
with tqdm(total=total_images) as progress_bar:
    for dataset in datasets:
        detection_result = {}

        images = dataset_images[dataset]
        for image in images:
            faces = pred_face_pose(image)
            detection_result[image.name] = faces

            progress_bar.update(1)

        # Results are written per dataset, so completed datasets survive a later failure.
        with open(Path(sample_output) / 'json' / f'{dataset.name}.json', 'w', encoding='utf-8') as f:
            json.dump(detection_result, f, ensure_ascii=False, indent=4)