In [1]:
import dlib
import os
import shutil
import cv2
import json
import numpy as np
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

In [3]:
class LandmarksDataset_original:
    """Dataset of ``.jpg`` images with matching ``.pts`` landmark files.

    Every image in ``root_dir`` is paired with the ``.pts`` file that shares
    its basename. Images without such a file are left out, so
    ``image_files[i]`` and ``landmarks_files[i]`` always describe the same
    sample.

    Attributes:
        root_dir: Directory that holds the images and annotations.
        image_files: Sorted image file names (relative to ``root_dir``).
        landmarks_files: Landmark file names, index-aligned with
            ``image_files``.
    """

    def __init__(self, root_dir):
        self.root_dir = root_dir
        entries = set(os.listdir(root_dir))
        self.image_files = []
        self.landmarks_files = []
        # os.listdir() returns names in arbitrary order, so two independently
        # filtered listings cannot be paired by position. Sort for a
        # reproducible order and derive each .pts name from its image instead.
        for name in sorted(entries):
            if not name.endswith('.jpg'):
                continue
            pts_name = os.path.splitext(name)[0] + '.pts'
            if pts_name in entries:
                self.image_files.append(name)
                self.landmarks_files.append(pts_name)

    def __len__(self):
        """Return the number of (image, landmarks) pairs."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, landmarks)`` for the pair at position ``idx``.

        ``image`` is whatever ``cv2.imread`` returns for the image path;
        ``landmarks`` is the array produced by ``_load_landmarks``.
        """
        img_name = os.path.join(self.root_dir, self.image_files[idx])
        image = cv2.imread(img_name)

        landmarks_name = os.path.join(self.root_dir, self.landmarks_files[idx])
        landmarks = self._load_landmarks(landmarks_name)

        return image, landmarks

    def _load_landmarks(self, file_path):
        """Parse a ``.pts`` file into a float array of shape ``(N, 2)``.

        The first three lines and the last line of the file are skipped;
        every remaining line must hold exactly two whitespace-separated
        numbers (``x y``), otherwise ``ValueError`` is raised.
        """
        with open(file_path, 'r') as f:
            point_lines = f.readlines()[3:-1]  # skip the first 3 and last lines

        landmarks = []
        for line in point_lines:
            x, y = map(float, line.split())
            landmarks.append([x, y])
        # reshape keeps the (N, 2) layout even when the file holds no points.
        return np.array(landmarks, dtype=float).reshape(-1, 2)

def get_landmarks_bbox(landmarks):
    """Return the tight bounding box of a landmark set as a ``dlib.rectangle``.

    ``landmarks`` is indexed as a 2-D array whose column 0 holds x and
    column 1 holds y. Each extreme is converted with ``int()``, which
    truncates toward zero.
    """
    lower = np.min(landmarks, axis=0)
    upper = np.max(landmarks, axis=0)
    return dlib.rectangle(
        left=int(lower[0]),
        top=int(lower[1]),
        right=int(upper[0]),
        bottom=int(upper[1]),
    )

def calculate_iou(bbox1, bbox2):
    """Return the intersection-over-union of two rectangles.

    Both arguments must expose ``left()``, ``top()``, ``right()`` and
    ``bottom()``. Edges are treated as inclusive pixel coordinates, so a
    side spanning ``a..b`` has length ``b - a + 1``.
    """
    def _area(box):
        # Inclusive-edge area of a single rectangle.
        return (box.right() - box.left() + 1) * (box.bottom() - box.top() + 1)

    # Extent of the overlap along each axis; non-positive means no overlap.
    overlap_w = min(bbox1.right(), bbox2.right()) - max(bbox1.left(), bbox2.left()) + 1
    overlap_h = min(bbox1.bottom(), bbox2.bottom()) - max(bbox1.top(), bbox2.top()) + 1
    overlap = max(0, overlap_w) * max(0, overlap_h)

    combined = _area(bbox1) + _area(bbox2) - overlap
    return overlap / combined

def preprocess_dataset(dataset, json_name='preprocessed_data.json'):
    """Detect a face box for every sample and dump the results to JSON.

    For each ``(image, landmarks)`` sample, the dlib frontal face detector
    runs on the grayscale image, and the detection with the highest IoU
    against the landmarks' bounding box is kept. A sample is omitted when
    the image cannot be read, when no face is detected, or when no
    detection overlaps the landmark box (IoU of 0).

    Args:
        dataset: Indexable object with ``__len__``, ``__getitem__`` returning
            ``(image, landmarks)``, and an ``image_files`` list aligned with
            its indices.
        json_name: Path of the JSON file to write. It maps each image file
            name to ``{'bbox': [left, top, right, bottom], 'landmarks': ...}``.
    """
    import warnings  # local import so the block does not depend on the import cell

    data = {}
    detector = dlib.get_frontal_face_detector()
    for idx in tqdm(range(len(dataset))):
        image, landmarks = dataset[idx]
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None;
            # passing that on to cvtColor would abort the whole run.
            warnings.warn(f"Skipping unreadable image: {dataset.image_files[idx]}")
            continue

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = detector(gray)

        if not faces:
            continue

        max_iou = 0
        selected_bbox = None
        landmarks_bbox = get_landmarks_bbox(landmarks)

        # Strict '>' keeps the first of several equally good detections and
        # rejects detections that do not overlap the landmarks at all.
        for face in faces:
            iou = calculate_iou(face, landmarks_bbox)
            if iou > max_iou:
                max_iou = iou
                selected_bbox = face

        if selected_bbox is not None:
            image_name = dataset.image_files[idx]
            data[image_name] = {
                'bbox': [selected_bbox.left(), selected_bbox.top(), selected_bbox.right(), selected_bbox.bottom()],
                'landmarks': landmarks.tolist()
            }

    with open(json_name, 'w') as json_file:
        json.dump(data, json_file, indent=4)

def merge_datasets(folder1, folder2, output_folder):
    """Copy all fully annotated samples from two folders into one.

    A sample is a ``.jpg`` file together with a same-named ``.pts`` file in
    the same folder. It is copied only when the ``.pts`` file exists and
    ``count_landmarks`` reports exactly 68 landmarks for it.

    Args:
        folder1: First source folder; processed first.
        folder2: Second source folder; processed after ``folder1``.
        output_folder: Destination folder; created if it does not exist.

    Note:
        Files are copied under their original names, so a sample in
        ``folder2`` overwrites a same-named sample copied from ``folder1``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # Both folders go through the same filter, in the given order.
    for folder in (folder1, folder2):
        for filename in os.listdir(folder):
            if not filename.endswith('.jpg'):
                continue
            basename = os.path.splitext(filename)[0]
            pts_file = os.path.join(folder, basename + '.pts')
            if os.path.exists(pts_file) and count_landmarks(pts_file) == 68:
                shutil.copy(os.path.join(folder, filename), os.path.join(output_folder, filename))
                shutil.copy(pts_file, os.path.join(output_folder, basename + '.pts'))

def count_landmarks(pts_file):
    """Return the number of landmark lines in a ``.pts`` file.

    Every line except the first three and the last one is counted.
    """
    with open(pts_file, 'r') as handle:
        point_lines = handle.readlines()[3:-1]
    return len(point_lines)

In [4]:
# Build the merged training folder: merge_datasets copies every .jpg that has
# a same-named .pts file with exactly 68 landmarks from both source folders.
folder1 = './landmarks_task/Menpo/train/'
folder2 = './landmarks_task/300W/train/'
output_folder = 'merged_landmarks_train'
merge_datasets(folder1, folder2, output_folder)

# Same merge for the test split. Note that folder1, folder2 and output_folder
# are rebound here, so after this cell they refer to the test paths.
folder1 = './landmarks_task/Menpo/test/'
folder2 = './landmarks_task/300W/test/'
output_folder = 'merged_landmarks_test'
merge_datasets(folder1, folder2, output_folder)

# Run face detection over the merged training folder and write the selected
# boxes plus landmarks to train_data.json. Must run after the merge above,
# since it reads the folder that the merge populates.
dataset = LandmarksDataset_original('./merged_landmarks_train/')
preprocess_dataset(dataset, json_name='train_data.json')

# Same preprocessing for the merged test folder; `dataset` is rebound and
# refers to the test set once this cell finishes.
dataset = LandmarksDataset_original('./merged_landmarks_test/')
preprocess_dataset(dataset, json_name='test_data.json')

  0%|          | 0/8539 [00:00<?, ?it/s]

  0%|          | 0/942 [00:00<?, ?it/s]

In [5]:
class LandmarksDataset_from_json:
    """Dataset backed by a JSON file of per-image boxes and landmarks.

    The JSON file maps image file names to dictionaries with a ``'bbox'``
    entry and a ``'landmarks'`` entry.

    Attributes:
        root_dir: Directory the image file names are resolved against.
        data: The parsed JSON mapping.
        image_names: Image file names in the order they appear in ``data``,
            captured once at construction. Entries added to or removed from
            ``data`` afterwards are not reflected here.
    """

    def __init__(self, root_dir, json_file):
        self.root_dir = root_dir
        with open(json_file, 'r') as f:
            self.data = json.load(f)
        # Snapshot the keys once: rebuilding the key list on every lookup
        # makes a full pass over the dataset quadratic in its size.
        self.image_names = list(self.data)

    def __len__(self):
        """Return the number of images listed in the JSON file."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Return ``(image, bbox, landmarks)`` for the entry at ``idx``.

        ``image`` is whatever ``cv2.imread`` returns for the image path,
        ``bbox`` is the stored ``'bbox'`` value as loaded from JSON, and
        ``landmarks`` is the stored ``'landmarks'`` value as a NumPy array.
        """
        img_name = self.image_names[idx]
        img_path = os.path.join(self.root_dir, img_name)
        image = cv2.imread(img_path)
        info = self.data[img_name]
        bbox = info['bbox']
        landmarks = np.array(info['landmarks'])
        return image, bbox, landmarks

In [6]:
# def visualize(image, bbox, landmarks, img_path):
#     plt.figure(figsize=(8, 8))
#     plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
#     plt.gca().add_patch(plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], linewidth=2, edgecolor='g', facecolor='none'))
#     for landmark in landmarks:
#         plt.scatter(landmark[0], landmark[1], c='r', s=20)
#     plt.axis('off')
#     plt.title(f"Image Path: {img_path}")
#     plt.show()

# dataset = LandmarksDataset_from_json('./merged_landmarks_test/', 'test_data.json')
# for idx in range(len(dataset)):
#     image, bbox, landmarks = dataset[idx]
#     img_name = list(dataset.data.keys())[idx]
#     img_path = os.path.join('./merged_landmarks_test/', img_name)
#     visualize(image, bbox, landmarks, img_path)


In [7]:
# import dlib
# import cv2
# import matplotlib.pyplot as plt

# def detect_and_visualize(image_path):
#     # Load the image
#     image = cv2.imread(image_path)
    
#     # Convert the image to grayscale
#     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
#     # Initialize the face detector from dlib
#     detector = dlib.get_frontal_face_detector()
    
#     # Detect faces in the grayscale image
#     faces = detector(gray)
    
#     # Visualize the image and detected faces
#     plt.figure(figsize=(8, 8))
#     plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
#     for face in faces:
#         x, y, w, h = face.left(), face.top(), face.width(), face.height()
#         plt.gca().add_patch(plt.Rectangle((x, y), w, h, linewidth=2, edgecolor='g', facecolor='none'))
#     plt.axis('off')
#     plt.title('Detected Faces')
#     plt.show()

In [8]:
# error_image_path = './merged_landmarks_test/aflw__face_64689.jpg'
# detect_and_visualize(error_image_path)