In [None]:
import os
import scipy.io as sio
import numpy as np
import pandas as pd
import csv
from collections import defaultdict

# Root directory that contains the dataset sub-folders listed in SELECTED_FOLDERS.
# Left empty here; set it before running the notebook.
DATASET_PATH = ""
# Destination file for the generated triplets CSV.
# Left empty here; set it before running the notebook.
CSV_PATH = ""
# Sub-folders of DATASET_PATH that are scanned for .mat label files.
SELECTED_FOLDERS = ["AFW", "HELEN", "IBUG", "LFPW"] 
# Yaw thresholds in degrees: |yaw| < FRONT_THRESH counts as a front view,
# yaw < -SIDE_THRESH as a left view and yaw > SIDE_THRESH as a right view.
FRONT_THRESH = 15.0
SIDE_THRESH = 30.0

In [None]:
def load_yaw(label_path):
    """
    Return the yaw angle, in degrees, stored in a .mat label file.

    The file's "Pose_Para" entry is flattened and read with the layout
    [pitch, yaw, roll, tx, ty, tz, scale]; the yaw element is treated as
    radians and converted to degrees.
    """
    pose_flat = np.array(sio.loadmat(label_path)["Pose_Para"]).reshape(-1)
    yaw_in_radians = float(pose_flat[1])  # element 1 of the flattened vector is yaw
    return np.degrees(yaw_in_radians)

In [None]:
def extract_person_id(filename):
    """
    Extract an identity key from a 300W-LP label filename.

    The key is the filename without its ".mat" extension and without the
    trailing numeric field, so all variants of one source face share a key:

        AFW_134212_1_0.mat          -> "AFW_134212_1"
        HELEN_100032540_1_3.mat     -> "HELEN_100032540_1"
        IBUG_image_003_1_0.mat      -> "IBUG_image_003_1"
        LFPW_image_test_0001_0.mat  -> "LFPW_image_test_0001"

    Keeping the whole prefix instead of a single numeric field prevents
    unrelated files from being merged into one group, e.g.
    LFPW_image_test_0001_* vs LFPW_image_train_0001_*, or files that differ
    only in the second-to-last field (AFW_x_1_* vs AFW_x_2_*).

    NOTE(review): the trailing field is assumed to be the per-pose
    augmentation index of 300W-LP - confirm against the dataset.

    Args:
        filename: Label file name such as "AFW_134212_1_0.mat".

    Returns:
        The identity key as a string. Names that do not end in "_<digits>"
        are returned unchanged (minus the extension), so the result is
        never None.
    """
    extension = ".mat"
    # Strip the extension only as a suffix; str.replace would also remove
    # a ".mat" that happens to occur in the middle of the name.
    name = filename[:-len(extension)] if filename.endswith(extension) else filename

    stem, _, last_field = name.rpartition("_")
    if stem and last_field.isdigit():
        return stem

    # No trailing numeric field to drop: use the whole name as the key.
    return name

In [None]:
# Group label files by person id (the key returned by extract_person_id).
by_id = defaultdict(list)

for folder in SELECTED_FOLDERS:
    folder_path = os.path.join(DATASET_PATH, folder)

    if not os.path.isdir(folder_path):
        # Report instead of skipping silently: with a wrong or empty DATASET_PATH
        # the notebook would otherwise just produce zero triplets with no hint why.
        print("Skip folder", folder_path, "err: directory not found")
        continue

    # os.listdir returns entries in arbitrary order; sort them so the grouping
    # (and the CSV written from it) is identical on every run.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(".mat"):
            continue

        mat_path = os.path.join(folder_path, file)
        name_noext = file[:-4]
        img_name = name_noext + ".jpg"

        # Both steps sit inside the try so one malformed file name or unreadable
        # label file skips that file rather than aborting the whole scan.
        try:
            person_id = extract_person_id(file)
            yaw = load_yaw(mat_path)
        except Exception as e:
            print("Skip", file, "err:", e)
            continue

        if person_id is None:
            # Without this guard every unrecognized file would be pooled
            # under the single key None and treated as one person.
            print("Skip", file, "err: could not determine person id")
            continue

        by_id[person_id].append({
            # NOTE: despite the key name this stores the bare file name,
            # not the full path (same convention as "img_name").
            "mat_path": file,
            "img_name": img_name,
            "yaw": yaw
        })

In [None]:
# Build (front, left, right) triplets for every person that has all three views
triplets = []
for pid, entries in by_id.items():
    # Bucket this person's entries by yaw angle
    front_views = [item for item in entries if abs(item["yaw"]) < FRONT_THRESH]
    left_views = [item for item in entries if item["yaw"] < -SIDE_THRESH]
    right_views = [item for item in entries if item["yaw"] > SIDE_THRESH]

    # A person contributes only when each bucket is non-empty
    if not (front_views and left_views and right_views):
        continue

    # Every front x left x right combination; front varies slowest, right fastest
    triplets.extend(
        (front["img_name"], left["img_name"], right["img_name"],
         front["mat_path"], left["mat_path"], right["mat_path"])
        for front in front_views
        for left in left_views
        for right in right_views
    )

print(f"Total triplets created: {len(triplets)}")

In [None]:
# Write the triplets to CSV_PATH: one header row, then one row per triplet
with open(CSV_PATH, "w", newline="") as out_file:
    triplet_writer = csv.writer(out_file)
    triplet_writer.writerow([
        "front_img", "left_img", "right_img",
        "front_mat", "left_mat", "right_mat",
    ])
    triplet_writer.writerows(triplets)

print("Saved triplets CSV to", CSV_PATH)