In [52]:
import os, math
import scipy.io as sio
import numpy as np
import pandas as pd
import csv
from collections import defaultdict

# Root directory of the 300W-LP dataset; create_triplets joins each selected
# folder name onto this path.
DATASET_PATH = "/kaggle/input/300w-lp/300W_LP"
# Destination of the triplet CSV written by the cell that dumps `triplets`.
CSV_PATH = "/kaggle/working/triplets_shorter_AFW.csv"
# Sub-folders of DATASET_PATH that are scanned for .mat annotation files.
SELECTED_FOLDERS = ["AFW", "AFW_Flip"]
# A yaw inside [-FRONT_THRESH, +FRONT_THRESH] is classified as a frontal view;
# a yaw below / above that band is classified as left / right.
FRONT_THRESH = 0.15 # radians
# NOTE(review): SIDE_THRESH is not referenced by any cell visible here; the
# left/right split in create_triplets compares against FRONT_THRESH only.
# Confirm whether a stricter side-view cutoff was intended.
SIDE_THRESH = 0.3 # radians

# MATLAB files read with scipy.io.loadmat by the model-loading cell.
MODEL_SHAPE = "/kaggle/input/multi-view-input/Model_Shape.mat"
MODEL_EXP = "/kaggle/input/multi-view-input/Model_Expression.mat"
DATA_FROM_AUTHOR = "/kaggle/input/multi-view-input/sigma_exp.mat"

In [53]:
# Read the three MATLAB files named in the configuration cell into dicts.
model_shape = sio.loadmat(MODEL_SHAPE)
model_exp = sio.loadmat(MODEL_EXP)
data = sio.loadmat(DATA_FROM_AUTHOR)

# List the variables stored in each file.
for _mat in (model_shape, model_exp, data):
    print(_mat.keys())

# Report the array shape of every entry inspected in this cell.
for _mat, _key in (
    (model_shape, "w"),
    (model_shape, "mu_shape"),
    (model_exp, "w_exp"),
    (model_shape, "sigma"),
    (data, "sigma_exp"),
    (model_shape, "keypoints"),
):
    print(_mat[_key].shape)

# Scale vectors, flattened to 1-D (the recorded output shows 199 shape values
# and 29 expression values).
shape_std = model_shape["sigma"].flatten()
exp_std = 1.0 / (1000.0 * data["sigma_exp"].flatten())

# Normalisation constants for the 7 pose values, stored as float32.
pose_mean = np.array([0, 0, 0, 112, 112, 0, 0], dtype=np.float32)
_half_pi = math.pi / 2.0
pose_std = np.array(
    [
        _half_pi, _half_pi, _half_pi,  # rotation
        56, 56, 1,  # translation
        224.0 / (2 * 180000.0),  # scale std ~0.000622
    ],
    dtype=np.float32,
)

for _vec in (shape_std, exp_std, pose_mean, pose_std):
    print(_vec)

# Keypoint indices shifted down by one
# (presumably MATLAB 1-based -> Python 0-based; confirm against the model file).
kpt_index = model_shape["keypoints"].flatten().astype(np.int32) - 1
print(kpt_index[:10])

dict_keys(['__header__', '__version__', '__globals__', 'keypoints', 'mu_shape', 'segbin', 'segbin_tri', 'sigma', 'symlist', 'symlist_tri', 'tex', 'tri', 'w'])
dict_keys(['__header__', '__version__', '__globals__', 'mu_exp', 'w_exp', 'sigma_exp'])
dict_keys(['__header__', '__version__', '__globals__', 'sigma_exp'])
(159645, 199)
(159645, 1)
(159645, 29)
(199, 1)
(1, 29)
(1, 68)
[884336.25   555884.6    436801.44   313216.6    276257.2    209015.34
 192705.58   181120.11   177736.72   161613.2    152266.27   143272.22
 129612.55   114983.5    106353.03   101484.695   98814.484   95429.88
  89034.36    87535.04    82250.93    76056.766   75095.54    73497.375
  70085.1     69617.87    67851.516   65234.703   60350.746   57751.39
  56826.504   53567.543   51958.      50399.527   49517.945   47552.816
  46419.6     45736.055   43672.63    42845.676   40993.934   39643.086
  37674.47    36911.35    35686.535   35331.3     34741.04    34151.2
  33042.02    32360.275   31601.893   31358.668   

In [54]:
SAMPLE_PATH_1 = "/kaggle/input/300w-lp/300W_LP/landmarks/LFPW/LFPW_image_test_0001_1_pts.mat"
SAMPLE_PATH_FRONT = "/kaggle/input/300w-lp/300W_LP/LFPW/LFPW_image_test_0003_0.mat"
SAMPLE_PATH_RIGHT = "/kaggle/input/300w-lp/300W_LP/LFPW/LFPW_image_test_0003_6.mat"
SAMPLE_PATH_LEFT = "/kaggle/input/300w-lp/300W_LP/LFPW_Flip/LFPW_image_test_0003_5.mat"

# Disabled landmark-file inspection, kept for reference:
# sample = sio.loadmat(SAMPLE_PATH_1)
# print(sample.keys())
# print(sample["pts_2d"].shape)
# print(sample["pts_2d"])


def _load_and_report(path):
    """Load one annotation file, print Pose_Para's shape and its second
    flattened entry, and return the loaded dict."""
    annotation = sio.loadmat(path)
    pose = annotation["Pose_Para"]
    print(pose.shape)
    print(pose.flatten()[1])
    return annotation


# Same order as before: front, then left, then right.
sample_front = _load_and_report(SAMPLE_PATH_FRONT)
sample_left = _load_and_report(SAMPLE_PATH_LEFT)
sample_right = _load_and_report(SAMPLE_PATH_RIGHT)

(1, 7)
0.034392405
(1, 7)
-0.47623187
(1, 7)
0.5579912


In [55]:
def load_yaw(mat_pathh):
    """Return the yaw value stored in a .mat annotation file.

    Reads the ``Pose_Para`` array from the MATLAB file at ``mat_pathh``,
    flattens it, and returns its second element.
    """
    # Flattened layout: [pitch, yaw, roll, tx, ty, tz, scale]
    pose_params = sio.loadmat(mat_pathh)["Pose_Para"].flatten()
    return pose_params[1]

In [56]:
def extract_person_id(filename):
    """
    Extract the identity key from a 300W-LP file name.

    The directory part and the file extension are dropped, then the remaining
    stem is split on "_". Recognised layouts:

      * AFW_<id>_<x>_<x>, HELEN_<id>_<x>_<x>   -> "<id>"
      * IBUG_image_<id>_<x>_<x>                -> "<id>"
      * LFPW_image_<split>_<id>_<x>            -> "<split>_<id>"

    For LFPW the train/test split is kept in the key, so that
    LFPW_image_test_0003_* and LFPW_image_train_0003_* are not merged into
    one identity.

    Returns:
        The identity key as a string, or None when the name matches none of
        the layouts above (unknown prefix or too few "_"-separated fields).

    NOTE(review): for AFW/HELEN/IBUG only the <id> field is used. If the first
    <x> is a per-image face index, different faces from the same photo share
    one key -- confirm against the dataset naming scheme.
    """
    # Strip only the real extension, never a ".mat" occurring inside the name.
    stem = os.path.splitext(os.path.basename(filename))[0]
    parts = stem.split("_")
    prefix = parts[0]

    # Case 1: AFW_xxx_x_x , HELEN_xxx_x_x
    if prefix in ("AFW", "HELEN") and len(parts) > 1:
        return parts[1]

    # Case 2: IBUG_image_xxx_x_x
    if prefix == "IBUG" and len(parts) > 2:
        return parts[2]

    # Case 3: LFPW_image_test_xxx_x / LFPW_image_train_xxx_x
    if prefix == "LFPW" and len(parts) > 3:
        return parts[2] + "_" + parts[3]

    # Unrecognised layout: signal it explicitly instead of falling off the end.
    return None

In [57]:
def create_triplets(dataset_path, folders, front_thresh=None, side_thresh=None):
    """
    Build every (front, left, right) image triplet per identity.

    Args:
        dataset_path: Root directory containing the folders to scan.
        folders: Iterable of folder names (relative to ``dataset_path``).
        front_thresh: A yaw inside [-front_thresh, +front_thresh] counts as a
            frontal view. Defaults to the module-level FRONT_THRESH.
        side_thresh: A yaw below -side_thresh counts as left, above
            +side_thresh as right. Defaults to ``front_thresh``, i.e. every
            non-frontal view is a side view (the previous behaviour).
            Views between the two thresholds are dropped.

    Returns:
        A list of 6-tuples
        (front_img, left_img, right_img, front_mat, left_mat, right_mat)
        holding file base names.

    Raises:
        ValueError: if ``side_thresh`` is smaller than ``front_thresh``.

    NOTE(review): only base names are stored. If two selected folders contain
    files with the same name, rows cannot tell them apart -- confirm how the
    CSV consumer resolves folders.
    """
    if front_thresh is None:
        front_thresh = FRONT_THRESH
    if side_thresh is None:
        side_thresh = front_thresh
    if side_thresh < front_thresh:
        raise ValueError("side_thresh must be >= front_thresh")

    by_id = defaultdict(list)

    # Step 1: parse all .mat files
    for folder in folders:
        folder_path = os.path.join(dataset_path, folder)
        if not os.path.isdir(folder_path):
            print("Skipping missing folder:", folder_path)
            continue
        # os.listdir order is arbitrary; sort so the output is reproducible.
        for file in sorted(os.listdir(folder_path)):
            if not file.endswith(".mat"):
                continue
            person_id = extract_person_id(file)
            if person_id is None:
                # Unrecognised names would otherwise all be pooled under one
                # bogus identity.
                print("Skipping", file, "error: unrecognised filename layout")
                continue
            try:
                yaw = load_yaw(os.path.join(folder_path, file))
            except Exception as e:
                # Best effort: one unreadable file must not abort the scan.
                print("Skipping", file, "error:", e)
                continue
            by_id[person_id].append({
                "mat_path": file,
                "img_name": file[:-4] + ".jpg",
                "yaw": yaw
            })

    # Step 2: create triplets
    triplets = []
    for entries in by_id.values():
        fronts = [e for e in entries if -front_thresh <= e["yaw"] <= front_thresh]
        lefts = [e for e in entries if e["yaw"] < -side_thresh]
        rights = [e for e in entries if e["yaw"] > side_thresh]

        # create all possible triplets
        triplets.extend(
            (f["img_name"], l["img_name"], r["img_name"],
             f["mat_path"], l["mat_path"], r["mat_path"])
            for f in fronts
            for l in lefts
            for r in rights
        )

    print(f"Total triplets created: {len(triplets)}")
    return triplets

In [58]:
# Build the triplet list for the folders chosen in the configuration cell.
triplets = create_triplets(dataset_path=DATASET_PATH, folders=SELECTED_FOLDERS)

Total triplets created: 1954848


In [59]:
# Write the triplets to CSV: one header row, then one row per triplet.
_header = ["front_img", "left_img", "right_img", "front_mat", "left_mat", "right_mat"]

# newline="" leaves line-ending handling to the csv module.
with open(CSV_PATH, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(_header)
    writer.writerows(triplets)

print("Saved triplets CSV to", CSV_PATH)

Saved triplets CSV to /kaggle/working/triplets_shorter_AFW.csv
