In [31]:
# %load eval.py
from argparse import ArgumentParser

from pytorch_lightning import seed_everything, Trainer

from dataset.mpii_face_gaze_dataset import get_dataloaders
from train import Model

import numpy as np
import torch
from utils import calc_angle_error,compute_bias_eq3
from sklearn.linear_model import LinearRegression

In [32]:
def evaluate_with_eq3(model, test_set, device='cuda', s=9, target_pitch=0.0, target_yaw=0.0):
    """
    Evaluate the model after an Eq.3 bias calibration, simulating SGTC
    (Single Gaze Target Calibration).

    The first ``s`` samples of ``test_set`` whose labels lie within 0.1 of
    ``(target_pitch, target_yaw)`` on both axes form the calibration set.
    ``compute_bias_eq3`` turns them into an offset ``b_hat`` which is added to
    ``model.get_subject_independent_output(...)`` for every remaining sample.

    :param model: trained model; must provide ``to``, ``eval`` and
        ``get_subject_independent_output(full_face, right_eye, left_eye)``
    :param test_set: iterable of dict samples with the keys ``file_name``,
        ``gaze_pitch``, ``gaze_yaw``, ``full_face_image``, ``right_eye_image``
        and ``left_eye_image``; it is iterated twice
    :param device: device the model and the images are moved to ('cuda' or 'cpu')
    :param s: number of calibration samples S
    :param target_pitch: pitch of the gaze target used for calibration
    :param target_yaw: yaw of the gaze target used for calibration
    :return: mean angular error as a float (as computed by ``calc_angle_error``),
        or ``None`` when fewer than ``s`` calibration samples exist or when no
        sample is left to evaluate
    """

    # Step 1: pick the first S samples that look at the requested gaze target.
    # Stop as soon as S matches are found so the rest of the dataset is not
    # loaded just to be discarded by a slice.
    calibration_samples = []
    if s > 0:
        for item in test_set:
            if abs(item['gaze_pitch'] - target_pitch) < 0.1 and abs(item['gaze_yaw'] - target_yaw) < 0.1:
                calibration_samples.append(item)
                if len(calibration_samples) == s:
                    break

    if len(calibration_samples) < s:
        print(f"⚠️ 校正圖數不足 {s} 張，只找到 {len(calibration_samples)} 張")
        return None

    model = model.to(device)
    # Enter eval mode before the calibration pass as well; previously this only
    # happened afterwards, so the bias was computed in whatever mode the caller
    # left the model in.
    # NOTE(review): compute_bias_eq3 lives in utils and is not visible here -
    # confirm it does not depend on training mode.
    model.eval()

    calibration_filenames = {item['file_name'] for item in calibration_samples}

    # Step 2: compute the Eq.3 offset b_hat from the calibration samples.
    b_hat = compute_bias_eq3(model, calibration_samples, device)

    # Step 3: predict every other test sample with b_hat applied.
    preds = []
    labels = []

    # Re-assert eval mode in case the helper above changed it.
    model.eval()
    with torch.no_grad():
        for item in test_set:
            # Calibration samples must not be scored again.
            if item['file_name'] in calibration_filenames:
                continue

            full_face = item['full_face_image'].unsqueeze(0).to(device)
            right_eye = item['right_eye_image'].unsqueeze(0).to(device)
            left_eye = item['left_eye_image'].unsqueeze(0).to(device)

            t_hat = model.get_subject_independent_output(full_face, right_eye, left_eye)
            g_hat = t_hat + b_hat.to(t_hat.device)

            preds.append(g_hat.squeeze(0).cpu())
            # The error is computed on the CPU, so build the label there directly.
            labels.append(torch.tensor([
                float(item['gaze_pitch']),
                float(item['gaze_yaw'])
            ], dtype=torch.float32))

    if not preds:
        # torch.stack would raise on an empty list; report it like the
        # insufficient-calibration case instead.
        print(f"⚠️ No evaluation samples left after removing {len(calibration_samples)} calibration samples")
        return None

    preds = torch.stack(preds)
    labels = torch.stack(labels)

    # Step 4: angular error between labels and calibrated predictions.
    mean_angle_error = calc_angle_error(labels, preds).item()
    print(f'🎯 Mean angular error after Eq.3 calibration(S={s}): {mean_angle_error:.2f}°')
    return mean_angle_error


In [33]:
def simulate_calibration_lr(model, dataset, S=1, device='cuda'):
    """
    Calibrate gaze predictions with a linear regression fitted on S samples.

    S samples are drawn at random (``np.random.choice`` without replacement)
    as the calibration set. A ``LinearRegression`` mapping model prediction to
    label is fitted on them and then applied to the predictions for all
    remaining samples.

    :param model: trained model, called as
        ``model(person_idx, full_face, right_eye, left_eye)``
    :param dataset: indexable dataset whose items are dicts with the keys
        ``left_eye_image``, ``right_eye_image``, ``full_face_image``,
        ``gaze_pitch``, ``gaze_yaw`` and ``person_idx``
    :param S: number of calibration samples
    :param device: device used for the forward passes ('cuda' or 'cpu')
    :return: mean of the per-sample values returned by ``calc_angle_error``
        over the non-calibration samples
    """
    model = model.to(device)
    model.eval()

    def _forward(sample):
        # Inputs are moved to `device` only for their own forward pass, so the
        # whole dataset never has to sit in device memory at once.
        return model(
            sample['person_idx'].to(device),
            sample['face'].to(device),
            sample['right_eye'].to(device),
            sample['left_eye'].to(device),
        ).squeeze(0)

    with torch.no_grad():
        all_data = []
        for i in range(len(dataset)):
            item = dataset[i]
            all_data.append({
                'left_eye': item['left_eye_image'].unsqueeze(0),
                'right_eye': item['right_eye_image'].unsqueeze(0),
                'face': item['full_face_image'].unsqueeze(0),
                'label': torch.tensor(
                    [float(item['gaze_pitch']), float(item['gaze_yaw'])],
                    dtype=torch.float32
                ),
                'person_idx': torch.tensor([[item['person_idx']]]),
            })

        # Draw S samples for calibration; everything else is evaluated.
        indices = np.random.choice(len(all_data), size=S, replace=False)
        # A set gives constant-time membership instead of scanning the array.
        calibration_ids = set(indices.tolist())
        calibration_set = [all_data[i] for i in indices]
        test_set = [d for i, d in enumerate(all_data) if i not in calibration_ids]

        # Collect predictions and labels of the calibration samples.
        preds, labels = [], []
        for d in calibration_set:
            preds.append(_forward(d).cpu().numpy())
            labels.append(d['label'].cpu().numpy())

        # Fit the linear regression (prediction -> label).
        reg = LinearRegression().fit(preds, labels)

        # Apply the regression to every remaining sample.
        angle_errors = []
        for d in test_set:
            pred = _forward(d).cpu().numpy()
            corrected = reg.predict([pred])[0]
            angle = calc_angle_error(
                # Pin float32 so the dtype matches the label whatever the
                # regression returns.
                torch.tensor(corrected, dtype=torch.float32).unsqueeze(0).to(device),
                d['label'].unsqueeze(0).to(device)
            )
            angle_errors.append(angle.item())

        mean_err = np.mean(angle_errors)
        print(f"📐 Linear Calibration (S={S}): {mean_err:.2f} degrees")
        return mean_err


In [34]:
def simulate_calibration(model, dataset, S=1, device='cuda'):
    """
    Use S randomly drawn samples as calibration data and test on the rest.

    The calibration bias is the mean calibration label minus the mean model
    prediction on the calibration samples; it is added to the prediction of
    every remaining sample before the error is computed.
    The model is called as ``model(person_idx, full_face, right_eye, left_eye)``.

    :param model: trained model
    :param dataset: indexable dataset whose items are dicts with the keys
        ``left_eye_image``, ``right_eye_image``, ``full_face_image``,
        ``gaze_pitch``, ``gaze_yaw`` and ``person_idx``
    :param S: number of calibration samples
    :param device: device used for the forward passes ('cuda' or 'cpu')
    :return: mean of the per-sample values returned by ``calc_angle_error``
        over the non-calibration samples
    """
    model = model.to(device)
    model.eval()

    def _forward(sample):
        # Inputs are moved to `device` only for their own forward pass, so the
        # whole dataset never has to sit in device memory at once.
        return model(
            sample['person_idx'].to(device),
            sample['face'].to(device),
            sample['right_eye'].to(device),
            sample['left_eye'].to(device),
        ).squeeze(0)

    with torch.no_grad():
        all_data = []
        for i in range(len(dataset)):
            item = dataset[i]
            all_data.append({
                'left_eye': item['left_eye_image'].unsqueeze(0),
                'right_eye': item['right_eye_image'].unsqueeze(0),
                'face': item['full_face_image'].unsqueeze(0),
                'label': torch.tensor(
                    [float(item['gaze_pitch']), float(item['gaze_yaw'])],
                    dtype=torch.float32
                ),
                'person_idx': torch.tensor([[item['person_idx']]]),  # shape [1, 1]
            })

        # Step 1: draw S samples for calibration; everything else is evaluated.
        indices = np.random.choice(len(all_data), size=S, replace=False)
        # A set gives constant-time membership instead of scanning the array.
        calibration_ids = set(indices.tolist())
        calibration_set = [all_data[i] for i in indices]
        test_set = [d for i, d in enumerate(all_data) if i not in calibration_ids]

        # Step 2: compute the bias from the calibration samples.
        cal_outputs, cal_labels = [], []
        for d in calibration_set:
            cal_outputs.append(_forward(d))
            cal_labels.append(d['label'].to(device))

        bias = torch.stack(cal_labels).mean(dim=0) - torch.stack(cal_outputs).mean(dim=0)

        # Step 3: evaluate the remaining samples with the bias applied.
        angle_errors = []
        for d in test_set:
            pred = _forward(d) + bias
            angle = calc_angle_error(pred.unsqueeze(0), d['label'].unsqueeze(0).to(device))
            angle_errors.append(angle.item())

        mean_err = np.mean(angle_errors)
        print(f"📐 Calibration (S={S}): {mean_err:.2f} degrees")
        return mean_err

In [35]:
if __name__ == '__main__':
    # Evaluation script: load a checkpoint, run the Lightning test loop on the
    # held-out person, then run the Eq.3 calibration for several values of S.

    ### Fixed settings
    k = [9, 128]
    adjust_slope = False
    ### Corresponds to "grid calibration" in the authors' table; set to True for random
    grid_calibration_samples = False
    batch_size = 32
    path_to_data = './data/mpiifacegaze_preprocessed'

    #### Settings to change per run
    path_to_checkpoints = './saved_models/p00/p00_best-v28.ckpt'
    person_idx = 1
    validate_on_person = 0

    seed_everything(42)

    print(f"{path_to_checkpoints}")

    # Pass the settings declared above instead of repeating their literal
    # values, so that editing them actually takes effect.
    model = Model.load_from_checkpoint(
        path_to_checkpoints,
        k=k,
        adjust_slope=adjust_slope,
        grid_calibration_samples=grid_calibration_samples,
    )

    trainer = Trainer(
        gpus=1,
        benchmark=True,
    )

    _, _, test_dataloader = get_dataloaders(
        path_to_data,
        validate_on_person,
        person_idx,
        batch_size
    )

    trainer.test(model, test_dataloader)

    # Take the dataset behind the test dataloader (used for the calibration runs).
    test_dataset = test_dataloader.dataset

    # Same gaze target for every run; only the calibration set size S varies.
    for num_calibration_samples in (1, 5, 9, 16):
        evaluate_with_eq3(
            model,
            test_dataset,
            s=num_calibration_samples,
            target_pitch=0.0,
            target_yaw=0.0,
        )


Global seed set to 42


./saved_models/p00/p00_best-v28.ckpt


  rank_zero_warn(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


train on persons [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
valid on person 0
test on person 1
len(dataset_train) 60784
len(dataset_valid) 2927


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


len(dataset_test) 2904
Testing DataLoader 0: 100%|██████████| 181/181 [00:26<00:00,  6.90it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
             Test metric                         DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test/offset(k=0)/angular_error             3.6843433380126953
        test/offset(k=0)/loss                0.002673023846000433
test/offset(k=128)/mean_angular_error         2.7848633340120315
test/offset(k=128)/std_angular_error         0.029947001860490575
 test/offset(k=9)/mean_angular_error           2.940035085964203
 test/offset(k=9)/std_angular_error           0.18966737049682192
  test/offset(k=all)/angular_error             2.773134469985962
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
🎯 Mean angular error a