In [2]:
# %load eval.py
from argparse import ArgumentParser

from pytorch_lightning import seed_everything, Trainer

from dataset.mpii_face_gaze_dataset import get_dataloaders
from train import Model

import numpy as np
import torch
from utils import calc_angle_error, PitchYawHelper
from sklearn.linear_model import LinearRegression

In [4]:
def simulate_calibration_lr(model, dataset, S=1, device='cuda'):
    """
    Simulate gaze calibration using a linear regression on the model output.

    ``S`` samples are drawn at random (without replacement) from ``dataset``.
    A ``LinearRegression`` is fitted that maps the model's prediction for
    those samples to their (pitch, yaw) labels. The fitted regression is then
    applied to the model's prediction for every remaining sample, and the
    mean of ``calc_angle_error`` over those samples is printed and returned.

    Args:
        model: Module called as ``model(person_idx, face, right_eye, left_eye)``.
            It is moved to ``device`` and switched to eval mode.
        dataset: Indexable collection with ``len()``. Each item is read via the
            keys ``left_eye_image``, ``right_eye_image``, ``full_face_image``,
            ``gaze_pitch``, ``gaze_yaw`` and ``person_idx``.
            (assumes the three image entries are tensors -- ``unsqueeze`` is
            called on them)
        S: Number of calibration samples; must satisfy
            ``1 <= S < len(dataset)``.
        device: Device on which inference is run.

    Returns:
        The mean angular error (as returned by ``calc_angle_error``) over the
        samples that were not used for calibration.

    Raises:
        ValueError: If ``S`` would leave the calibration split or the test
            split empty.
    """
    num_samples = len(dataset)
    if not 1 <= S < num_samples:
        raise ValueError(
            "S must satisfy 1 <= S < len(dataset) so that both the calibration "
            f"and the test split are non-empty; got S={S}, "
            f"len(dataset)={num_samples}"
        )

    model = model.to(device)
    model.eval()

    with torch.no_grad():
        # Read every sample once, but keep the tensors where the dataset put
        # them; they are moved to `device` only when they are fed to the
        # model, so the whole dataset never has to fit in device memory.
        all_data = []
        for i in range(num_samples):
            item = dataset[i]
            all_data.append({
                'left_eye': item['left_eye_image'].unsqueeze(0),
                'right_eye': item['right_eye_image'].unsqueeze(0),
                'face': item['full_face_image'].unsqueeze(0),
                'label': torch.tensor(
                    [float(item['gaze_pitch']), float(item['gaze_yaw'])],
                    dtype=torch.float32
                ),
                'person_idx': torch.tensor([[item['person_idx']]])
            })

        # Pick S samples as calibration data; everything else is test data.
        indices = np.random.choice(len(all_data), size=S, replace=False)
        calibration_ids = set(indices.tolist())  # O(1) membership checks
        calibration_set = [all_data[i] for i in indices]
        test_set = [d for i, d in enumerate(all_data) if i not in calibration_ids]

        def _predict(sample):
            # Run the model on one sample and return its output as a NumPy array.
            return model(
                sample['person_idx'].to(device),
                sample['face'].to(device),
                sample['right_eye'].to(device),
                sample['left_eye'].to(device)
            ).squeeze(0).cpu().numpy()

        # Collect prediction / label pairs of the calibration samples.
        preds = [_predict(d) for d in calibration_set]
        labels = [d['label'].numpy() for d in calibration_set]

        # Fit the linear regression (prediction -> label).
        reg = LinearRegression().fit(preds, labels)

        # Apply the regression to the predictions of the test samples.
        angle_errors = []
        for d in test_set:
            corrected = reg.predict([_predict(d)])[0]
            angle = calc_angle_error(
                # Pin the dtype so it matches the float32 label regardless of
                # the dtype the regressor returns.
                torch.as_tensor(corrected, dtype=torch.float32, device=device).unsqueeze(0),
                d['label'].to(device).unsqueeze(0)
            )
            angle_errors.append(angle.item())

        mean_err = np.mean(angle_errors)
        print(f"üìê Linear Calibration (S={S}): {mean_err:.2f} degrees")
        return mean_err


In [5]:
def simulate_calibration(model, dataset, S=1, device='cuda'):
    """
    Simulate gaze calibration using a constant (pitch, yaw) offset.

    ``S`` samples are drawn at random (without replacement) from ``dataset``
    and used as calibration data. The bias is the mean label of those samples
    minus the mean model output for them. The bias is added to the model's
    output for every remaining sample, and the mean of ``calc_angle_error``
    over those samples is printed and returned.

    Args:
        model: Module called as ``model(person_idx, face, right_eye, left_eye)``.
            It is moved to ``device`` and switched to eval mode.
        dataset: Indexable collection with ``len()``. Each item is read via the
            keys ``left_eye_image``, ``right_eye_image``, ``full_face_image``,
            ``gaze_pitch``, ``gaze_yaw`` and ``person_idx``.
            (assumes the three image entries are tensors -- ``unsqueeze`` is
            called on them)
        S: Number of calibration samples; must satisfy
            ``1 <= S < len(dataset)``.
        device: Device on which inference is run.

    Returns:
        The mean angular error (as returned by ``calc_angle_error``) over the
        samples that were not used for calibration.

    Raises:
        ValueError: If ``S`` would leave the calibration split or the test
            split empty.
    """
    num_samples = len(dataset)
    if not 1 <= S < num_samples:
        raise ValueError(
            "S must satisfy 1 <= S < len(dataset) so that both the calibration "
            f"and the test split are non-empty; got S={S}, "
            f"len(dataset)={num_samples}"
        )

    model = model.to(device)
    model.eval()
    with torch.no_grad():
        # Read every sample once, but keep the tensors where the dataset put
        # them; they are moved to `device` only when they are fed to the
        # model, so the whole dataset never has to fit in device memory.
        all_data = []
        for i in range(num_samples):
            item = dataset[i]
            all_data.append({
                'left_eye': item['left_eye_image'].unsqueeze(0),
                'right_eye': item['right_eye_image'].unsqueeze(0),
                'face': item['full_face_image'].unsqueeze(0),
                'label': torch.tensor(
                    [float(item['gaze_pitch']), float(item['gaze_yaw'])],
                    dtype=torch.float32
                ),
                'person_idx': torch.tensor([[item['person_idx']]])  # shape [1, 1]
            })

        # Step 1: pick S samples as calibration data; the rest is test data.
        indices = np.random.choice(len(all_data), size=S, replace=False)
        calibration_ids = set(indices.tolist())  # O(1) membership checks
        calibration_set = [all_data[i] for i in indices]
        test_set = [d for i, d in enumerate(all_data) if i not in calibration_ids]

        def _predict(sample):
            # Run the model on one sample and drop the leading batch dimension.
            return model(
                sample['person_idx'].to(device),
                sample['face'].to(device),
                sample['right_eye'].to(device),
                sample['left_eye'].to(device)
            ).squeeze(0)

        # Step 2: compute the bias (mean label minus mean prediction).
        cal_outputs = [_predict(d) for d in calibration_set]
        cal_labels = [d['label'].to(device) for d in calibration_set]
        bias = torch.stack(cal_labels).mean(dim=0) - torch.stack(cal_outputs).mean(dim=0)

        # Step 3: apply the bias to the test samples.
        angle_errors = []
        for d in test_set:
            pred = _predict(d) + bias
            angle = calc_angle_error(
                pred.unsqueeze(0),
                d['label'].to(device).unsqueeze(0)
            )
            angle_errors.append(angle.item())

        mean_err = np.mean(angle_errors)
        print(f"üìê Calibration (S={S}): {mean_err:.2f} degrees")
        return mean_err

In [6]:
if __name__ == '__main__':
    # Evaluate one checkpoint on the held-out test person, then simulate
    # calibration with an increasing number of calibration samples.

    ### Fixed settings (not meant to change between runs)
    # NOTE(review): this `k` list is not used below; the checkpoint is loaded
    # with k=[9, 128] -- confirm which value is intended.
    k = [1,5,9,16,32,64, 128]
    adjust_slope = False
    ### Corresponds to the "grid calibration" entry of the authors' table; set
    ### to True for random (translated from the original comment).
    ### NOTE(review): confirm the flag's polarity against the model code.
    grid_calibration_samples = False
    batch_size = 32
    path_to_data = './data/mpiifacegaze_preprocessed'

    #### Per-run settings
    path_to_checkpoints = './saved_models/p00/p00_best-v28.ckpt'
    person_idx = 1
    validate_on_person = 0

    seed_everything(42)

    print(f"{path_to_checkpoints}")

    model = Model.load_from_checkpoint(
        path_to_checkpoints,
        k=[9, 128],
        adjust_slope=adjust_slope,
        grid_calibration_samples=grid_calibration_samples,
    )

    trainer = Trainer(
        gpus=1,
        benchmark=True,
    )

    _, _, test_dataloader = get_dataloaders(
        path_to_data,
        validate_on_person,
        person_idx,
        batch_size,
    )

    trainer.test(model, test_dataloader)

    # Take the test set (the complete data of a single person).
    test_dataset = test_dataloader.dataset

    # The original cell called `simulate_calibration_ls`, which is not defined
    # anywhere in this notebook and raises NameError on a fresh kernel; the
    # linear-regression variant defined above is `simulate_calibration_lr`.
    for num_calibration_samples in (1, 5, 9, 16):
        simulate_calibration_lr(model, test_dataset, S=num_calibration_samples)


Global seed set to 42


./saved_models/p00/p00_best-v28.ckpt


  rank_zero_warn(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


train on persons [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
valid on person 0
test on person 1
len(dataset_train) 60784
len(dataset_valid) 2927


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


len(dataset_test) 2904
Testing DataLoader 0: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 181/181 [00:25<00:00,  7.21it/s]
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
             Test metric                         DataLoader 0
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
   test/offset(k=0)/angular_error             3.6843724250793457
        test/offset(k=0)/loss       