# Import Libraries

In [None]:
import os
import pandas as pd
import numpy as np
import cv2
import random

from tqdm.auto import tqdm
from glob import glob
from scipy.io import loadmat

In [None]:
def make_arbitrary_masking(N, ind):
    """Split ``range(N)`` into the positions outside and inside ``ind``.

    Args:
        N: Length of the index range ``0 .. N-1``.
        ind: Integer positions (anything usable as a NumPy index) to select.

    Returns:
        A tuple ``(unselected, selected)`` of index arrays, each in ascending
        order because they are drawn from ``np.arange(0, N)``.
    """
    selected = np.zeros(N, dtype=bool)
    selected[ind] = True
    positions = np.arange(0, N)
    inside = positions[selected]
    outside = positions[~selected]
    return outside, inside

def k_fold_index(N=150, k=10, randomize=True, SEED=10):
    """Build the (train, test) index pairs of a k-fold split over ``range(N)``.

    Args:
        N: Total number of samples to split.
        k: Number of folds. When ``N`` is not a multiple of ``k`` the leading
            folds receive one extra sample instead of raising ``ValueError``.
        randomize: If truthy, shuffle the indices before cutting the folds.
        SEED: Seed passed to ``np.random.seed`` before shuffling. Note that
            this reseeds NumPy's *global* random state as a side effect.

    Returns:
        A list of ``k`` tuples ``(train_indices, test_indices)`` as produced by
        ``make_arbitrary_masking``: the test part holds the fold's indices and
        the train part holds every other index.
    """
    indice = np.arange(0, N)
    if randomize:
        np.random.seed(SEED)
        np.random.shuffle(indice)
    # array_split yields the same folds as np.split when N % k == 0, but also
    # copes with an uneven division.
    return [make_arbitrary_masking(N, fold) for fold in np.array_split(indice, k)]

# Data Path

In [None]:
# Root of the normalized MPIIGaze data; one sub-folder per participant is
# expected, each holding the per-day .mat files matched by the pattern below.
data_path = "./MPIIGaze/Data/Normalized"
mat_path = glob(os.path.join(data_path, "*", "*.mat"))
mat_path.sort()  # deterministic order regardless of filesystem listing order
print(f"Find {len(mat_path)} Data")

## Mat to Dataframe

In [None]:
def convert_pose(vector: np.ndarray) -> np.ndarray:
    """Convert a head-pose vector into ``[pitch, yaw]`` angles in radians.

    The input is cast to float32 and passed through ``cv2.Rodrigues``; the
    angles are then derived from the third column of the result.
    NOTE(review): assumes ``vector`` is a 3-element rotation vector so that
    Rodrigues yields a 3x3 rotation matrix; confirm against the .mat schema.

    Args:
        vector: Head-pose vector as stored in the dataset.

    Returns:
        A float32 array ``[pitch, yaw]``.
    """
    rotation_vector = np.array(vector).astype(np.float32)
    rotation_matrix = cv2.Rodrigues(rotation_vector)[0]
    third_column = rotation_matrix[:, 2]
    angles = [
        np.arcsin(third_column[1]),
        np.arctan2(third_column[0], third_column[2]),
    ]
    return np.array(angles).astype(np.float32)


def convert_gaze(vector: np.ndarray) -> np.ndarray:
    """Convert a 3-component gaze vector into ``[pitch, yaw]`` in radians.

    The vector is normalised to unit length first, so only its direction
    matters. Pitch is ``arcsin(-y)`` and yaw is ``arctan2(-x, -z)``.

    Args:
        vector: Gaze direction with exactly three components ``(x, y, z)``.

    Returns:
        A float32 array ``[pitch, yaw]``.
    """
    direction = vector / np.linalg.norm(vector)
    dx, dy, dz = direction
    angles = np.array([np.arcsin(-dy), np.arctan2(-dx, -dz)])
    return angles.astype(np.float32)

In [None]:
# Flatten every .mat file into one row per eye image:
# (participant, day, eye side, flattened image, head pitch/yaw, gaze pitch/yaw).
eye_loc_list = ["right", "left"]

datas = []
for idx in tqdm(range(len(mat_path))):
    # Normalise Windows separators so that splitting on "/" works on any OS.
    cur_mat_path = mat_path[idx].replace("\\","/")
    # Participant id is the parent folder name; day id is the file name up to
    # its first dot.
    par_id = cur_mat_path.split("/")[-2]
    day_id = cur_mat_path.split("/")[-1].split(".")[0]

    # Load the 'data' struct of this file (singleton dimensions squeezed).
    mat_data = loadmat(cur_mat_path, squeeze_me=True, struct_as_record=True)['data']
    for loc in eye_loc_list:
        eye_loc_data = mat_data[loc].tolist()

        # Each field should carry a leading sample axis. When an array comes
        # back with fewer dimensions (presumably a file holding one sample,
        # collapsed by squeeze_me=True), put that axis back.
        image_data = eye_loc_data["image"].tolist()
        if len(image_data.shape) < 3:
            image_data = image_data[np.newaxis,:]

        pose_data = eye_loc_data["pose"].tolist()
        if len(pose_data.shape) < 2:
            pose_data = pose_data[np.newaxis,:]

        gaze_data = eye_loc_data["gaze"].tolist()
        if len(gaze_data.shape) < 2:
            gaze_data = gaze_data[np.newaxis,:]


        for i in range(len(image_data)):
            if loc == "left":
                image = image_data[i]
                pose = convert_pose(pose_data[i])
                gaze = convert_gaze(gaze_data[i])
            else:
                # Right eye: mirror the image horizontally (reverse columns)
                # and negate the yaw component of both angle pairs --
                # presumably so right eyes share the left-eye convention.
                image = image_data[i][:, ::-1]
                pose = convert_pose(pose_data[i]) * np.array([1, -1])
                gaze = convert_gaze(gaze_data[i]) * np.array([1, -1])

            # The image is stored flattened to 1-D inside the row.
            data_list = [par_id, day_id, loc, image.ravel(), pose[0], pose[1], gaze[0], gaze[1]]
            datas.append(data_list)

data_df = pd.DataFrame(columns=["participant_id","day","eye_location","image","head_pitch","head_yaw","gaze_pitch","gaze_yaw"], data=datas)
# NOTE(review): the result of head(3) is neither assigned nor the last
# expression of the cell, so it is discarded.
data_df.head(3)
# Order rows by participant, then day, and renumber the index from 0.
data_df = data_df.sort_values(by=['participant_id', 'day']).reset_index(drop=True)

# Save Total Data

In [None]:
# Output directory for every exported dataset file.
save_path = "../mpii_dataset"
# exist_ok=True makes the creation idempotent and avoids the race between
# checking for the directory and creating it.
os.makedirs(save_path, exist_ok=True)
save_file = os.path.join(save_path, "mpii_preprocessed_dataset.parquet")
# data_df.to_parquet(save_file, engine='pyarrow', index=False)

In [None]:
# Pull the DataFrame columns out into plain NumPy arrays, one entry per row.
id_vector = np.array(data_df["participant_id"].tolist())
# Boolean flag per row: True where the sample is a right eye.
is_right = np.array(data_df["eye_location"].tolist()) == "right"
# Stack the flattened images and restore them to 36x60 frames.
images = np.stack(data_df["image"].tolist()).reshape(-1, 36, 60)
head_poses = np.asarray(data_df.loc[:, ["head_pitch", "head_yaw"]])
gazes = np.asarray(data_df.loc[:, ["gaze_pitch", "gaze_yaw"]])

In [None]:
# Sanity check: one shape per line for the arrays extracted from data_df.
print(is_right.shape, images.shape, head_poses.shape, gazes.shape, sep="\n")

In [None]:
# np.save('../mpii_dataset/full_ids', id_vector)
# np.save('../mpii_dataset/full_images', images)
# np.save('../mpii_dataset/full_2d_hps', head_poses)
# np.save('../mpii_dataset/full_2d_gazes', gazes)

In [None]:
# Draw a fixed-size sample of row indices for each of the 15 participants.
indice_list = []

np.random.seed(42)  # fixed seed so the drawn sample is reproducible
for i in tqdm(range(15)):
    # Participant ids are zero-padded to two digits: "p00" ... "p14".
    is_participant = id_vector == f"p{i:02d}"
    left_pool = np.where(is_participant & ~is_right)[0]
    right_pool = np.where(is_participant & is_right)[0]

    if i == 13:
        # p13 is special-cased: keep every one of its rows and add 2 extra
        # left and 2 extra right rows (which therefore appear twice).
        # NOTE(review): presumably p13 has 2996 rows, so this pads it to the
        # 3000 rows drawn for the other participants -- confirm.
        left_indice = np.random.choice(left_pool, size=2, replace=False)
        right_indice = np.random.choice(right_pool, size=2, replace=False)
        total_indice = np.concatenate([np.where(is_participant)[0], left_indice, right_indice])
    else:
        # 1500 left + 1500 right rows, drawn without replacement.
        left_indice = np.random.choice(left_pool, size=1500, replace=False)
        right_indice = np.random.choice(right_pool, size=1500, replace=False)
        total_indice = np.concatenate([left_indice, right_indice])

    indice_list.append(total_indice)

# Gather the per-participant slices of every array, in participant order.
selected_ids = [id_vector[rows] for rows in indice_list]
selected_images = [images[rows] for rows in indice_list]
selected_head_poses = [head_poses[rows] for rows in indice_list]
selected_gazes = [gazes[rows] for rows in indice_list]

In [None]:
# Leave-one-out layout: stack the per-participant arrays along a new leading
# axis, so index p along axis 0 holds participant p's samples.
loocv_ids = np.stack(selected_ids)
loocv_images = np.stack(selected_images)
loocv_hps = np.stack(selected_head_poses)
loocv_gazes = np.stack(selected_gazes)

In [None]:
# Sanity check: one shape per line for the leave-one-out arrays.
print(loocv_ids.shape, loocv_images.shape, loocv_hps.shape, loocv_gazes.shape, sep="\n")

In [None]:
# Within-subject 10-fold layout. Every participant gets its own shuffled
# 10-fold split (seed 100 + participant number) over its 3000 selected rows.
cv_indice_list = [k_fold_index(N=3000, k=10, randomize=True, SEED=100 + i) for i in range(15)]

# fold_indice_list[fold][i] holds the held-out positions of participant i in
# that fold (the second element of each (train, test) pair).
fold_indice_list = [
    np.stack([cv_indice_list[i][fold][1] for i in range(15)])
    for fold in range(10)
]

# For each fold, concatenate the held-out rows of all participants, then
# stack the folds along a new leading axis.
# enumerate() binds `i` inside the comprehension so that participant i uses
# row i of fold_indice_list, instead of whatever value a global `i` happens
# to hold from an earlier loop.
within_ids = np.stack([
    np.concatenate([ids[fold_indice_list[fold][i]] for i, ids in enumerate(selected_ids)])
    for fold in range(10)
])
within_images = np.stack([
    np.concatenate([imgs[fold_indice_list[fold][i]] for i, imgs in enumerate(selected_images)])
    for fold in range(10)
])
within_hps = np.stack([
    np.concatenate([hps[fold_indice_list[fold][i]] for i, hps in enumerate(selected_head_poses)])
    for fold in range(10)
])
within_gazes = np.stack([
    np.concatenate([gz[fold_indice_list[fold][i]] for i, gz in enumerate(selected_gazes)])
    for fold in range(10)
])

In [None]:
# Sanity check: one shape per line for the within-subject fold arrays.
print(within_ids.shape, within_images.shape, within_hps.shape, within_gazes.shape, sep="\n")

# Extract Sampling Data

In [None]:
# Write both layouts to disk; np.save appends ".npy" to each target name.
for target, array in (
    ('../mpii_dataset/loocv_ids', loocv_ids),
    ('../mpii_dataset/loocv_images', loocv_images),
    ('../mpii_dataset/loocv_2d_hps', loocv_hps),
    ('../mpii_dataset/loocv_2d_gazes', loocv_gazes),
    ('../mpii_dataset/within_ids', within_ids),
    ('../mpii_dataset/within_images', within_images),
    ('../mpii_dataset/within_2d_hps', within_hps),
    ('../mpii_dataset/within_2d_gazes', within_gazes),
):
    np.save(target, array)