# Pengumpulan Dataset


In [225]:
import os
import pandas as pd
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv

In [226]:
def createSubsetList():
    """Write one subset list per participant into ``subset_list/``.

    For each of the 15 participants (p00..p14) the eye-image sample list is
    reduced to its unique ``day/filename`` entries, joined with the face
    annotation file on that entry, and saved as a space-separated file with
    the columns ``day filename 0 1 ... 11``.

    NOTE(review): the join uses ``how="outer"``, so entries present in only
    one of the two input files are kept as well (with missing values on the
    other side) -- confirm that this is intended.
    """
    output_dir = "subset_list"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # Labels given to the twelve annotation columns that follow the image name.
    annotation_cols = list(range(12))

    for person_idx in range(15):
        person = f"{person_idx:02d}"

        samples = pd.read_csv(
            f"dataset/MPIIGaze/Evaluation Subset/sample list for eye image/p{person}.txt",
            sep=" ",
            header=None,
        )
        # Only the first column (the "day/filename" entry) is used.
        image_names = samples[0]

        duplicate_count = image_names.duplicated().sum()
        print("Duplicate data: " + str(duplicate_count) + "\n")
        image_names = image_names.drop_duplicates()

        face_data = pd.read_csv(
            f"dataset/MPIIGaze/Evaluation Subset/annotation for face image/p{person}.txt",
            sep=" ",
            header=None,
        )

        merged = pd.merge(image_names, face_data, on=0, how="outer")
        merged.columns = ['day/name'] + annotation_cols

        # Split "day/filename" into two separate columns and drop the original.
        merged[["day", "filename"]] = merged['day/name'].str.split("/", expand=True)
        merged = merged.drop(columns=['day/name'])

        # Put the identifying columns first, followed by the annotations.
        merged = merged[['day', 'filename'] + annotation_cols]

        merged.to_csv(
            f"subset_list/p{person}.txt",
            index=False,
            sep=' ',
            header=["day", 'filename'] + annotation_cols,
        )
        
# createSubsetList()


In [227]:
def check_face_images() : 
    """Print the path of every listed face image that does not exist on disk.

    Reads ``subset_list/pXX.txt`` for participants p00..p14 and, for each
    row, checks the corresponding file below ``dataset/MPIIFaceGaze``.
    Nothing is printed for images that are present.
    """
    for person in (f"p{idx:02d}" for idx in range(15)):
        subset = pd.read_csv(f"subset_list/{person}.txt", sep=" ")

        for _, entry in subset.iterrows():
            day, fn = entry["day"], entry["filename"]
            path = f"dataset/MPIIFaceGaze/{person}/{day}/{fn}"

            if os.path.exists(path):
                continue

            # Report only the missing files.
            print(path)
                
# check_face_images()


In [228]:
def get_face_image(image_path, landmarks, margin=40, size=(224, 224)):
    """Crop the region around six landmarks from an image and resize it.

    Args:
        image_path: Path of the image file to read.
        landmarks: Iterable of 12 numbers ``x1, y1, ..., x6, y6`` in pixel
            coordinates of the image.
        margin: Number of pixels added on every side of the landmarks'
            bounding box before cropping.
        size: ``(width, height)`` of the returned image, passed to
            ``cv.resize``.

    Returns:
        The cropped and resized image as returned by ``cv.resize``.

    Raises:
        OSError: If the image cannot be read.
        ValueError: If ``landmarks`` does not contain exactly 12 finite
            values, or if the resulting crop region is empty.
    """
    image = cv.imread(image_path)
    # cv.imread signals failure by returning None instead of raising.
    if image is None:
        raise OSError(f"Could not read image: {image_path}")

    # Slicing needs plain integers; values coming from pandas may be floats.
    coords = [int(round(float(value))) for value in landmarks]
    if len(coords) != 12:
        raise ValueError(
            f"Expected 12 landmark values (x1, y1, ..., x6, y6), got {len(coords)}"
        )
    xs = coords[0::2]
    ys = coords[1::2]

    height, width = image.shape[:2]

    # Clamp the box to the image. A negative start index would otherwise be
    # interpreted by NumPy as counting from the end and yield a wrong crop.
    left = max(min(xs) - margin, 0)
    top = max(min(ys) - margin, 0)
    right = min(max(xs) + margin, width)
    bottom = min(max(ys) + margin, height)

    if right <= left or bottom <= top:
        raise ValueError(
            f"Landmark bounding box lies outside the image: {image_path}"
        )

    cropped_image = image[top:bottom, left:right]

    return cv.resize(cropped_image, size)

In [229]:

def _save_image(path, image):
    """Write ``image`` to ``path`` and raise OSError if OpenCV reports failure."""
    if not cv.imwrite(path, image):
        raise OSError(f"Could not write image: {path}")


def organize_data(num_persons=1, max_rows=1):
    """Export eye images, face crops and gaze values for the subset samples.

    For every processed row of ``subset_list/pXX.txt`` the function
      * looks the sample up by filename in the participant's normalized
        ``.mat`` file for that day,
      * writes the left/right eye images and the cropped face image below
        ``data_subset/pXX/<day>/``,
      * appends one line ``filename lx ly lz rx ry rz`` to
        ``data_subset/pXX/<day>/data.txt``.

    ``data.txt`` is opened in append mode, so running the export again adds
    the same lines a second time.

    Args:
        num_persons: Number of participants to process, starting at p00.
        max_rows: Maximum number of rows processed per participant;
            ``None`` processes all rows.

    Raises:
        IndexError: If a filename is not present in the day's ``.mat`` file.
        OSError: If one of the output images cannot be written.
    """
    for i in range(num_persons):

        person = f"p{i:02d}"

        df = pd.read_csv(f'subset_list/{person}.txt', sep=" ")
        if max_rows is not None:
            # Restrict the frame up front instead of materializing every row.
            df = df.head(max_rows)

        # Keep the most recently loaded day in memory so consecutive rows of
        # the same day do not reload the .mat file.
        loaded_day = None
        mat_data = None

        for _, row in df.iterrows():

            day = row["day"]
            filename = row["filename"]

            if mat_data is None or day != loaded_day:
                mat_data = scipy.io.loadmat(f'dataset/MPIIGaze/Data/Normalized/{person}/{day}.mat')
                loaded_day = day

            data = mat_data["data"]
            filenames = mat_data["filenames"]

            matches = np.where(filenames == filename)[0]
            if len(matches) == 0:
                raise IndexError(
                    f"{filename} not found in dataset/MPIIGaze/Data/Normalized/{person}/{day}.mat"
                )
            index = matches[0]

            right_eye = data[0][0][0]
            left_eye = data[0][0][1]

            # NOTE(review): assumes entry 0 of each eye record holds the gaze
            # values and entry 1 the images -- confirm against the .mat layout.
            gaze_pos_right = right_eye[0][0][0][index]
            gaze_pos_left = left_eye[0][0][0][index]

            right_eye_img = right_eye[0][0][1][index]
            left_eye_img = left_eye[0][0][1][index]

            face_img_path = f"dataset/MPIIFaceGaze/{person}/{day}/{filename}"
            # Columns 2..13 by position: the twelve values after day/filename.
            landmarks = row.iloc[2:14]

            face_img = get_face_image(face_img_path, landmarks=landmarks)

            # Always ensure all three folders exist; checking only the day
            # folder would miss sub-folders absent from a partial earlier run.
            out_dir = f"data_subset/{person}/{day}"
            for sub_dir in ("left_eye", "right_eye", "face"):
                os.makedirs(f"{out_dir}/{sub_dir}", exist_ok=True)

            _save_image(f"{out_dir}/left_eye/{filename}", left_eye_img)
            _save_image(f"{out_dir}/right_eye/{filename}", right_eye_img)
            _save_image(f"{out_dir}/face/{filename}", face_img)

            # Record the gaze values only after all three images were written.
            with open(f"{out_dir}/data.txt", "a") as file :
                file.write(f"{filename} {gaze_pos_left[0]} {gaze_pos_left[1]} {gaze_pos_left[2]} {gaze_pos_right[0]} {gaze_pos_right[1]} {gaze_pos_right[2]} \n")
            
# Execute the data organization step defined above.
organize_data()