In [None]:
# get_names_of_subj_folders(path_to_project_folder)
def get_names_of_subj_folders(path_to_project_folder):
    """
    List the sub-directories found directly inside a project folder.

    Every entry of `path_to_project_folder` is examined and only the entries
    that are directories are kept; anything else is ignored.

    Return: list of folder names (bare names, not full paths), in the order
    reported by `os.listdir`.
    """

    import os

    def _is_folder(entry_name):
        # `os.listdir` yields bare names, so rebuild the full path to test it.
        return os.path.isdir(os.path.join(path_to_project_folder, entry_name))

    return [
        entry_name
        for entry_name in os.listdir(path_to_project_folder)
        if _is_folder(entry_name)
    ]


In [None]:
# get_biographic_data(subject_index, names_of_subjects_folders)
def get_biographic_data(subject_index, names_of_subjects_folders):
    """
    Read the biographic data of one subject into a DataFrame.

    The file is looked up in
    `../data/raw/thesis_marta/<subject folder>/biographical_data` (relative to
    the current working directory) and parsed as a semicolon-separated CSV.

    `subject_index` is the position of the subject in
    `names_of_subjects_folders`, a list of subject folder names.

    Hidden entries (names starting with ".", e.g. `.DS_Store`) are ignored
    and the remaining names are sorted, so the file that gets read does not
    depend on the arbitrary order returned by `os.listdir`.

    Return: DataFrame with the content of the biographic file.

    Raises: IndexError if `subject_index` is out of range or if the folder
    contains no non-hidden entry.
    """

    import os
    import pandas as pd

    path_to_biographic_folder = os.path.join(
        "..",
        "data",
        "raw",
        "thesis_marta",
        names_of_subjects_folders[subject_index],
        "biographical_data",
    )

    # Get the biographic file name, skipping hidden housekeeping files.
    biographic_file_names = sorted(
        name
        for name in os.listdir(path_to_biographic_folder)
        if not name.startswith(".")
    )

    # Read biographic data from the first file of the folder.
    d = pd.read_csv(
        os.path.join(path_to_biographic_folder, biographic_file_names[0]),
        sep=";",
    )

    # Return DataFrame with biographic information.
    return d


In [None]:
# Get thresholds from calibration data.
def get_thresholds(subject_index, names_of_subjects_folders):
    """
    Read the four gaze-ratio thresholds of one subject from calibration data.

    The file is looked up in
    `../data/raw/thesis_marta/<subject folder>/calibration_data` (relative to
    the current working directory) and parsed as a semicolon-separated CSV.
    Only the first row of the file is used.

    `subject_index` is the position of the subject in
    `names_of_subjects_folders`, a list of subject folder names.

    Hidden entries (names starting with ".", e.g. `.DS_Store`) are ignored
    and the remaining names are sorted, so the file that gets read does not
    depend on the arbitrary order returned by `os.listdir`.

    Return: list `[threshold1, threshold2, threshold3, threshold4]` taken
    from the columns `left_side_left_eye_ratio`, `left_side_right_eye_ratio`,
    `right_side_left_eye_ratio` and `right_side_right_eye_ratio`, in that
    order.

    Raises: IndexError if `subject_index` is out of range or if the folder
    contains no non-hidden entry; KeyError if a column is missing.
    """

    import os
    import pandas as pd

    path_to_calibration_folder = os.path.join(
        "..",
        "data",
        "raw",
        "thesis_marta",
        names_of_subjects_folders[subject_index],
        "calibration_data",
    )

    # Get the calibration file name, skipping hidden housekeeping files.
    calibration_file_names = sorted(
        name
        for name in os.listdir(path_to_calibration_folder)
        if not name.startswith(".")
    )

    # Read calibration data from the first file of the folder.
    d = pd.read_csv(
        os.path.join(path_to_calibration_folder, calibration_file_names[0]),
        sep=";",
    )

    # If the value of the ratio is less than threshold1, the fixation is to
    # the left. If the value of the ratio is greater than threshold3, the
    # fixation is to the right. If the ratio falls between threshold1 and
    # threshold3, the fixation is in the center.
    threshold_columns = (
        "left_side_left_eye_ratio",
        "left_side_right_eye_ratio",
        "right_side_left_eye_ratio",
        "right_side_right_eye_ratio",
    )

    return [d[column].values[0] for column in threshold_columns]


In [None]:
# Get eye tracking data.
def get_eye_tracking_data(subject_index, names_of_subjects_folders):
    """
    Summarise the eye-tracking recordings of one subject, one row per file.

    Every non-hidden file found in
    `../data/raw/thesis_marta/<subject folder>/eye_tracking_data` (relative to
    the current working directory) is parsed as a semicolon-separated CSV
    with a `ratio` column. Each sample is classified as "sx" (ratio below
    threshold1), "dx" (ratio above threshold3) or "center" (otherwise), where
    the thresholds are the ones returned by `get_thresholds` for the same
    subject. The proportion of samples in each class is stored in the output.

    The remaining columns are parsed from the file name, split on "_":
    field 0 is stored as `subject_id_number2`; fields 1 and 2 give `block`
    and `trial` after dropping their first character; fields 3-6 give
    `stim_left`, `stim_left_img_number`, `stim_right` and
    `stim_right_img_number`.

    `subject_index` is the position of the subject in
    `names_of_subjects_folders`, a list of subject folder names.

    Return: DataFrame with one row per eye-tracking file.

    Raises: ValueError if the folder holds no file to read (nothing to
    concatenate) or if a numeric field of a file name cannot be converted;
    IndexError if a file name has fewer fields than expected.
    """

    import os
    import numpy as np
    import pandas as pd

    path_to_eye_tracking_folder = os.path.join(
        "..",
        "data",
        "raw",
        "thesis_marta",
        names_of_subjects_folders[subject_index],
        "eye_tracking_data",
    )

    # Use the thresholds of the requested subject rather than whatever
    # `threshold1` / `threshold3` globals happen to exist in the session.
    thresholds = get_thresholds(subject_index, names_of_subjects_folders)
    threshold1 = thresholds[0]
    threshold3 = thresholds[2]

    # Hidden housekeeping entries (e.g. `.DS_Store`) are not recordings.
    names_of_eye_tracking_files = [
        name
        for name in os.listdir(path_to_eye_tracking_folder)
        if not name.startswith(".")
    ]

    d_list = []

    for file_name in names_of_eye_tracking_files:

        d = pd.read_csv(os.path.join(path_to_eye_tracking_folder, file_name), sep=";")

        # Label every sample as left ("sx"), right ("dx") or "center".
        where_fixates_most = np.select(
            [d["ratio"] < threshold1, d["ratio"] > threshold3],
            ["sx", "dx"],
            default="center",
        )

        n_samples = len(where_fixates_most)
        prop_dx_fixation = (where_fixates_most == "dx").sum() / n_samples
        prop_sx_fixation = (where_fixates_most == "sx").sum() / n_samples
        prop_center_fixation = (where_fixates_most == "center").sum() / n_samples

        # The trial metadata is encoded in the underscore-separated file name.
        name_fields = file_name.split("_")

        subject_id_number2 = name_fields[0]
        # Block and trial: drop the first character, keep the number.
        block = int(name_fields[1][1:])
        trial = int(name_fields[2][1:])

        stim_left = name_fields[3]
        stim_left_img_number = int(name_fields[4])
        stim_right = name_fields[5]
        stim_right_img_number = int(name_fields[6])

        mydf = pd.DataFrame(
            {
                "prop_dx_fixation": [prop_dx_fixation],
                "prop_sx_fixation": [prop_sx_fixation],
                "prop_center_fixation": [prop_center_fixation],
                "block": [block],
                "trial": [trial],
                "stim_left": [stim_left],
                "stim_left_img_number": [stim_left_img_number],
                "stim_right": [stim_right],
                "stim_right_img_number": [stim_right_img_number],
                "subject_id_number2": [subject_id_number2],
            }
        )

        d_list.append(mydf)

    # Convert list into DataFrame.
    fix_df = pd.concat(d_list, ignore_index=True)

    return fix_df


In [None]:
# Read PRL data.
def read_prl_data(subject_index, names_of_subjects_folders):
    """
    Load all the PRL task files of one subject into a single DataFrame.

    Every file whose name ends with ".csv" in
    `../data/raw/thesis_marta/<subject folder>/prl_task_data` (relative to
    the current working directory) is parsed as a semicolon-separated CSV.
    The resulting tables are stacked in the order reported by `os.listdir`
    and the index is renumbered from 0.

    `subject_index` is the position of the subject in
    `names_of_subjects_folders`, a list of subject folder names.

    Return: DataFrame with the rows of all the PRL files.
    """

    import os
    import pandas as pd

    subject_folder = names_of_subjects_folders[subject_index]
    prl_folder_path = os.path.join(
        "..", "data", "raw", "thesis_marta", subject_folder, "prl_task_data"
    )

    # One DataFrame per CSV file; other kinds of files are left out.
    frames = [
        pd.read_csv(os.path.join(prl_folder_path, entry), sep=";")
        for entry in os.listdir(prl_folder_path)
        if entry.endswith(".csv")
    ]

    return pd.concat(frames, ignore_index=True)
