# RQ1 - Eyetracking Fixation Metrics

## Import Libraries

In [2]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import utils.GenSnippetsLib as gsl
import os

In [3]:
# Screen resolution constant; presumably (width, height) in pixels of the
# display the snippets were shown on — TODO confirm.
screen_resolution = (1920, 1080)

## Import Eyetracking Data

In [4]:
# Index every per-snippet eye-tracking recording that exists on disk.
df_query = pd.read_csv("./data/filteredData/filtered_data.csv")
snippets = df_query["Algorithm"].unique()
participants = df_query["Participant"].unique()

# Collect the rows in a plain list and build the frame once: growing a
# DataFrame with `.loc[len(df)] = ...` re-allocates on every insert.
eyetracking_records = []
for participant in participants:
    for snippet in snippets:
        path = f"./data/filteredData/Participant{str(participant).zfill(2)}/{snippet}_Code_eyetracking.csv"
        # Only keep participant/snippet combinations whose recording file exists.
        if os.path.exists(path):
            eyetracking_records.append((participant, snippet, path))

df_eyetracking_events = pd.DataFrame(eyetracking_records, columns=["Participant", "Algorithm", "Path"])
df_eyetracking_events

Unnamed: 0,Participant,Algorithm,Path
0,1,IsPrime,./data/filteredData/Participant01/IsPrime_Code...
1,1,SiebDesEratosthenes,./data/filteredData/Participant01/SiebDesErato...
2,1,IsAnagram,./data/filteredData/Participant01/IsAnagram_Co...
3,1,RemoveDoubleChar,./data/filteredData/Participant01/RemoveDouble...
4,1,BinToDecimal,./data/filteredData/Participant01/BinToDecimal...
...,...,...,...
1067,71,BogoSort,./data/filteredData/Participant71/BogoSort_Cod...
1068,71,ReverseQueue,./data/filteredData/Participant71/ReverseQueue...
1069,71,Ackerman,./data/filteredData/Participant71/Ackerman_Cod...
1070,71,RabbitTortoise,./data/filteredData/Participant71/RabbitTortoi...


In [5]:
# Reload the filtered study data, then derive the snippet names and the
# participants that are not flagged as outliers.
df_filtered = pd.read_csv("./data/filteredData/filtered_data.csv")
Algorithms = df_filtered["Algorithm"].unique()
is_not_outlier = df_filtered["IsOutlier"] == False
Participants = df_filtered.loc[is_not_outlier, "Participant"].unique()

In [6]:
def doBoxesCollide(a, b):
    """Return whether two axis-aligned boxes overlap.

    Both boxes are indexable as (x_center, y_center, width, height).
    They overlap when, on each axis, twice the distance between the centers
    is strictly smaller than the summed extents; boxes that only touch at an
    edge therefore do not count as colliding.
    """
    center_distance_x = abs(a[0] - b[0])
    center_distance_y = abs(a[1] - b[1])
    combined_width = a[2] + b[2]
    combined_height = a[3] + b[3]
    overlaps_horizontally = center_distance_x * 2 < combined_width
    return overlaps_horizontally and center_distance_y * 2 < combined_height

# Token Based Metrics

In [7]:
# Get Token based AOIs
def _cluster_letters_into_tokens(aoi_list):
    """Merge consecutive letters that share a token label into one box per token.

    Each entry of ``aoi_list`` is read via the keys "AOI", "letter",
    "BoundingBox" (left, top, right, bottom) and "color". A letter whose "AOI"
    has exactly one entry, or whose "letter" is a newline, ends the token that
    is currently being built and is not part of any token itself.

    Returns a list of tuples
    ``(token_index, label, left, top, right, bottom, color)``.
    """
    tokens = []
    current = None  # [label, left, top, right, bottom, color] of the open token

    for letter in aoi_list:
        is_separator = len(letter["AOI"]) == 1 or letter["letter"] == '\n'
        label = None if is_separator else letter["AOI"][1]

        # Close the open token when a separator or a different label follows.
        if current is not None and (is_separator or label != current[0]):
            tokens.append((len(tokens), *current))
            current = None
        if is_separator:
            continue

        left, top, right, bottom = letter["BoundingBox"][:4]
        if current is None:
            current = [label, left, top, right, bottom, letter["color"]]
        else:
            # Same token: grow its box so that it also covers this letter.
            current[1] = min(current[1], left)
            current[2] = min(current[2], top)
            current[3] = max(current[3], right)
            current[4] = max(current[4], bottom)

    # A token that is still open when the letters run out used to be dropped.
    if current is not None:
        tokens.append((len(tokens), *current))
    return tokens


token_records = []
for snippet in tqdm(Algorithms):
    aoi_token_generator = f"./../CodeSnippets/Generators_Labeled/Generators/{snippet}_ast.json"
    image, aoi_list = gsl.create_image(aoi_token_generator, font_path="./../CodeSnippets/fonts/ttf/")
    # NOTE(review): assuming `image` is a PIL image, `size` is (width, height).
    # The offsets shift the image-relative boxes by the gap between the screen
    # centre and the image centre, i.e. the image is treated as centred.
    image_width, image_height = image.size
    width_offset = int(screen_resolution[0] * 0.5) - int(image_width / 2)
    height_offset = int(screen_resolution[1] * 0.5) - int(image_height / 2)

    aoi_clustered = _cluster_letters_into_tokens(aoi_list)
    for token_idx, token, left, top, right, bottom, _color in aoi_clustered:
        token_records.append({
            "Algorithm": snippet,
            "Token": token,
            "TokenIdx": token_idx,
            "BoundingBox": (left + width_offset,
                            top + height_offset,
                            right + width_offset,
                            bottom + height_offset),
        })

# Build the frame once instead of appending row by row.
df_token_aois = pd.DataFrame(token_records, columns=["Algorithm", "Token", "TokenIdx", "BoundingBox"])
df_token_aois

  0%|          | 0/32 [00:00<?, ?it/s]

Unnamed: 0,Algorithm,Token,TokenIdx,BoundingBox
0,IsPrime,Modifier,0,"(808, 468, 856, 482)"
1,IsPrime,Modifier,1,"(864, 468, 912, 479)"
2,IsPrime,BasicType,2,"(920, 469, 976, 479)"
3,IsPrime,Identifier,3,"(984, 468, 1040, 479)"
4,IsPrime,Separator,4,"(1040, 467, 1048, 481)"
...,...,...,...,...
2700,Rectangle,Keyword,82,"(988, 677, 1020, 688)"
2701,Rectangle,Separator,83,"(1020, 685, 1028, 688)"
2702,Rectangle,Identifier,84,"(1028, 677, 1076, 691)"
2703,Rectangle,Separator,85,"(1076, 676, 1100, 691)"


### Check which Fixation of which Participant is in which Token

In [8]:
# Assign every fixation to the first token AOI that its box overlaps.
# Rows are collected in a list and turned into a DataFrame once at the end;
# appending with `.loc[len(df)]` re-allocates the frame for every fixation.
fixation_records = []
labels = set()
for snippet in tqdm(Algorithms):
    df_token_per_algo = df_token_aois[df_token_aois["Algorithm"] == snippet]
    # Convert each token box from (left, top, right, bottom) to the
    # (x_center, y_center, width, height) layout used by doBoxesCollide.
    # This only depends on the snippet, so it is done once per snippet.
    token_rectangles = [
        (token_idx, [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2, box[2] - box[0], box[3] - box[1]])
        for token_idx, box in zip(df_token_per_algo["TokenIdx"], df_token_per_algo["BoundingBox"])
    ]

    for participant in Participants:
        df_grouped = df_eyetracking_events[(df_eyetracking_events["Participant"] == participant) &
                                           (df_eyetracking_events["Algorithm"] == snippet)]
        if len(df_grouped) == 0:
            continue

        eyetracking_path = df_grouped["Path"].values[0]
        df_current_eyetracking = pd.read_csv(eyetracking_path)
        # Remember every event label that occurs in the recordings.
        labels.update(df_current_eyetracking["label"].unique())

        # Only events labelled "FIXA" are processed; FixationNumber counts them from 0.
        df_fix = df_current_eyetracking[df_current_eyetracking["label"] == "FIXA"].reset_index()
        x_range = (df_fix["end_x"] - df_fix["start_x"]).abs()
        y_range = (df_fix["end_y"] - df_fix["start_y"]).abs()

        fixation_columns = zip(df_fix["start_x"], df_fix["start_y"], x_range, y_range,
                               df_fix["start_time"], df_fix["end_time"])
        for fix_idx, (start_x, start_y, width, height, start_time, end_time) in enumerate(fixation_columns):
            # NOTE(review): the fixation box is centred on the start point and
            # sized by the start-to-end drift; confirm that the midpoint
            # between start and end was not intended as the centre.
            rectangle_a = [start_x, start_y, width, height]
            hit_token_idx = None  # stays None when the fixation is outside every AOI
            for token_idx, rectangle_b in token_rectangles:
                if doBoxesCollide(rectangle_a, rectangle_b):
                    hit_token_idx = token_idx
                    break
            fixation_records.append({
                "Algorithm": snippet,
                "Participant": participant,
                "FixationNumber": fix_idx,
                "Start": start_time,
                "End": end_time,
                "TokenIdx": hit_token_idx,
            })

df_token_fixation_per_participant = pd.DataFrame(
    fixation_records,
    columns=["Algorithm", "Participant", "FixationNumber", "Start", "End", "TokenIdx"])
df_token_fixation_per_participant

  0%|          | 0/32 [00:00<?, ?it/s]

Unnamed: 0,Algorithm,Participant,FixationNumber,Start,End,TokenIdx
0,IsPrime,1,0,0.000,0.228,
1,IsPrime,1,1,0.264,0.440,
2,IsPrime,1,2,0.656,0.736,
3,IsPrime,1,3,0.756,0.884,
4,IsPrime,1,4,1.148,1.348,1
...,...,...,...,...,...,...
172988,Rectangle,71,30,8.812,8.944,26
172989,Rectangle,71,31,8.996,9.080,
172990,Rectangle,71,32,9.272,9.404,
172991,Rectangle,71,33,9.436,9.596,22


In [9]:
# save the result
# Token-name lookup per (Algorithm, TokenIdx), plus one sentinel row per
# algorithm so that fixations outside every AOI (TokenIdx -1) get an empty name.
no_token_rows = pd.DataFrame({
    "Algorithm": df_token_aois["Algorithm"].unique(),
    "TokenIdx": -1,
    "Token": "",
})
df_token_aois_tmp = pd.concat(
    [df_token_aois[["Algorithm", "TokenIdx", "Token"]], no_token_rows],
    ignore_index=True,
)
# Same integer dtype on both sides of the merge key.
df_token_aois_tmp["TokenIdx"] = df_token_aois_tmp["TokenIdx"].astype(int)

# Dropping a "Token" column left over from a previous run keeps this cell
# re-runnable: without it a second run would merge in Token_x / Token_y and
# the load cell's drop("Token") would fail.
df_token_fixation_per_participant = (
    df_token_fixation_per_participant
    .drop(columns=["Token"], errors="ignore")
    .assign(TokenIdx=lambda frame: frame["TokenIdx"].fillna(-1.0).astype(int))
    .merge(df_token_aois_tmp, on=["Algorithm", "TokenIdx"], how="left")
)
df_token_fixation_per_participant.to_csv("./data/fixation_per_participant_per_token.csv", index=False)

In [10]:
# load the result
# Read the saved assignment back, discard the token names and turn the
# negative "no token" marker back into NaN.
df_token_fixation_per_participant = (
    pd.read_csv("./data/fixation_per_participant_per_token.csv")
    .drop(columns=["Token"])
)
token_idx_as_float = df_token_fixation_per_participant["TokenIdx"].astype(float)
df_token_fixation_per_participant["TokenIdx"] = token_idx_as_float.mask(token_idx_as_float < 0)
df_token_fixation_per_participant

Unnamed: 0,Algorithm,Participant,FixationNumber,Start,End,TokenIdx
0,IsPrime,1,0,0.000,0.228,
1,IsPrime,1,1,0.264,0.440,
2,IsPrime,1,2,0.656,0.736,
3,IsPrime,1,3,0.756,0.884,
4,IsPrime,1,4,1.148,1.348,1.0
...,...,...,...,...,...,...
172988,Rectangle,71,30,8.812,8.944,26.0
172989,Rectangle,71,31,8.996,9.080,
172990,Rectangle,71,32,9.272,9.404,
172991,Rectangle,71,33,9.436,9.596,22.0


## Remove Fixations That Did Not Hit a Token

In [11]:
# Keep only fixations that were assigned to a token. `.copy()` makes the
# result an independent frame, so the dtype cast below does not write into a
# slice of the unfiltered data (which triggers SettingWithCopyWarning).
df_token_fixation_per_participant = df_token_fixation_per_participant[
    df_token_fixation_per_participant["TokenIdx"].notna()
].copy()
df_token_fixation_per_participant["TokenIdx"] = df_token_fixation_per_participant["TokenIdx"].astype(int)
df_token_fixation_per_participant.head(5)

Unnamed: 0,Algorithm,Participant,FixationNumber,Start,End,TokenIdx
4,IsPrime,1,4,1.148,1.348,1
5,IsPrime,1,5,1.372,1.524,0
7,IsPrime,1,7,1.848,2.144,1
8,IsPrime,1,8,2.172,2.384,2
10,IsPrime,1,10,2.728,2.932,3


### Transform the Data into a Fixation / Refixation Split per Participant

In [12]:
# One row per token; per participant one list of first-pass fixation durations
# ("TokenFixation_P<id>") and one list of refixation durations
# ("TokenReFixation_P<id>").
df_token = df_token_aois.drop(["BoundingBox", "Token"], axis=1).copy()
for participant in participants:
    # Every cell gets its own empty list; the lists are filled in place below.
    df_token[f"TokenFixation_P{participant}"] = pd.Series(
        [[] for _ in range(len(df_token))], index=df_token.index, dtype=object)
    df_token[f"TokenReFixation_P{participant}"] = pd.Series(
        [[] for _ in range(len(df_token))], index=df_token.index, dtype=object)

# Row label of every (Algorithm, TokenIdx); replaces a full DataFrame scan per
# fixation. setdefault keeps the first match, like `.index[0]` did.
token_row_label = {}
for row_label, algorithm, token_idx in zip(df_token.index, df_token["Algorithm"], df_token["TokenIdx"]):
    token_row_label.setdefault((algorithm, int(token_idx)), row_label)


def _store_fixation_run(algorithm, participant, token_idx, durations):
    """Store one run of consecutive fixations on the same token.

    The run counts as the first pass when the participant has no first-pass
    fixations on that token yet; otherwise its durations are appended to the
    participant's refixations of the token.
    """
    key = (algorithm, int(token_idx))
    if key not in token_row_label:
        raise Exception(f"No Token found for {token_idx} in Algorithm {algorithm}")
    row_label = token_row_label[key]
    first_pass = df_token.at[row_label, f"TokenFixation_P{participant}"]
    if len(first_pass) == 0:
        first_pass.extend(durations)
    else:
        df_token.at[row_label, f"TokenReFixation_P{participant}"].extend(durations)


# A run is a maximal sequence of consecutive rows with the same algorithm,
# participant and token. The algorithm is part of the key so that a run can
# never span two snippets.
run_key = None
run_durations = []
fixation_rows = zip(df_token_fixation_per_participant["Algorithm"],
                    df_token_fixation_per_participant["Participant"],
                    df_token_fixation_per_participant["TokenIdx"],
                    df_token_fixation_per_participant["End"] - df_token_fixation_per_participant["Start"])
for algorithm, participant, token_idx, FixationDuration in tqdm(fixation_rows, total=len(df_token_fixation_per_participant)):
    current_key = (algorithm, participant, token_idx)
    if run_key is not None and current_key != run_key:
        _store_fixation_run(*run_key, run_durations)
        run_durations = []
    run_key = current_key
    run_durations.append(FixationDuration)

# The last run has no following row that would trigger its storage.
if run_key is not None:
    _store_fixation_run(*run_key, run_durations)

df_token

  0%|          | 0/53512 [00:00<?, ?it/s]

Unnamed: 0,Algorithm,TokenIdx,TokenFixation_P1,TokenReFixation_P1,TokenFixation_P2,TokenReFixation_P2,TokenFixation_P3,TokenReFixation_P3,TokenFixation_P4,TokenReFixation_P4,...,TokenFixation_P66,TokenReFixation_P66,TokenFixation_P67,TokenReFixation_P67,TokenFixation_P68,TokenReFixation_P68,TokenFixation_P70,TokenReFixation_P70,TokenFixation_P71,TokenReFixation_P71
0,IsPrime,0,[0.1519999999999999],[],"[0.17999999999999972, 0.07600000000000007]",[0.14000000000000012],[],[],[],[],...,[],[],"[0.0519999999999996, 0.0519999999999996]",[],[],[],[],[],[],[]
1,IsPrime,1,[0.20000000000000018],[0.29600000000000004],[0.19199999999999995],"[0.09600000000000009, 0.04800000000000004, 0.1...",[],[],[0.15600000000000058],[],...,[],[],"[0.052000000000000046, 0.20000000000000018]","[0.04800000000000004, 0.09999999999999964, 0.1...",[0.3400000000000003],"[0.21999999999999975, 0.16000000000000014]",[0.048],[],[0.1759999999999997],[]
2,IsPrime,2,[0.21199999999999974],[],[0.19199999999999995],"[0.18800000000000017, 0.04800000000000004, 0.1...",[0.196],[],[0.2559999999999998],[0.1479999999999997],...,[0.1479999999999997],"[0.0519999999999996, 0.1880000000000006, 0.279...",[0.09999999999999964],"[0.06400000000000006, 0.14400000000000013, 0.0...","[0.048, 0.06399999999999997]","[0.08399999999999963, 0.3879999999999999, 0.22...",[],[],[0.1080000000000001],[0.1280000000000001]
3,IsPrime,3,[0.20399999999999974],[0.20000000000000107],[],[],[0.2240000000000002],[],[0.12400000000000011],[0.04800000000000004],...,"[0.49599999999999955, 0.12400000000000055]",[0.09200000000000053],"[0.0519999999999996, 0.37599999999999945]","[0.2759999999999998, 0.06400000000000006, 0.04...","[0.1719999999999997, 0.1120000000000001]",[0.08800000000000097],[],[],[0.06400000000000006],[]
4,IsPrime,4,[],[],[],[],[],[],[],[],...,[0.19200000000000017],[],[],[],[],[],[],[],[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2700,Rectangle,82,[],[],[],[],[],[],[],[],...,[],[],[],[],[],[],[],[],[],[]
2701,Rectangle,83,[],[],[],[],[],[],[],[],...,[],[],[],[],[],[],[],[],[],[]
2702,Rectangle,84,[0.04400000000000048],[],[],[],[],[],[],[],...,[],[],[],[],[],[],[],[],[],[]
2703,Rectangle,85,[],[],[],[],[],[],[],[],...,[],[],[],[],[],[],[],[],[],[]


### Calculate the Token Based Eyetracking Metrics

In [13]:
# Long format: one row per (Algorithm, TokenIdx, participant column), with the
# duration list of that column in "FixationDurations".
df_token_melted = df_token.melt(id_vars=["Algorithm", "TokenIdx"],
                                var_name="KindOfFixation",
                                value_name="FixationDurations")


def _participant_from_column(column_name):
    """Extract the participant id from e.g. "TokenFixation_P12"."""
    return int(column_name.split("_")[1][1:])


def _kind_from_column(column_name):
    """Map the column prefix to "Fixation" (first pass) or "ReFixation"."""
    return "Fixation" if column_name.split("_")[0] == "TokenFixation" else "ReFixation"


# Split the former column name into participant and kind, and count the fixations
df_token_melted["Participant"] = df_token_melted["KindOfFixation"].apply(_participant_from_column)
df_token_melted["KindOfFixation"] = df_token_melted["KindOfFixation"].apply(_kind_from_column)
df_token_melted["NumberOfFixations"] = df_token_melted["FixationDurations"].apply(len)

# Number of participants for further calculations
number_of_participants = len(participants)

# Metrics that are only defined on first-pass ("Fixation") rows; every other
# row keeps None and becomes NaN in the cast further down.
first_pass_metrics = {
    # duration of the very first fixation on the token
    "FirstFixationDuration": lambda durations: durations[0] if len(durations) > 0 else None,
    # duration of the fixation if the first pass consists of exactly one
    "SingleFixationDuration": lambda durations: durations[0] if len(durations) == 1 else None,
    # summed duration of all first-pass fixations
    "GazeDuration": lambda durations: sum(durations) if len(durations) > 0 else None,
}
for metric_name, metric in first_pass_metrics.items():
    df_token_melted[metric_name] = None
    df_token_melted.loc[df_token_melted["KindOfFixation"] == "Fixation", metric_name] = \
        df_token_melted["FixationDurations"].apply(metric)

# Total time per participant and token: first-pass and refixation durations together
df_token_melted_total_time = (
    df_token_melted
    .groupby(["Participant", "Algorithm", "TokenIdx"])
    .agg({"FixationDurations": lambda x: sum(x.values.sum())})
    .rename(columns={"FixationDurations": "TotalTime"})
)

# Attach the total time to every row of the long frame
df_token_melted = df_token_melted.merge(df_token_melted_total_time,
                                        on=["Participant", "Algorithm", "TokenIdx"],
                                        how="left")

# Cast the metric columns to float; a total time of 0 is treated as missing
for metric_name in ["FirstFixationDuration", "SingleFixationDuration", "GazeDuration"]:
    df_token_melted[metric_name] = df_token_melted[metric_name].astype(float)
df_token_melted["TotalTime"] = df_token_melted["TotalTime"].astype(float).replace(0, np.nan)

# Skill score per participant
df_skill = pd.read_csv("./data/filteredData/filtered_data.csv")
df_skill = df_skill[["Participant", "SkillScore"]].drop_duplicates()

# Combine the metrics with the skill score
df_metrics_skill = df_token_melted.merge(df_skill, on=["Participant"], how="left")

# Helper Methods for the Metrics
def get_no_fixations(df):
    """Return the first-pass rows of tokens that were never fixated.

    Keeps the rows of ``df`` whose "KindOfFixation" is "Fixation" and whose
    "NumberOfFixations" is 0.
    """
    never_fixated = (df["KindOfFixation"] == "Fixation") & (df["NumberOfFixations"] == 0)
    return df[never_fixated]


def get_single_fixations(df):
    """Return the first-pass rows of tokens that were fixated exactly once.

    A row is kept when it is a "Fixation" row with exactly one fixation and
    its "TokenIdx" does not appear in any "ReFixation" row of ``df`` that has
    at least one fixation. Tokens are matched on "TokenIdx" only, so ``df``
    is expected to hold a single participant/algorithm group.
    """
    is_first_pass = df["KindOfFixation"] == "Fixation"
    has_refixation = (df["KindOfFixation"] == "ReFixation") & (df["NumberOfFixations"] > 0)
    refixated_tokens = df.loc[has_refixation, "TokenIdx"].values
    fixated_once = df["NumberOfFixations"] == 1
    return df[is_first_pass & ~df["TokenIdx"].isin(refixated_tokens) & fixated_once]

def get_multiple_fixations(df):
    """Return the first-pass rows of tokens that were fixated and later refixated.

    A row is kept when it is a "Fixation" row with at least one fixation and
    its "TokenIdx" appears in a "ReFixation" row of ``df`` that has at least
    one fixation. Tokens are matched on "TokenIdx" only, so ``df`` is expected
    to hold a single participant/algorithm group.

    NOTE(review): a token with several first-pass fixations but no refixation
    is returned neither here nor by get_single_fixations — confirm that this
    is the intended definition.
    """
    is_first_pass = df["KindOfFixation"] == "Fixation"
    has_refixation = (df["KindOfFixation"] == "ReFixation") & (df["NumberOfFixations"] > 0)
    refixated_tokens = df.loc[has_refixation, "TokenIdx"].values
    was_fixated = df["NumberOfFixations"] >= 1
    return df[is_first_pass & df["TokenIdx"].isin(refixated_tokens) & was_fixated]

def get_fixations(df):
    """Return the first-pass rows of tokens that were fixated at least once.

    Keeps the rows of ``df`` whose "KindOfFixation" is "Fixation" and whose
    "NumberOfFixations" is 1 or more.
    """
    fixated = (df["KindOfFixation"] == "Fixation") & (df["NumberOfFixations"] >= 1)
    return df[fixated]



# Number of long-format rows with a fixation count, per participant and algorithm
number_of_fixation_per_algorithm = (
    df_metrics_skill
    .groupby(["Participant", "Algorithm"])["NumberOfFixations"]
    .count()
    .reset_index()
)

# Highest TokenIdx per algorithm
number_of_tokens_per_algorithm = (
    df_metrics_skill
    .groupby(["Algorithm"])["TokenIdx"]
    .max()
    .reset_index()
)


def _select_per_participant_algorithm(selector):
    """Apply a get_* selector to every (Participant, Algorithm) group."""
    return df_metrics_skill.groupby(["Participant", "Algorithm"])\
        .apply(selector)\
        .drop(["Algorithm"], axis=1)


def _token_table(selected):
    """Reduce a selection to the columns Participant, Algorithm and TokenIdx."""
    return selected[["TokenIdx"]].reset_index().drop(["level_2"], axis=1)


# Selected first-pass rows per participant and algorithm:
# tokens without fixation, with a single fixation, with a fixation plus a
# refixation, and with at least one fixation
df_no_fixation_per_algorithm = _select_per_participant_algorithm(get_no_fixations)
df_single_fixation_per_algorithm = _select_per_participant_algorithm(get_single_fixations)
df_multiple_fixation_per_algorithm = _select_per_participant_algorithm(get_multiple_fixations)
df_fixation_per_algorithm = _select_per_participant_algorithm(get_fixations)

# The same selections reduced to one row per selected token
no_fixation_per_algorithm = _token_table(df_no_fixation_per_algorithm)
single_fixation_per_algorithm = _token_table(df_single_fixation_per_algorithm)
multiple_fixation_per_algorithm = _token_table(df_multiple_fixation_per_algorithm)
fixations_per_algorithm = _token_table(df_fixation_per_algorithm)

# Helper Method for Probability Metrics
def group_len_divided_by_number(current_df, counting_df):
    """Return the share of an algorithm's tokens that appear in ``current_df``.

    Parameters
    ----------
    current_df : DataFrame
        Rows of a single algorithm, one per token; the algorithm is read from
        the first value of its "Algorithm" column.
    counting_df : DataFrame
        One row per algorithm with the columns "Algorithm" and "TokenIdx".

    Returns
    -------
    float
        ``len(current_df)`` divided by the algorithm's "TokenIdx" value plus one.
    """
    algorithm = current_df["Algorithm"].iloc[0]
    highest_token_idx = counting_df.loc[counting_df["Algorithm"] == algorithm, "TokenIdx"].iloc[0]
    # The caller passes the highest TokenIdx per algorithm; adding one turns it
    # into the divisor (presumably the token count, if indices start at 0).
    return len(current_df) / (highest_token_idx + 1)

# Calculate the Metrics per Participant
def _mean_token_probability(tokens_per_algorithm):
    """Average, per participant, the share of tokens listed for each algorithm.

    ``tokens_per_algorithm`` has one row per selected token with the columns
    Participant, Algorithm and TokenIdx. The result is a one-column frame
    (column 0) indexed by Participant.
    """
    share_per_algorithm = tokens_per_algorithm.groupby(["Participant", "Algorithm"]).apply(
        lambda df: group_len_divided_by_number(df, number_of_tokens_per_algorithm))
    # Average over the index level so that the string column "Algorithm" never
    # takes part in mean(); pandas >= 2.0 raises on non-numeric columns instead
    # of silently dropping them.
    return share_per_algorithm.groupby(level="Participant").mean().to_frame()


def _mean_per_participant(column):
    """Mean of ``column`` per participant over the rows where it is not null."""
    observed = df_metrics_skill[~df_metrics_skill[column].isnull()]
    return observed.groupby(["Participant"])[column].mean()


# Probability of no / single / multiple / any fixation, averaged over algorithms
df_no_fixation_probability = _mean_token_probability(no_fixation_per_algorithm)
df_single_fixation_probability = _mean_token_probability(single_fixation_per_algorithm)
df_multiple_fixation_probability = _mean_token_probability(multiple_fixation_per_algorithm)
df_fixation_probability = _mean_token_probability(fixations_per_algorithm)

# Raw Durations Metrics
df_first_fixation = _mean_per_participant("FirstFixationDuration")
df_single_fixation = _mean_per_participant("SingleFixationDuration")
df_gaze_duration = _mean_per_participant("GazeDuration")
df_total_time = _mean_per_participant("TotalTime")

# Put every metric together into one frame. The series are joined on their
# Participant index instead of by position, so no participant count has to be
# hard-coded and a value can never end up in another participant's row.
metric_per_participant = {
    "FirstFixationDuration": df_first_fixation,
    "SingleFixationDuration": df_single_fixation,
    "GazeDuration": df_gaze_duration,
    "TotalTime": df_total_time,
    "TokenNoFixationProbability": df_no_fixation_probability[0],
    "TokenSingleFixationProbability": df_single_fixation_probability[0],
    "TokenMultipleFixationProbability": df_multiple_fixation_probability[0],
    "TokenFixationProbability": df_fixation_probability[0],
    "Skill": df_metrics_skill.groupby(["Participant"])["SkillScore"].mean(),
}
# Fail loudly if a metric is missing for some participant rather than
# correlating over shifted or partially filled rows.
expected_participants = metric_per_participant["Skill"].index
for metric_name, metric_values in metric_per_participant.items():
    if not metric_values.index.equals(expected_participants):
        raise ValueError(f"{metric_name} does not cover the same participants as Skill")
df_combined = pd.DataFrame(metric_per_participant).reset_index(drop=True)

# Correlation of every metric with the skill level.
# NOTE(review): corrwith defaults to Pearson; this comment used to say
# "spearman". Pass method="spearman" if a rank correlation was intended.
df_combined.corrwith(df_combined["Skill"])

FirstFixationDuration              -0.181417
SingleFixationDuration             -0.192109
GazeDuration                       -0.188401
TotalTime                          -0.399142
TokenNoFixationProbability          0.315480
TokenSingleFixationProbability     -0.393085
TokenMultipleFixationProbability   -0.563707
TokenFixationProbability           -0.509573
Skill                               1.000000
dtype: float64

In [14]:
# Descriptive statistics of every column in df_combined.
df_combined.describe()

Unnamed: 0,FirstFixationDuration,SingleFixationDuration,GazeDuration,TotalTime,TokenNoFixationProbability,TokenSingleFixationProbability,TokenMultipleFixationProbability,TokenFixationProbability,Skill
count,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0
mean,0.169443,0.17014,0.183969,0.382657,0.754487,0.152598,0.136958,0.285131,0.32523
std,0.031279,0.031822,0.035892,0.094706,0.067375,0.036204,0.050866,0.087936,0.10251
min,0.111995,0.111625,0.119901,0.218404,0.646419,0.046273,0.044758,0.081264,0.14252
25%,0.145967,0.146925,0.157881,0.327738,0.702035,0.141269,0.099161,0.225679,0.259896
50%,0.166088,0.165864,0.183676,0.378384,0.762657,0.151439,0.131585,0.282632,0.315932
75%,0.18728,0.189424,0.205408,0.414835,0.795479,0.174971,0.16877,0.336705,0.379187
max,0.242461,0.246199,0.281058,0.677665,0.94921,0.247443,0.300986,0.563852,0.656726
