## 4 - Area-of-Interest Features Computation

##### Imports

In [3]:
import os
import sys
import pandas as pd
import numpy as np
import scipy as su
from scipy.stats import entropy

from matplotlib.path import Path
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns

import dask.dataframe as dd

sys.path.append('./libraries/EyeTrackingMetrics')
from Metrics.entropy import GazeEntropy
from transition_matrix.aoi import PolyAOI

##### Data Loading (AoI)

In [39]:
# Load AoI ddfs into dictionary
root_dir = './aoi_data'
def load_aoi_data(root_dir, exception=False): # Get csv file reads into one dictionary
    data = {}
    for file in os.listdir(root_dir):
        file_path = os.path.join(root_dir, file)
        file_name = os.path.splitext(file)[0]  # file name without '.csv'
        data[file_name] = dd.read_csv(file_path)  # read and attach to dict
    return data

data = load_aoi_data(root_dir)
participant_ids = [1,2,4,5,6,7,8,9,10,12,13,14,15,16,17,18,19,20]

In [40]:
# GUI data
gui_ddf = data['gui']
gui_df = gui_ddf.compute()
gui_df['block_data'] = gui_df['block_data'].apply(lambda x: eval(x))

block_type_mapping = {
    1: 1,
    2: 2,
    3: 3,
    4: 4,
    5: 1,
    6: 2,
    7: 3,
    8: 4
}
gui_df['condition'] = gui_df['block_type'].map(block_type_mapping)

In [41]:
# AoI data (task based)
ddf = data['aoi']
df = ddf.compute()
df = df.dropna(subset=['task_id'])

custom_labels = ['Easy Slow', 'Easy Fast', 'Hard Slow', 'Hard Fast']

### 0. Dataset Preparation

In [42]:
# Prepare datasets
# map encoded variables
difficulty_mapping = {
    1: 'Easy',
    2: 'Easy',
    3: 'Hard',
    4: 'Hard'
}
frequency_mapping = {
    1: 'Slow',
    2: 'Fast',
    3: 'Slow',
    4: 'Fast'
}
area_mapping = {
    1: 'Ticket',
    2: 'Description',
    3: 'Map',
    4: 'Diagnostics',
    5: 'Actions'
}

df['difficulty'] = df['condition'].map(difficulty_mapping)
df['frequency'] = df['condition'].map(frequency_mapping)
df['area'] = df['area'].map(area_mapping)
df_long = df[['participant_id', 'difficulty', 'frequency', 'area', 'block_id']].drop_duplicates().reset_index(drop=True)

# sort rows
difficulties = ['Easy', 'Hard']
frequencies = ['Slow', 'Fast']
areas = ['Ticket', 'Description', 'Map', 'Diagnostics', 'Actions']

df_long['difficulty'] = pd.Categorical(df_long['difficulty'], categories=difficulties, ordered=True)
df_long['frequency'] = pd.Categorical(df_long['frequency'], categories=frequencies, ordered=True)
df_long['area'] = pd.Categorical(df_long['area'], categories=areas, ordered=True)

df_long = df_long.sort_values(by=['participant_id', 'difficulty', 'frequency', 'area', 'block_id']).reset_index(drop=True)

### 1. Computation

##### Fixation Duration

In [43]:
# Lists
duration_results = []
for (participant_id, difficulty, frequency, block_id) in df[['participant_id', 'difficulty', 'frequency', 'block_id']].drop_duplicates().values:
    df_block = df[(df['participant_id'] == participant_id) &
                  (df['difficulty'] == difficulty) &
                  (df['frequency'] == frequency) &
                  (df['block_id'] == block_id)]
    
    all_combinations = pd.MultiIndex.from_product([
        df_block['participant_id'].unique(),
        df_block['difficulty'].unique(),
        df_block['frequency'].unique(),
        df_block['area'].unique(),
        df_block['block_id'].unique(),
        df_block['task_id'].unique()
    ], names=['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id']).to_frame(index=False)

    duration_mean_lists = df_block.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id'])['duration'].mean().reset_index()
    merged_df = pd.merge(all_combinations, duration_mean_lists, on=['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id'], how='left')

    duration_lists = merged_df.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id'])['duration'].apply(list).reset_index()
    duration_results.append(duration_lists)

df_duration_lists = pd.concat(duration_results, ignore_index=True)
df_long = pd.merge(df_long, df_duration_lists, on=['participant_id', 'difficulty', 'frequency', 'area', 'block_id'], how='left')

##### Fixation Count

In [44]:
# Lists
fix_count_results = []

for (participant_id, difficulty, frequency, block_id) in df[['participant_id', 'difficulty', 'frequency', 'block_id']].drop_duplicates().values:
    
    df_block = df[(df['participant_id'] == participant_id) &
                  (df['difficulty'] == difficulty) &
                  (df['frequency'] == frequency) &
                  (df['block_id'] == block_id)]
    
    all_combinations = pd.MultiIndex.from_product([
        df_block['participant_id'].unique(),
        df_block['difficulty'].unique(),
        df_block['frequency'].unique(),
        df_block['area'].unique(),
        df_block['block_id'].unique(),
        df_block['task_id'].unique()
    ], names=['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id']).to_frame(index=False)

    area_condition_counts = df_block.groupby(['participant_id', 'area', 'difficulty', 'frequency', 'block_id', 'task_id']).size().reset_index(name='fix_count')
    merged_df = pd.merge(all_combinations, area_condition_counts, on=['participant_id', 'area', 'difficulty', 'frequency', 'block_id', 'task_id'], how='left')

    fix_count_lists = merged_df.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id'])['fix_count'].apply(list).reset_index()
    fix_count_results.append(fix_count_lists)

df_fix_count_lists = pd.concat(fix_count_results, ignore_index=True)

df_long = pd.merge(df_long, df_fix_count_lists, on=['participant_id', 'difficulty', 'frequency', 'area', 'block_id'], how='left')

##### Fixation Frequency

In [45]:
# Lists
fix_freq_results = []

for (participant_id, difficulty, frequency, block_id) in df[['participant_id', 'difficulty', 'frequency', 'block_id']].drop_duplicates().values:
    
    df_block = df[(df['participant_id'] == participant_id) &
                  (df['difficulty'] == difficulty) &
                  (df['frequency'] == frequency) &
                  (df['block_id'] == block_id)]
    
    all_combinations = pd.MultiIndex.from_product([
        df_block['participant_id'].unique(),
        df_block['difficulty'].unique(),
        df_block['frequency'].unique(),
        df_block['area'].unique(),
        df_block['block_id'].unique(),
        df_block['task_id'].unique()
    ], names=['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id']).to_frame(index=False)

    fixation_stats = df_block.groupby(['participant_id', 'area', 'difficulty', 'frequency', 'block_id', 'task_id']).agg(
        fixations=('fixation_id', 'count'),
    ).reset_index()

    merged_stats = pd.merge(fixation_stats, gui_df[['participant_id', 'block_id', 'task_id', 'start_timestamp', 'end_timestamp']],
                            on=['participant_id', 'block_id', 'task_id'], how='left')
    merged_stats['task_duration'] = (merged_stats['end_timestamp'] - merged_stats['start_timestamp'])
    merged_stats['fix_freq'] = merged_stats['fixations'] / merged_stats['task_duration']

    merged_df = pd.merge(all_combinations, merged_stats, on=['participant_id', 'area', 'difficulty', 'frequency', 'block_id', 'task_id'], how='left')

    fix_freq_lists = merged_df.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id'])['fix_freq'].apply(list).reset_index()
    fix_freq_results.append(fix_freq_lists)

df_fix_freq_lists = pd.concat(fix_freq_results, ignore_index=True)
df_long = pd.merge(df_long, df_fix_freq_lists, on=['participant_id', 'difficulty', 'frequency', 'area', 'block_id'], how='left')

##### Time to First Fixation

In [46]:
# Lists
ttff_results = []

for (participant_id, difficulty, frequency, block_id) in df[['participant_id', 'difficulty', 'frequency', 'block_id']].drop_duplicates().values:
    
    df_block = df[(df['participant_id'] == participant_id) &
                  (df['difficulty'] == difficulty) &
                  (df['frequency'] == frequency) &
                  (df['block_id'] == block_id)]
    
    all_combinations = pd.MultiIndex.from_product([
        df_block['participant_id'].unique(),
        df_block['difficulty'].unique(),
        df_block['frequency'].unique(),
        df_block['area'].unique(),
        df_block['block_id'].unique(),
        df_block['task_id'].unique()
    ], names=['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id']).to_frame(index=False)

    merged_df = pd.merge(df_block, gui_df[['participant_id', 'condition', 'block_id', 'task_id', 'start_timestamp']], 
                         on=['participant_id', 'condition', 'block_id', 'task_id'], 
                         suffixes=('_fixation', '_task'))

    merged_df = merged_df.sort_values(by=['participant_id', 'condition', 'block_id', 'task_id', 'area', 'start_timestamp_fixation'])

    first_fixations = merged_df.drop_duplicates(subset=['participant_id', 'condition', 'block_id', 'task_id', 'area'], keep='first')
    first_fixations['time_1st_fix'] = (first_fixations['start_timestamp_fixation'] - first_fixations['start_timestamp_task'])

    merged_ttff_df = pd.merge(all_combinations, first_fixations, on=['participant_id', 'area', 'difficulty', 'frequency', 'block_id', 'task_id'], how='left')

    ttff_lists = merged_ttff_df.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id'])['time_1st_fix'].apply(list).reset_index()
    ttff_results.append(ttff_lists)

df_ttff_lists = pd.concat(ttff_results, ignore_index=True)
df_long = pd.merge(df_long, df_ttff_lists, on=['participant_id', 'difficulty', 'frequency', 'area', 'block_id'], how='left')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_fixations['time_1st_fix'] = (first_fixations['start_timestamp_fixation'] - first_fixations['start_timestamp_task'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_fixations['time_1st_fix'] = (first_fixations['start_timestamp_fixation'] - first_fixations['start_timestamp_task'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing

##### Visit Count

In [47]:
# Lists
visit_count_results = []

for (participant_id, difficulty, frequency, block_id) in df[['participant_id', 'difficulty', 'frequency', 'block_id']].drop_duplicates().values:
    
    df_block = df[(df['participant_id'] == participant_id) &
                  (df['difficulty'] == difficulty) &
                  (df['frequency'] == frequency) &
                  (df['block_id'] == block_id)]
    
    all_combinations = pd.MultiIndex.from_product([
        df_block['participant_id'].unique(),
        df_block['difficulty'].unique(),
        df_block['frequency'].unique(),
        df_block['area'].unique(),
        df_block['block_id'].unique(),
        df_block['task_id'].unique()
    ], names=['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id']).to_frame(index=False)

    df_block['visit_id'] = (df_block['area'] != df_block['area'].shift(1)).cumsum()

    visit_counts = df_block.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id', 'visit_id']).size().reset_index(name='fixation_count')
    visit_summary = visit_counts.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id']).size().reset_index(name='visit_count')

    merged_visit_df = pd.merge(all_combinations, visit_summary, on=['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id'], how='left')

    visit_lists = merged_visit_df.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id'])['visit_count'].apply(list).reset_index()
    visit_count_results.append(visit_lists)

df_visit_count_lists = pd.concat(visit_count_results, ignore_index=True)
df_long = pd.merge(df_long, df_visit_count_lists, on=['participant_id', 'difficulty', 'frequency', 'area', 'block_id'], how='left')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_block['visit_id'] = (df_block['area'] != df_block['area'].shift(1)).cumsum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_block['visit_id'] = (df_block['area'] != df_block['area'].shift(1)).cumsum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_block['visit_id'] = (df_block['area'] != d

##### Visit Frequency

In [48]:
# Lists
visit_freq_results = []

for (participant_id, difficulty, frequency, block_id) in df[['participant_id', 'difficulty', 'frequency', 'block_id']].drop_duplicates().values:
    
    df_block = df[(df['participant_id'] == participant_id) &
                  (df['difficulty'] == difficulty) &
                  (df['frequency'] == frequency) &
                  (df['block_id'] == block_id)]
    
    all_combinations = pd.MultiIndex.from_product([
        df_block['participant_id'].unique(),
        df_block['difficulty'].unique(),
        df_block['frequency'].unique(),
        df_block['area'].unique(),
        df_block['block_id'].unique(),
        df_block['task_id'].unique()
    ], names=['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id']).to_frame(index=False)

    df_block['visit_id'] = (df_block['area'] != df_block['area'].shift(1)).cumsum()
    visit_counts = df_block.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id', 'visit_id']).size().reset_index(name='fixation_count')
    visit_summary = visit_counts.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id']).size().reset_index(name='visit_count')

    merged_stats = pd.merge(visit_summary, gui_df[['participant_id', 'block_id', 'task_id', 'start_timestamp', 'end_timestamp']],
                            on=['participant_id', 'block_id', 'task_id'], how='left')
    merged_stats['task_duration'] = (merged_stats['end_timestamp'] - merged_stats['start_timestamp'])
    merged_stats['visit_freq'] = merged_stats['visit_count'] / merged_stats['task_duration']

    merged_df = pd.merge(all_combinations, merged_stats, on=['participant_id', 'area', 'difficulty', 'frequency', 'block_id', 'task_id'], how='left')

    visit_freq_lists = merged_df.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id'])['visit_freq'].apply(list).reset_index()
    visit_freq_results.append(visit_freq_lists)

df_visit_freq_lists = pd.concat(visit_freq_results, ignore_index=True)
df_long = pd.merge(df_long, df_visit_freq_lists, on=['participant_id', 'difficulty', 'frequency', 'area', 'block_id'], how='left')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_block['visit_id'] = (df_block['area'] != df_block['area'].shift(1)).cumsum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_block['visit_id'] = (df_block['area'] != df_block['area'].shift(1)).cumsum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_block['visit_id'] = (df_block['area'] != d

##### Visit Length

In [49]:
# Lists
visit_len_results = []

for (participant_id, difficulty, frequency, block_id) in df[['participant_id', 'difficulty', 'frequency', 'block_id']].drop_duplicates().values:
    
    df_block = df[(df['participant_id'] == participant_id) &
                  (df['difficulty'] == difficulty) &
                  (df['frequency'] == frequency) &
                  (df['block_id'] == block_id)]
    
    all_combinations = pd.MultiIndex.from_product([
        df_block['participant_id'].unique(),
        df_block['difficulty'].unique(),
        df_block['frequency'].unique(),
        df_block['area'].unique(),
        df_block['block_id'].unique(),
        df_block['task_id'].unique()
    ], names=['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id']).to_frame(index=False)

    df_block['end_timestamp'] = df_block['start_timestamp'] + (df_block['duration'] / 1000)
    df_block['visit_id'] = (df_block['area'] != df_block['area'].shift(1)).cumsum()

    visit_details = df_block.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id', 'visit_id']).agg(
        start_time=('start_timestamp', 'first'),
        end_time=('end_timestamp', 'last')
    ).reset_index()

    visit_details['visit_len'] = visit_details['end_time'] - visit_details['start_time']

    len_mean_lists = visit_details.groupby(['participant_id', 'area', 'difficulty', 'frequency', 'block_id', 'task_id'])['visit_len'].mean().reset_index()

    merged_visit_len_df = pd.merge(all_combinations, len_mean_lists, on=['participant_id', 'area', 'difficulty', 'frequency', 'block_id', 'task_id'], how='left')

    len_lists = merged_visit_len_df.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id'])['visit_len'].apply(list).reset_index()
    visit_len_results.append(len_lists)

df_visit_len_lists = pd.concat(visit_len_results, ignore_index=True)
df_long = pd.merge(df_long, df_visit_len_lists, on=['participant_id', 'difficulty', 'frequency', 'area', 'block_id'], how='left')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_block['end_timestamp'] = df_block['start_timestamp'] + (df_block['duration'] / 1000)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_block['visit_id'] = (df_block['area'] != df_block['area'].shift(1)).cumsum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_block['end_timestamp'] = df_block[

##### Dwell Time Percentage

In [50]:
# Lists
dwell_pct_results = []

for (participant_id, difficulty, frequency, block_id) in df[['participant_id', 'difficulty', 'frequency', 'block_id']].drop_duplicates().values:
    
    df_block = df[(df['participant_id'] == participant_id) &
                  (df['difficulty'] == difficulty) &
                  (df['frequency'] == frequency) &
                  (df['block_id'] == block_id)]
    
    all_combinations = pd.MultiIndex.from_product([
        df_block['participant_id'].unique(),
        df_block['difficulty'].unique(),
        df_block['frequency'].unique(),
        df_block['area'].unique(),
        df_block['block_id'].unique(),
        df_block['task_id'].unique()
    ], names=['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id']).to_frame(index=False)

    df_block['end_timestamp'] = df_block['start_timestamp'] + (df_block['duration'] / 1000)
    df_block['visit_id'] = (df_block['area'] != df_block['area'].shift(1)).cumsum()

    visit_details = df_block.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id', 'visit_id']).agg(
        start_time=('start_timestamp', 'first'),
        end_time=('end_timestamp', 'last')
    ).reset_index()

    visit_details['visit_len'] = visit_details['end_time'] - visit_details['start_time']

    total_visit_details = visit_details.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id']).agg(
        dwell_pct=('visit_len', 'sum')
    ).reset_index()

    total_lengths = total_visit_details.groupby(['participant_id', 'difficulty', 'frequency', 'block_id', 'task_id'])['dwell_pct'].transform('sum')
    total_visit_details['dwell_pct'] = (total_visit_details['dwell_pct'] / total_lengths) * 100

    merged_dwell_pct_df = pd.merge(all_combinations, total_visit_details, on=['participant_id', 'difficulty', 'frequency', 'area', 'block_id', 'task_id'], how='left')

    dwell_lists = merged_dwell_pct_df.groupby(['participant_id', 'difficulty', 'frequency', 'area', 'block_id'])['dwell_pct'].apply(list).reset_index()
    dwell_pct_results.append(dwell_lists)

df_dwell_pct_lists = pd.concat(dwell_pct_results, ignore_index=True)
df_long = pd.merge(df_long, df_dwell_pct_lists, on=['participant_id', 'difficulty', 'frequency', 'area', 'block_id'], how='left')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_block['end_timestamp'] = df_block['start_timestamp'] + (df_block['duration'] / 1000)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_block['visit_id'] = (df_block['area'] != df_block['area'].shift(1)).cumsum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_block['end_timestamp'] = df_block[

##### Stationary and Transition Entropy

In [51]:
# Prepare
screen_dim = (1, 1)
aoi_dict = {
    'Ticket':       PolyAOI(screen_dim, [(.88, 1), (1, 1), (1, 0), (.88, 0)]),
    'Description':  PolyAOI(screen_dim, [(.515, 1), (.88, 1), (.88, .39), (.515, .39)]),
    'Map':          PolyAOI(screen_dim, [(0, 1), (.515, 1), (.515, .5), (0, .5)]),
    'Diagnostics':  PolyAOI(screen_dim, [(0, .5), (.515, .5), (.515, 0), (0, 0)]),
    'Action':       PolyAOI(screen_dim, [(.515, .39), (.88, .39), (.88, 0), (.515, 0)])
}


In [52]:
# Compute Entropy metrics
results = []
grouped = df.groupby(['participant_id', 'difficulty', 'frequency', 'block_id', 'task_id'])

for name, group in grouped:
    fix_data = list(zip(group['norm_pos_x'], group['norm_pos_y'], group['duration']))
    
    stat_entropy = GazeEntropy(screen_dim, aoi_dict, fix_data, entropy='stationary').compute()
    trans_entropy = GazeEntropy(screen_dim, aoi_dict, fix_data, entropy='transition').compute()
    
    results.append({
        'participant_id': name[0],
        'difficulty': name[1],
        'frequency': name[2],
        'block_id': name[3],
        'task_id': name[4],
        'stat_entropy': stat_entropy,
        'trans_entropy': trans_entropy
    })

entropy_df = pd.DataFrame(results)

In [53]:
# Static entropy Lists
stat_entropy_results = []

for (participant_id, difficulty, frequency, block_id) in df[['participant_id', 'difficulty', 'frequency', 'block_id']].drop_duplicates().values: 
    df_block = df[(df['participant_id'] == participant_id) &
                  (df['difficulty'] == difficulty) &
                  (df['frequency'] == frequency) &
                  (df['block_id'] == block_id)]
    
    all_combinations = pd.MultiIndex.from_product([
        df_block['participant_id'].unique(),
        df_block['difficulty'].unique(),
        df_block['frequency'].unique(),
        df_block['block_id'].unique(),
        df_block['task_id'].unique()
    ], names=['participant_id', 'difficulty', 'frequency', 'block_id', 'task_id']).to_frame(index=False)

    merged_entropy_df = pd.merge(all_combinations, entropy_df, on=['participant_id', 'difficulty', 'frequency', 'block_id', 'task_id'], how='left')

    stat_entropy_lists = merged_entropy_df.groupby(['participant_id', 'difficulty', 'frequency', 'block_id'])['stat_entropy'].apply(list).reset_index()
    stat_entropy_results.append(stat_entropy_lists)

df_stat_entropy_lists = pd.concat(stat_entropy_results, ignore_index=True)
df_long = pd.merge(df_long, df_stat_entropy_lists, on=['participant_id', 'difficulty', 'frequency', 'block_id'], how='left')

In [54]:
# Transition entropy Lists
trans_entropy_results = []

for (participant_id, difficulty, frequency, block_id) in df[['participant_id', 'difficulty', 'frequency', 'block_id']].drop_duplicates().values:
    df_block = df[(df['participant_id'] == participant_id) &
                  (df['difficulty'] == difficulty) &
                  (df['frequency'] == frequency) &
                  (df['block_id'] == block_id)]
    
    all_combinations = pd.MultiIndex.from_product([
        df_block['participant_id'].unique(),
        df_block['difficulty'].unique(),
        df_block['frequency'].unique(),
        df_block['block_id'].unique(),
        df_block['task_id'].unique()
    ], names=['participant_id', 'difficulty', 'frequency', 'block_id', 'task_id']).to_frame(index=False)

    merged_entropy_df = pd.merge(all_combinations, entropy_df, on=['participant_id', 'difficulty', 'frequency', 'block_id', 'task_id'], how='left')

    trans_entropy_lists = merged_entropy_df.groupby(['participant_id', 'difficulty', 'frequency', 'block_id'])['trans_entropy'].apply(list).reset_index()
    trans_entropy_results.append(trans_entropy_lists)

df_trans_entropy_lists = pd.concat(trans_entropy_results, ignore_index=True)
df_long = pd.merge(df_long, df_trans_entropy_lists, on=['participant_id', 'difficulty', 'frequency', 'block_id'], how='left')

### 2. Export Dataset

In [55]:
# Code instance
df_long['instance'] = df_long.groupby(['participant_id', 'difficulty', 'frequency', 'area'], observed=True).cumcount() + 1
df_long = pd.concat([df_long.iloc[:, :4], df_long.iloc[:, -1], df_long.iloc[:, 5:-1]], axis=1)

In [56]:
# Export
df_long.to_csv('./aoi_data/aoi_features.csv', index=False)