In [1]:
import pandas as pd

In [2]:
# Names of the areas of interest (AOIs) considered in the study.
# Each name is looked up as a column of the fixation data further down.
AOIS = [
    # text paragraphs
    "Paragraph1",
    "Paragraph2",
    "Paragraph3",
    "Paragraph4",
    "Paragraph5",
    "Paragraph6",
    "Text_Area",
    # figures
    "SubFigure1",
    "SubFigure2",
    "SubFigure3",
    "Figure",
    # formula
    "Formula",
]

In [3]:
# Lift the column-display limit so that head() shows every column
pd.options.display.max_columns = None

# Load the fixation data (with AOI columns) into a DataFrame
fixationData = pd.read_csv('data/fixationDataWithAOIs.csv')

In [4]:
# 1. Derive AOI visits

In [5]:
# Derive AOI visits. A visit is a maximal run of consecutive fixations during which
# an AOI is active (its column equals 1). For every visit we record the AOI name,
# the start of its first fixation, the end of its last fixation, the elapsed time
# between the two, and the number of fixations in the run.
#
# Reference for grouping rows by consecutive same value (CSAV):
# https://towardsdatascience.com/pandas-dataframe-group-by-consecutive-same-values-128913875dba

# One dict per visit; the DataFrame is built once after the loops.
visitRecords = []

# Iterate through the different AOIs
for aoi in AOIS:

    # A new run starts wherever the AOI value differs from the previous row; the
    # cumulative count of those changes gives every run its own id
    # (the Appendix cell prints these runs for the AOI "Text_Area").
    runIds = (fixationData[aoi].shift() != fixationData[aoi]).cumsum()

    for _, run in fixationData.groupby(runIds):
        # Every row of a run holds the same AOI value, so the first row decides
        # whether the run is a visit (AOI active, value 1) or a gap to skip.
        if run[aoi].iloc[0] != 1:
            continue

        # Select the column before the row so the value keeps the column's dtype.
        visitStart = run["Fixation Start"].iloc[0]
        visitEnd = run["Fixation End"].iloc[-1]

        visitRecords.append({
            "VisitedAOI": aoi,
            "Visit Start": visitStart,
            "Visit End": visitEnd,
            "Visit Duration": visitEnd - visitStart,
            "Number of Fixations": len(run.index),
        })

# Build the frame in one step: DataFrame.append was removed in pandas 2.0, and growing
# a frame row by row copies all previous rows on every iteration. Passing `columns`
# keeps the expected header (and column order) even when no visit was found.
aoiVisits = pd.DataFrame(
    visitRecords,
    columns=["VisitedAOI", "Visit Start", "Visit End", "Visit Duration", "Number of Fixations"],
)

In [6]:
# Write the derived AOI visits to disk as CSV, leaving out the row index
aoiVisits.to_csv(path_or_buf='data/aoiVisits.csv', index=False)

In [None]:
# Appendix: example of CSAV (grouping by consecutive same value) for the AOI "Text_Area".
# A new run id is issued each time the "Text_Area" value differs from the previous row;
# every run (active or not) is printed with its id and displayed.
textAreaColumn = fixationData['Text_Area']
textAreaRunIds = (textAreaColumn.shift() != textAreaColumn).cumsum()

for runId, runRows in fixationData.groupby(textAreaRunIds):
    print(f"[group {runId}]")
    display(runRows)

In [None]:
# to remove
# Scratch/debug cell: displays every run of consecutive identical values (CSAV) for
# each AOI column. No filtering on "AOI active" (value 1) is applied, so the runs where
# the AOI is inactive are displayed as well.
#
# Reference for grouping rows by consecutive same value:
# https://towardsdatascience.com/pandas-dataframe-group-by-consecutive-same-values-128913875dba
#
# NOTE: this cell only displays data. It deliberately leaves `aoiVisits` and
# `csavGroups` untouched, so running it (e.g. via Restart & Run All) cannot wipe
# the visits derived earlier in the notebook.

# Iterate through the different AOIs
for aoi in AOIS:

    # A new run id is issued each time the AOI value differs from the previous row
    debugRunIds = (fixationData[aoi].shift() != fixationData[aoi]).cumsum()

    # Show the rows of every run, in order of appearance
    for _, debugRun in fixationData.groupby(debugRunIds):
        display(debugRun)