In [110]:
import pandas as pd

In [111]:
# Areas of interest (AOIs) considered in the study.
# NOTE: update this list whenever the AOIs of the study are updated.
AOIS = [
    # Text paragraphs and the text area
    'Paragraph1',
    'Paragraph2',
    'Paragraph3',
    'Paragraph4',
    'Paragraph5',
    'Paragraph6',
    'Text_Area',
    # Figure parts and the figure
    'SubFigure1',
    'SubFigure2',
    'SubFigure3',
    'Figure',
    # Formula
    'Formula',
]

In [112]:
# Load the fixation and saccade records (including their AOI columns) from CSV
fixationAndSaccadeData = pd.read_csv(filepath_or_buffer="data/fixationAndSaccadesDataWithAOIs.csv")
# Lift the column display limit so that previews such as head() show every column
pd.options.display.max_columns = None

In [113]:
# Peek at the first five rows of fixationAndSaccadeData
fixationAndSaccadeData.head(n=5)

Unnamed: 0,Respondent,FixID,Fixation X,Fixation Y,Fixation Start,Fixation End,Fixation Duration,Fixation Dispersion,SacID,Saccade Start,Saccade End,Saccade Duration,Saccade Amplitude,Saccade Peak Velocity,Saccade Peak Acceleration,Saccade Peak Deceleration,Saccade Direction,Paragraph1,Paragraph2,Paragraph3,Paragraph4,Paragraph5,Paragraph6,Text_Area,SubFigure1,SubFigure2,SubFigure3,Figure,Formula,Timestamp
0,Anonymous 14-11-22 09h35m,1.0,947.656,491.5955,133.1086,358.082,224.9734,0.3205,,,,,,,,,,0,0,0,1,0,0,1,0,0,0,0,0,137.2724
1,Anonymous 14-11-22 09h35m,,,,,,,,1.0,358.082,433.0785,74.9965,5.6844,148.4457,9324.2426,-9634.6219,203.9321,0,0,0,1,0,0,1,0,0,0,0,0,362.246
2,Anonymous 14-11-22 09h35m,,,,,,,,2.0,474.7339,491.4003,16.6664,0.6259,59.9309,4718.1428,-1362.159,84.5597,0,0,0,0,0,0,1,0,0,0,0,0,478.899
3,Anonymous 14-11-22 09h35m,,,,,,,,3.0,516.3954,566.4076,50.0122,6.6098,200.9994,8268.0361,-10127.0766,225.0,0,0,0,0,0,0,1,0,0,0,0,0,520.561
4,Anonymous 14-11-22 09h35m,2.0,460.5417,172.5694,566.4076,716.4244,150.0168,0.1536,,,,,,,,,,1,0,0,0,0,0,1,0,0,0,0,0,570.5892


In [114]:
#----------------------------------------------------------------------------------------
#
# 1. Fixation measures at stimulus and AOI levels
#
#----------------------------------------------------------------------------------------

In [115]:
# Keep only the fixation rows (FixID is not NaN) as an independent copy of the data
fixationData = fixationAndSaccadeData.loc[fixationAndSaccadeData['FixID'].notna()].copy()

In [116]:
# Peek at the first five rows of fixationData
fixationData.head(n=5)

Unnamed: 0,Respondent,FixID,Fixation X,Fixation Y,Fixation Start,Fixation End,Fixation Duration,Fixation Dispersion,SacID,Saccade Start,Saccade End,Saccade Duration,Saccade Amplitude,Saccade Peak Velocity,Saccade Peak Acceleration,Saccade Peak Deceleration,Saccade Direction,Paragraph1,Paragraph2,Paragraph3,Paragraph4,Paragraph5,Paragraph6,Text_Area,SubFigure1,SubFigure2,SubFigure3,Figure,Formula,Timestamp
0,Anonymous 14-11-22 09h35m,1.0,947.656,491.5955,133.1086,358.082,224.9734,0.3205,,,,,,,,,,0,0,0,1,0,0,1,0,0,0,0,0,137.2724
4,Anonymous 14-11-22 09h35m,2.0,460.5417,172.5694,566.4076,716.4244,150.0168,0.1536,,,,,,,,,,1,0,0,0,0,0,1,0,0,0,0,0,570.5892
6,Anonymous 14-11-22 09h35m,3.0,214.2487,168.4102,783.036,899.6993,116.6633,0.3027,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,787.2013
8,Anonymous 14-11-22 09h35m,4.0,210.2339,200.25,908.0385,1166.3355,258.2971,0.2904,,,,,,,,,,1,0,0,0,0,0,1,0,0,0,0,0,912.2187
10,Anonymous 14-11-22 09h35m,5.0,288.837,189.2826,1182.9961,1374.6494,191.6533,0.1624,,,,,,,,,,1,0,0,0,0,0,1,0,0,0,0,0,1187.1572


In [117]:
# Accumulator for the fixation metrics; it receives its first row at the end of this cell
allAOIFixationStats = None

# --- Metrics for the whole stimulus ---

# Aggregate the fixation-based metrics over all fixation rows, then reshape the result
# into a single row whose columns are (source column, aggregation) pairs
stats = (
    fixationData
    .agg({
        # Number of Fixations (Numerosity measure, cf. documentation/ETmetrics.csv)
        'FixID': ['count'],
        # Fixation Duration (Position measures, cf. documentation/ETmetrics.csv)
        #   sum  -> Total Fixation Duration
        #   mean -> Average Fixation Duration
        #   min  -> Minimum Fixation Duration
        #   max  -> Maximum Fixation Duration
        'Fixation Duration': ['sum', 'mean', 'min', 'max'],
    })
    .unstack()
    .to_frame()
    .dropna()
    .T
)

# Flatten the two-level column labels into "<column>_<aggregation>" so that this row
# can be concatenated with the measures computed afterwards
stats.columns = stats.columns.map('_'.join)

# Switch to the human-readable metric names
stats = stats.rename(
    columns={
        "FixID_count": "Number of Fixations",
        "Fixation Duration_sum": "Total Fixation Duration",
        "Fixation Duration_mean": "Average Fixation Duration",
        "Fixation Duration_min": "Minimum Fixation Duration",
        "Fixation Duration_max": "Maximum Fixation Duration",
    }
)

# Label the row; AOI becomes the first column
stats.insert(0, 'AOI', "Whole Stimulus")

allAOIFixationStats = stats

In [118]:
# Display the fixation metrics computed so far
# (when the cells are run in order, this is the single "Whole Stimulus" row)
display(allAOIFixationStats)

Unnamed: 0,AOI,Number of Fixations,Total Fixation Duration,Average Fixation Duration,Minimum Fixation Duration,Maximum Fixation Duration
0,Whole Stimulus,2401.0,675798.4818,281.465423,66.5901,2691.3525


In [119]:
# Metrics for individual AOIs
#
# Appends one row of fixation metrics per AOI to allAOIFixationStats, below the
# "Whole Stimulus" row computed in the previous cell.

# Restart from the "Whole Stimulus" row only, so that re-running this cell rebuilds the
# per-AOI rows instead of appending a second copy of them
perAOIFixationStats = [allAOIFixationStats[allAOIFixationStats['AOI'] == "Whole Stimulus"]]

# Iterate through the different AOIS
for aoi in AOIS:
    # Fixation rows whose column for this AOI equals 1
    aoiFixationData = fixationData[fixationData[aoi] == 1]

    # An AOI without any fixation contributes no row: skip it instead of
    # concatenating an empty frame
    if aoiFixationData.empty:
        continue

    # Compute different aggregations for Fixation metrics
    stats = aoiFixationData.groupby(aoi).agg({
                            # Number of Fixations (Numerosity measure, cf. documentation/ETmetrics.csv)
                            'FixID': ['count'],
                            # Fixation Duration
                                # Sum: Total Fixation Duration (Position measure, cf. documentation/ETmetrics.csv)
                                # Mean: Average Fixation duration (Position measure, cf. documentation/ETmetrics.csv)
                                # Min: Minimum Fixation duration (Position measure, cf. documentation/ETmetrics.csv)
                                # Max: Maximum Fixation duration (Position measure, cf. documentation/ETmetrics.csv)
                            'Fixation Duration': ['sum', 'mean', 'min', 'max']
                         })

    # Flatten the two-level column labels into "<column>_<aggregation>"
    stats.columns = ['_'.join(x) for x in stats.columns]

    # Rename columns (the AOI itself is the groupby index, not a column, so it is
    # added explicitly below rather than renamed)
    stats = stats.rename(columns={
                              "FixID_count": "Number of Fixations",
                              "Fixation Duration_sum": "Total Fixation Duration",
                              "Fixation Duration_mean": "Average Fixation Duration",
                              "Fixation Duration_min": "Minimum Fixation Duration",
                              "Fixation Duration_max": "Maximum Fixation Duration"})

    # Label the row with the AOI name, as first column like the "Whole Stimulus" row
    stats.insert(0, "AOI", aoi)

    perAOIFixationStats.append(stats)

# Concatenate once (instead of growing the frame inside the loop) and renumber the rows
allAOIFixationStats = pd.concat(perAOIFixationStats, axis=0).reset_index(drop=True)

In [120]:
# Display the fixation metrics: the "Whole Stimulus" row followed by the per-AOI rows
display(allAOIFixationStats)

Unnamed: 0,AOI,Number of Fixations,Total Fixation Duration,Average Fixation Duration,Minimum Fixation Duration,Maximum Fixation Duration
0,Whole Stimulus,2401.0,675798.4818,281.465423,66.5901,2691.3525
1,Paragraph1,203.0,66376.2517,326.976609,66.6578,1441.5283
2,Paragraph2,244.0,85532.2805,350.542133,74.9628,2149.7729
3,Paragraph3,213.0,63726.3356,299.184674,74.9272,1708.1264
4,Paragraph4,124.0,30013.2186,242.042085,66.68,858.2667
5,Paragraph5,218.0,51861.0532,237.894739,66.632,824.9098
6,Paragraph6,244.0,48284.7904,197.888485,66.6155,566.5896
7,Text_Area,1294.0,353859.8277,273.462,66.6155,2149.7729
8,SubFigure1,242.0,71916.8251,297.176963,66.6569,2691.3525
9,SubFigure2,463.0,149049.7881,321.921789,66.5901,1866.4728


In [121]:
# Write allAOIFixationStats to a CSV file, leaving out the row index
allAOIFixationStats.to_csv(path_or_buf="data/aoiFixationStats.csv", index=False)

In [122]:
#----------------------------------------------------------------------------------------
#
# 2. Saccade measures at stimulus and AOI levels 
#
#----------------------------------------------------------------------------------------

In [123]:
# Keep only the saccade rows (SacID is not NaN) as an independent copy of the data
SaccadeData = fixationAndSaccadeData.loc[fixationAndSaccadeData['SacID'].notna()].copy()

In [124]:
# Peek at the first five rows of SaccadeData
SaccadeData.head(n=5)

Unnamed: 0,Respondent,FixID,Fixation X,Fixation Y,Fixation Start,Fixation End,Fixation Duration,Fixation Dispersion,SacID,Saccade Start,Saccade End,Saccade Duration,Saccade Amplitude,Saccade Peak Velocity,Saccade Peak Acceleration,Saccade Peak Deceleration,Saccade Direction,Paragraph1,Paragraph2,Paragraph3,Paragraph4,Paragraph5,Paragraph6,Text_Area,SubFigure1,SubFigure2,SubFigure3,Figure,Formula,Timestamp
1,Anonymous 14-11-22 09h35m,,,,,,,,1.0,358.082,433.0785,74.9965,5.6844,148.4457,9324.2426,-9634.6219,203.9321,0,0,0,1,0,0,1,0,0,0,0,0,362.246
2,Anonymous 14-11-22 09h35m,,,,,,,,2.0,474.7339,491.4003,16.6664,0.6259,59.9309,4718.1428,-1362.159,84.5597,0,0,0,0,0,0,1,0,0,0,0,0,478.899
3,Anonymous 14-11-22 09h35m,,,,,,,,3.0,516.3954,566.4076,50.0122,6.6098,200.9994,8268.0361,-10127.0766,225.0,0,0,0,0,0,0,1,0,0,0,0,0,520.561
5,Anonymous 14-11-22 09h35m,,,,,,,,4.0,716.4244,783.036,66.6117,4.2411,134.113,10094.7041,-10843.8199,174.144,0,0,0,0,0,0,1,0,0,0,0,0,720.5678
7,Anonymous 14-11-22 09h35m,,,,,,,,5.0,899.6993,908.0385,8.3391,,33.4801,548.5719,,,1,0,0,0,0,0,1,0,0,0,0,0,903.8582


In [125]:
# Accumulator for the saccade metrics; it receives its first row at the end of this cell
allAOISaccadeStats = None

# --- Metrics for the whole stimulus ---

# Aggregate the saccade-based metrics over all saccade rows, then reshape the result
# into a single row whose columns are (source column, aggregation) pairs
stats = (
    SaccadeData
    .agg({
        # Number of Saccades (Numerosity measure, cf. documentation/ETmetrics.csv)
        'SacID': ['count'],
        # Saccade Duration (movement measures, cf. documentation/ETmetrics.csv)
        #   sum  -> Total Saccade Duration
        #   mean -> Average Saccade Duration
        'Saccade Duration': ['sum', 'mean'],
        # Saccade Amplitude (movement measures, cf. documentation/ETmetrics.csv)
        #   sum  -> Total Saccade Amplitude
        #   mean -> Average Saccade Amplitude
        'Saccade Amplitude': ['sum', 'mean'],
        # Saccade Peak Velocity (movement measure, cf. documentation/ETmetrics.csv)
        #   mean -> Average Peak Velocity of Saccades
        'Saccade Peak Velocity': ['mean'],

        # Other metrics, currently disabled:
        # 'Saccade Peak Acceleration': ['sum', 'mean', 'max', 'min'],
        # 'Saccade Peak Deceleration': ['sum', 'mean', 'max', 'min'],
        # 'Saccade Direction': ['mean'],
    })
    .unstack()
    .to_frame()
    .dropna()
    .T
)

# Flatten the two-level column labels into "<column>_<aggregation>" so that this row
# can be concatenated with the measures computed afterwards
stats.columns = stats.columns.map('_'.join)

# Switch to the human-readable metric names
stats = stats.rename(
    columns={
        "SacID_count": "Number of Saccades",
        "Saccade Duration_sum": "Total Saccade Duration",
        "Saccade Duration_mean": "Average Saccade Duration",
        "Saccade Amplitude_sum": "Total Saccade Amplitude",
        "Saccade Amplitude_mean": "Average Saccade Amplitude",
        "Saccade Peak Velocity_mean": "Average Peak Velocity of Saccades",
    }
)

# Label the row; AOI becomes the first column
stats.insert(0, 'AOI', "Whole Stimulus")

allAOISaccadeStats = stats

In [126]:
# Display the saccade metrics computed so far
# (when the cells are run in order, this is the single "Whole Stimulus" row)
display(allAOISaccadeStats)

Unnamed: 0,AOI,Number of Saccades,Total Saccade Duration,Average Saccade Duration,Total Saccade Amplitude,Average Saccade Amplitude,Average Peak Velocity of Saccades
0,Whole Stimulus,3551.0,107687.199,30.32588,7165.5868,2.49672,111.326902


In [127]:
# Metrics for individual AOIs
#
# Appends one row of saccade metrics per AOI to allAOISaccadeStats, below the
# "Whole Stimulus" row computed in the previous cell.

# Restart from the "Whole Stimulus" row only, so that re-running this cell rebuilds the
# per-AOI rows instead of appending a second copy of them
perAOISaccadeStats = [allAOISaccadeStats[allAOISaccadeStats['AOI'] == "Whole Stimulus"]]

# Iterate through the different AOIS
for aoi in AOIS:
    # Saccade rows whose column for this AOI equals 1
    aoiSaccadeData = SaccadeData[SaccadeData[aoi] == 1]

    # An AOI without any saccade contributes no row: skip it instead of
    # concatenating an empty frame
    if aoiSaccadeData.empty:
        continue

    # Compute different aggregations for saccade metrics
    stats = aoiSaccadeData.groupby(aoi).agg({
                            # Number of Saccades (Numerosity measure, cf. documentation/ETmetrics.csv)
                            'SacID': ['count'],
                            # Saccade Duration
                             # Sum: Total Saccade Duration (movement measure, cf. documentation/ETmetrics.csv)
                             # Mean: Average Saccade Duration (movement measure, cf. documentation/ETmetrics.csv)
                            'Saccade Duration': ['sum', 'mean'],
                             # Saccade Amplitude
                              # Sum: Total Saccade Amplitude (movement measure, cf. documentation/ETmetrics.csv)
                              # Mean: Average Saccade Amplitude (movement measure, cf. documentation/ETmetrics.csv)
                            'Saccade Amplitude': ['sum', 'mean'],
                             # Saccade Peak Velocity
                              # Mean: Average Peak Velocity of Saccades (movement measure, cf. documentation/ETmetrics.csv)
                            'Saccade Peak Velocity': ['mean'],

                            # Other metrics, currently disabled:
                            # 'Saccade Peak Acceleration': ['sum', 'mean', 'max', 'min'],
                            # 'Saccade Peak Deceleration': ['sum', 'mean', 'max', 'min'],
                            # 'Saccade Direction': ['mean'],
                         })

    # Flatten the two-level column labels into "<column>_<aggregation>"
    stats.columns = ['_'.join(x) for x in stats.columns]

    # Rename columns (the AOI itself is the groupby index, not a column, so it is
    # added explicitly below rather than renamed)
    stats = stats.rename(columns={
                              "SacID_count": "Number of Saccades",
                              "Saccade Duration_sum": "Total Saccade Duration",
                              "Saccade Duration_mean": "Average Saccade Duration",
                              "Saccade Amplitude_sum": "Total Saccade Amplitude",
                              "Saccade Amplitude_mean": "Average Saccade Amplitude",
                              "Saccade Peak Velocity_mean": "Average Peak Velocity of Saccades",
                             })

    # Label the row with the AOI name, as first column like the "Whole Stimulus" row
    stats.insert(0, "AOI", aoi)

    perAOISaccadeStats.append(stats)

# Concatenate once (instead of growing the frame inside the loop) and renumber the rows
allAOISaccadeStats = pd.concat(perAOISaccadeStats, axis=0).reset_index(drop=True)

In [128]:
# Display the saccade metrics: the "Whole Stimulus" row followed by the per-AOI rows
display(allAOISaccadeStats)

# TODO: keep only a few measures, explain them, and add a pointer to the book

Unnamed: 0,AOI,Number of Saccades,Total Saccade Duration,Average Saccade Duration,Total Saccade Amplitude,Average Saccade Amplitude,Average Peak Velocity of Saccades
0,Whole Stimulus,3551.0,107687.199,30.32588,7165.5868,2.49672,111.326902
1,Paragraph1,213.0,5290.8053,24.839462,394.4119,2.32007,97.091374
2,Paragraph2,274.0,7515.6099,27.429233,425.8572,1.827713,88.716774
3,Paragraph3,221.0,5857.611,26.505027,395.5683,2.138207,92.232754
4,Paragraph4,214.0,6365.8652,29.747034,318.3188,1.788308,91.06223
5,Paragraph5,269.0,7565.2901,28.123755,437.5284,1.997847,99.130168
6,Paragraph6,437.0,14641.9754,33.505665,785.1962,2.249846,119.11388
7,Text_Area,1839.0,57577.8148,31.309307,4061.7672,2.659965,116.547659
8,SubFigure1,302.0,8056.9408,26.678612,382.625,1.693031,89.199102
9,SubFigure2,667.0,17756.5296,26.621484,689.8508,1.342122,86.303837


In [129]:
# Write allAOISaccadeStats to a CSV file, leaving out the row index
allAOISaccadeStats.to_csv(path_or_buf="data/aoiSaccadeStats.csv", index=False)

In [130]:
#----------------------------------------------------------------------------------------
#
# 3. Dwell measures 
#
#----------------------------------------------------------------------------------------

In [131]:
# Load the dwell records from CSV
dwells = pd.read_csv(filepath_or_buffer="data/dwells.csv")
# Lift the column display limit so that previews such as head() show every column
pd.options.display.max_columns = None

In [132]:
# Peek at the first five rows of dwells
dwells.head(n=5)

Unnamed: 0,Respondent,VisitedAOI,Dwell Start,Dwell End,Number of Fixations in Dwell,Number of Saccades in Dwell,Dwell Time
0,Anonymous 14-11-22 09h35m,Paragraph4,133.1086,433.0785,1.0,1.0,299.9699
1,Anonymous 14-11-22 09h35m,Text_Area,133.1086,783.036,2.0,4.0,649.9274
2,Anonymous 14-11-22 09h35m,Paragraph1,566.4076,716.4244,1.0,0.0,150.0168
3,Anonymous 14-11-22 09h35m,Paragraph1,899.6993,6957.3621,23.0,22.0,6057.6628
4,Anonymous 14-11-22 09h35m,Text_Area,899.6993,6957.3621,23.0,22.0,6057.6628


In [133]:
# Compute different aggregations for dwell metrics, one row per visited AOI
# (the result is indexed by VisitedAOI)
dwellsStats = dwells.groupby('VisitedAOI').agg({
                                                   # Dwell Time:
                                                    # Sum: Total Dwell Time (Position measure, cf. documentation/ETmetrics.csv)
                                                    # Mean: Average Dwell Time (Position measure, cf. documentation/ETmetrics.csv)
                                                    # Count: Number of Dwells (Numerosity measure, cf. documentation/ETmetrics.csv)
                                                   'Dwell Time': ['sum', 'mean', 'count'],

                                                   # Number of Fixations in Dwell:
                                                    # Mean: Average Number of Fixations in Dwell (Numerosity measure, cf. documentation/ETmetrics.csv)
                                                   'Number of Fixations in Dwell': ['mean'],

                                                   # Number of Saccades in Dwell:
                                                    # Mean: Average Number of Saccades in Dwell (Numerosity measure, cf. documentation/ETmetrics.csv)
                                                   'Number of Saccades in Dwell': ['mean'],
                                                  })

# Flatten the two-level column labels into "<column>_<aggregation>"
dwellsStats.columns = ['_'.join(x) for x in dwellsStats.columns]

# Rename columns to the human-readable metric names
# (the count of dwells is labelled "Number of Dwells"; it was previously misspelled "Number of wells")
dwellsStats = dwellsStats.rename(columns={"Dwell Time_sum": "Total Dwell Time",
                              "Dwell Time_mean": "Average Dwell Time",
                              "Dwell Time_count": "Number of Dwells",
                              "Number of Fixations in Dwell_mean": "Average Number of Fixations in Dwell",
                              "Number of Saccades in Dwell_mean": "Average Number of Saccades in Dwell",
                             })


In [134]:
# Display dwellsStats (one row per visited AOI; VisitedAOI is the row index)
display(dwellsStats)

Unnamed: 0_level_0,Total Dwell Time,Average Dwell Time,Number of wells,Average Number of Fixations in Dwell,Average Number of Saccades in Dwell
VisitedAOI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Figure,335941.2337,3861.393491,87,10.563218,16.034483
Formula,48325.2359,1464.401088,33,5.030303,6.363636
Paragraph1,72933.6791,2431.122637,30,6.766667,7.1
Paragraph2,94760.7021,2871.536427,33,7.393939,8.30303
Paragraph3,70658.8234,2279.316884,31,6.870968,7.129032
Paragraph4,39033.5133,1084.264258,36,3.444444,5.944444
Paragraph5,61384.6217,1278.846285,48,4.541667,5.604167
Paragraph6,67534.399,1107.121295,61,4.0,7.163934
SubFigure1,82473.4046,1018.19018,81,2.987654,3.728395
SubFigure2,175123.5351,1733.896387,101,4.584158,6.60396


In [135]:
# Export dwellsStats to csv.
# dwellsStats comes from groupby('VisitedAOI'), so the AOI names live in the index:
# the index must be written, otherwise the exported rows cannot be attributed to an AOI.
dwellsStats.to_csv("data/dwellsStats.csv", index=True)