Analysis of the results of the study by Sharafi et al.

In [1]:
import sys
sys.path.append("..")
import os
import pandas as pd
from scipy.stats import wilcoxon
from scipy.stats import mannwhitneyu

from utils.ogama import import_data_into_ogama_sharafi, calculate_results_for_subject_sharafi, drop_all_subject_tables_sharafi
from utils.pygazehelper.pygaze import fixation_data_analysis

Original Results

In [2]:
# Load the original data


fixationsAll = pd.read_csv('../data/StudySharafi/processed-data/fixationAll.csv')

# We only keep the columns Subject, Sex, Age, Image, CategoryName, list_size, Likert_list_size, working_size, Likert_working_size, total_size, Likert_total_size and StimulusType
fixationsAll = fixationsAll[['Subject', 'Sex', 'Age', 'Image', 'CategoryName', 'list_size', 'Likert_list_size', 'working_size', 'Likert_working_size', 'total_size', 'Likert_total_size', 
'StimulusType']]
fixationsAll['Subject'] = fixationsAll['Subject'].astype("string")
fixationsAll['Image'] = fixationsAll['Image'].astype('int32')


PyGaze Analysis

In [3]:
# Run the fixation algorithm of PyGazeAnalyzer
fixation_data_analysis('../data/StudySharafi/formatted-raw-data/', "results/pygaze_fixations_sharafi.csv")

Fixation information saved to results/pygaze_fixations_sharafi.csv


In [4]:
# We load the PyGazeAnalyzer results from the file results/pygaze_fixations_sharafi.csv
pygaze_fixations = pd.read_csv('results/pygaze_fixations_sharafi.csv')
# We add a 'S' before each subject number
pygaze_fixations['Participant'] = 'S' + pygaze_fixations['Participant'].astype(str)
# We remove the '.BMP' from the Task column and cast them to int32
pygaze_fixations['Task'] = pygaze_fixations['Task'].str.replace('.BMP', '')
pygaze_fixations['Task'] = pygaze_fixations['Task'].astype("int32")

# We combine the two dataframes into one by combining the columns where the values of Participant and Task are the same
results_pygaze = pd.merge(fixationsAll, pygaze_fixations, how='inner', left_on=['Subject', 'Image'], right_on=['Participant', 'Task'])
results_pygaze


Unnamed: 0,Subject,Sex,Age,Image,CategoryName,list_size,Likert_list_size,working_size,Likert_working_size,total_size,Likert_total_size,StimulusType,Participant,Task,Fixation Count,Total Fixation Duration [ms],Average Fixation Duration [ms]
0,S151,M,21.0,8,List,6.5,1,16.0,4.0,12.0,1.0,ainsert,S151,8,11,667.4,60.672727
1,S151,M,21.0,25,List,13.0,3,6.0,1.0,13.0,1.0,ainsert,S151,25,25,1802.1,72.084000
2,S151,M,21.0,28,List,14.0,3,8.0,2.0,14.0,2.0,ainsert,S151,28,39,3120.2,80.005128
3,S151,M,21.0,21,List,14.0,3,10.0,2.0,14.0,2.0,ainsert,S151,21,29,2920.0,100.689655
4,S151,M,21.0,11,List,17.0,4,13.0,3.0,17.0,3.0,ainsert,S151,11,26,2870.3,110.396154
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2590,S204,M,24.0,43,Tree,9.0,2,9.0,3.0,1.0,2.0,traversal,S204,43,17,1568.3,92.252941
2591,S204,M,24.0,49,Tree,9.0,2,9.0,3.0,1.0,2.0,traversal,S204,49,32,2736.2,85.506250
2592,S204,M,24.0,40,Tree,12.0,2,12.0,3.0,12.0,2.0,traversal,S204,40,53,3670.5,69.254717
2593,S204,M,24.0,55,Tree,13.0,2,13.0,3.0,13.0,2.0,traversal,S204,55,33,2869.4,86.951515


Ogama Analysis

Import data automatically into Ogama by manipulating the database

In [11]:
database_path = 'your-database-path.db'

drop_all_subject_tables_sharafi(database_path)

for i in range(151, 205):
	# If the folder i exists (some participants are missing)
	path = 'data/StudySharafi/formatted-raw-data/' + str(i)
	if os.path.isdir(path):
		# Import the data into the database
		import_data_into_ogama_sharafi(path + '/ogama.txt', 'S' + str(i), database_path)
		print('Imported data for subject S' + str(i))

Now enter Ogama, load the experiment that is using this database and run the fixation algorithm.

In [13]:
total_results = pd.DataFrame()

for i in range(151, 205):
	# If the folder i exists
	path = '../data/StudySharafi/formatted-raw-data/' + str(i)
	if os.path.isdir(path):
		# Append to the total_results dataframe
		total_results = pd.concat([total_results, calculate_results_for_subject_sharafi('S' + str(i), database_path)])


# We combine the two dataframes into one by combining the columns where the values of Participant and Task are the same
results_ogama = pd.merge(fixationsAll, total_results, how='inner', left_on=['Subject', 'Image'], right_on=['Subject', 'TrialID'])
# Rename the columns Fixation Count, Total Fixation Duration and Average Fixation Duration
results_ogama = results_ogama.rename(columns={'Total Fixation Count': 'Fixation Count', 'Total Fixation Duration': 'Total Fixation Duration [ms]', 'Average Fixation Duration': 'Average Fixation Duration [ms]'})

results_ogama

Unnamed: 0,Subject,Sex,Age,Image,CategoryName,list_size,Likert_list_size,working_size,Likert_working_size,total_size,Likert_total_size,StimulusType,Fixation Count,Total Fixation Duration [ms],Average Fixation Duration [ms]
0,S151,M,21.0,8,List,6.5,1,16.0,4.0,12.0,1.0,ainsert,13,2335,179.615385
1,S151,M,21.0,25,List,13.0,3,6.0,1.0,13.0,1.0,ainsert,15,4088,272.533333
2,S151,M,21.0,28,List,14.0,3,8.0,2.0,14.0,2.0,ainsert,39,5059,129.717949
3,S151,M,21.0,21,List,14.0,3,10.0,2.0,14.0,2.0,ainsert,21,5893,280.619048
4,S151,M,21.0,11,List,17.0,4,13.0,3.0,17.0,3.0,ainsert,23,5239,227.782609
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2521,S204,M,24.0,43,Tree,9.0,2,9.0,3.0,1.0,2.0,traversal,24,3839,159.958333
2522,S204,M,24.0,49,Tree,9.0,2,9.0,3.0,1.0,2.0,traversal,32,5124,160.125000
2523,S204,M,24.0,40,Tree,12.0,2,12.0,3.0,12.0,2.0,traversal,53,7424,140.075472
2524,S204,M,24.0,55,Tree,13.0,2,13.0,3.0,13.0,2.0,traversal,30,6360,212.000000


Results Analysis

In [14]:
def show_results_list_mental_tree(results):
	# We calculate the mean and the standard deviation of the fixation count for each CategoryName
	# We first calculate the mean for each participant

	results_mean_per_subject = results.groupby(['CategoryName', 'Subject']).mean(numeric_only=True)[["Fixation Count", "Total Fixation Duration [ms]"]]
	results_std_per_subject = results.groupby(['CategoryName', 'Subject']).std(numeric_only=True)[["Fixation Count", "Total Fixation Duration [ms]"]]

	results_mean = results_mean_per_subject.groupby(['CategoryName']).mean(numeric_only=True)[["Fixation Count", "Total Fixation Duration [ms]"]]
	results_std = results_std_per_subject.groupby(['CategoryName']).mean(numeric_only=True)[["Fixation Count", "Total Fixation Duration [ms]"]]


	total_fixation_times = results.groupby(['CategoryName', 'Subject'])['Total Fixation Duration [ms]'].sum()

	# Apply Pairwise Comparisons Using Non-Parametric Wilcox Tests (α = 0.05)
	# TODO: check if there is a way to use wilcox 

	stat_list_vs_mental_fix_count, p_list_vs_mental_fix_count = mannwhitneyu(results_mean_per_subject.loc["List"]['Fixation Count'], results_mean_per_subject.loc["Mental"]['Fixation Count'])

	# Use mannwhitneyu for the other comparison
	stat_tree_vs_mental_fix_count, p_tree_vs_mental_fix_count = wilcoxon(results_mean_per_subject.loc["Tree"]['Fixation Count'], results_mean_per_subject.loc["Mental"]['Fixation Count'])

	stat_list_vs_mental_fix_time, p_list_vs_mental_fix_time = mannwhitneyu(total_fixation_times.loc["List"], total_fixation_times.loc["Mental"])

	# Use mannwhitneyu for the other comparison
	stat_tree_vs_mental_fix_time, p_tree_vs_mental_fix_time = wilcoxon(total_fixation_times.loc["Tree"], total_fixation_times.loc["Mental"])

	# We create a table to show the results

	# We divide the fixation time by 1000 to get the fixation time in seconds
	results_mean['Total Fixation Duration [ms]'] = results_mean['Total Fixation Duration [ms]']/1000
	results_std['Total Fixation Duration [ms]'] = results_std['Total Fixation Duration [ms]']/1000

	data = [

		[
			'Fixation Time (s)', 
			f"{'%.2f' % (total_fixation_times.loc['List']/1000).mean()}({'%.2f' % (total_fixation_times.loc['List']/1000).std()})",
			f"{'%.2f' % (total_fixation_times.loc['Mental']/1000).mean()}({'%.2f' % (total_fixation_times.loc['Mental']/1000).std()})",
			f"{'%.2f' % (total_fixation_times.loc['Tree']/1000).mean()}({'%.2f' % (total_fixation_times.loc['Tree']/1000).std()})",
			'%.3f' % p_list_vs_mental_fix_time,
			'%.3f' % p_tree_vs_mental_fix_time
		],
		[
			'Fixation Count', 
			f"{'%.0f' % results_mean.loc['List']['Fixation Count']} ({'%.0f' % results_std.loc['List']['Fixation Count']})", 
			f"{'%.0f' % results_mean.loc['Mental']['Fixation Count']} ({'%.0f' % results_std.loc['Mental']['Fixation Count']})", 
			f"{'%.0f' % results_mean.loc['Tree']['Fixation Count']} ({'%.0f' % results_std.loc['Tree']['Fixation Count']})", 
			'%.3f' % p_list_vs_mental_fix_count, 
			'%.3f' % p_tree_vs_mental_fix_count
		]

		
	]
	data_df = pd.DataFrame(data)
	data_df.columns = ['', 'List', 'Mental', 'Tree', 'List vs Mental p', 'Tree vs Mental p']
	return data_df


In [16]:
def show_results_demographic(results):

	results_mean_per_subject = results.groupby(['CategoryName', 'Subject']).mean(numeric_only=True)[["Fixation Count", "Total Fixation Duration [ms]"]]
	results_std_per_subject = results.groupby(['CategoryName', 'Subject']).std(numeric_only=True)[["Fixation Count", "Total Fixation Duration [ms]"]]

	# Group the results by the different values of Sex
	results_sex = results.groupby("Sex")[["Fixation Count", "Average Fixation Duration [ms]"]]
	results_sex_mean = results_sex.mean(numeric_only=True)
	results_sex_std = results_sex.std(numeric_only=True)

	# Group the results by ages between 18-22 and 23-27
	results_age_group_mean = results.groupby(['CategoryName', 'Subject']).mean(numeric_only=True)[["Fixation Count", "Average Fixation Duration [ms]", "Age"]]
	results_age = results_age_group_mean.groupby(pd.cut(results_age_group_mean["Age"], [18, 22, 27]))[["Fixation Count", "Average Fixation Duration [ms]"]]
	results_age_mean = results_age.mean(numeric_only=True)
	results_age_std = results_age.std(numeric_only=True)

	# Perform non-parametric Wilcoxon tests (α =0.05) 
	stat_gender_fix_count, p_gender_fix_count = wilcoxon(results_sex_mean.loc["M"]['Fixation Count'], results_sex_mean.loc["F"]['Fixation Count'])
	stat_gender_fix_time, p_gender_fix_time = wilcoxon(results_sex_mean.loc["M"]['Average Fixation Duration [ms]'], results_sex_mean.loc["F"]['Average Fixation Duration [ms]'])

	stat_age_fix_count, p_age_fix_count = mannwhitneyu(results_age_group_mean[results_age_group_mean['Age'] <= 22]['Fixation Count'], results_age_group_mean[results_age_group_mean['Age'] > 22]['Fixation Count'])
	stat_age_fix_time, p_age_fix_time = mannwhitneyu(results_age_group_mean[results_age_group_mean['Age'] <= 22]['Average Fixation Duration [ms]'], results_age_group_mean[results_age_group_mean['Age'] > 22]['Average Fixation Duration [ms]'])

	# We create a table to show the results
	data = [
		[
			'Avg. Fix. Duration (ms)', 
			f"{'%.2f' % results_sex_mean.loc['M']['Average Fixation Duration [ms]']}({'%.2f' % results_sex_std.loc['M']['Average Fixation Duration [ms]']})",
			f"{'%.2f' % results_sex_mean.loc['F']['Average Fixation Duration [ms]']}({'%.2f' % results_sex_std.loc['F']['Average Fixation Duration [ms]']})",
			f"{'%.2f' % results_age_mean.loc[pd.Interval(18, 22)]['Average Fixation Duration [ms]']}({'%.2f' % results_age_std.loc[pd.Interval(18, 22)]['Average Fixation Duration [ms]']})",
			f"{'%.2f' % results_age_mean.loc[pd.Interval(22, 27)]['Average Fixation Duration [ms]']}({'%.2f' % results_age_std.loc[pd.Interval(22, 27)]['Average Fixation Duration [ms]']})",
			'%.3f' % p_gender_fix_time,
			'%.3f' % p_age_fix_time
		],
		[
			'Fixation Count', 
			f"{'%.0f' % results_sex_mean.loc['M']['Fixation Count']} ({'%.0f' % results_sex_std.loc['M']['Fixation Count']})", 
			f"{'%.0f' % results_sex_mean.loc['F']['Fixation Count']} ({'%.0f' % results_sex_std.loc['F']['Fixation Count']})", 
			f"{'%.0f' % results_age_mean.loc[pd.Interval(18, 22)]['Fixation Count']} ({'%.0f' % results_age_std.loc[pd.Interval(18, 22)]['Fixation Count']})", 
			f"{'%.0f' % results_age_mean.loc[pd.Interval(22, 27)]['Fixation Count']} ({'%.0f' % results_age_std.loc[pd.Interval(22, 27)]['Fixation Count']})", 
			'%.3f' % p_gender_fix_count, 
			'%.3f' % p_age_fix_count
		]
	]

	data_df = pd.DataFrame(data)
	data_df.columns = ['', 'Men', 'Women', '18-22', '23-27', 'Gender p', 'Age p']
	return data_df


Import the fixations from the database and calculate our results

In [17]:
data_df = show_results_list_mental_tree(results_pygaze)
data_df

Unnamed: 0,Unnamed: 1,List,Mental,Tree,List vs Mental p,Tree vs Mental p
0,Fixation Time (s),120.49(84.63),47.30(39.39),115.46(100.91),0.0,0.0
1,Fixation Count,39 (24),17 (12),37 (21),0.0,0.0


In [18]:
data_df = show_results_list_mental_tree(results_ogama)
data_df

Unnamed: 0,Unnamed: 1,List,Mental,Tree,List vs Mental p,Tree vs Mental p
0,Fixation Time (s),217.87(90.37),83.98(52.45),203.77(104.47),0.0,0.0
1,Fixation Count,38 (21),17 (11),36 (19),0.0,0.0


![Results Study 4 1](../images/results_study_4_1.png)

In [19]:
data_df = show_results_demographic(results_pygaze)
data_df

Unnamed: 0,Unnamed: 1,Men,Women,18-22,23-27,Gender p,Age p
0,Avg. Fix. Duration (ms),82.92(34.36),94.07(40.67),86.33(29.41),89.54(28.57),1.0,0.516
1,Fixation Count,27 (28),34 (28),29 (21),32 (20),1.0,0.368


In [20]:
data_df = show_results_demographic(results_ogama)
data_df

Unnamed: 0,Unnamed: 1,Men,Women,18-22,23-27,Gender p,Age p
0,Avg. Fix. Duration (ms),278.12(682.78),241.62(280.83),285.34(266.43),261.67(180.86),1.0,0.295
1,Fixation Count,28 (24),32 (23),29 (17),30 (17),1.0,0.619


![Results Study 4 2](../images/results_study_4_2.png)