Analysis of the results of the study by Sharafi et al.

In [5]:
import sys
sys.path.append("..")
import os
import pandas as pd
from scipy.stats import wilcoxon
from scipy.stats import mannwhitneyu

from utils.ogama import import_data_into_ogama_sharafi, calculate_results_for_subject_sharafi, drop_all_subject_tables_sharafi
from utils.pygazehelper.pygaze import fixation_data_analysis

Original Results

In [6]:
# Load the original data


fixationsAll = pd.read_csv('./data/StudySharafi/processed-data/fixationAll.csv')

# We only keep the columns Subject, Sex, Age, Image, CategoryName, list_size, Likert_list_size, working_size, Likert_working_size, total_size, Likert_total_size and StimulusType
fixationsAll = fixationsAll[['Subject', 'Sex', 'Age', 'Image', 'CategoryName', 'list_size', 'Likert_list_size', 'working_size', 'Likert_working_size', 'total_size', 'Likert_total_size', 
'StimulusType']]
fixationsAll['Subject'] = fixationsAll['Subject'].astype("string")
fixationsAll['Image'] = fixationsAll['Image'].astype('int32')


PyGaze Analysis

In [None]:
# Run the fixation algorithm of PyGazeAnalyzer
fixation_data_analysis('data/StudySharafi/formatted-raw-data/', "StudySharafi/results/pygaze_fixations_sharafi.csv")

In [15]:
# We load the PyGazeAnalyzer results from the file results/pygaze_fixations_sharafi.csv
pygaze_fixations = pd.read_csv('results/pygaze_fixations_sharafi.csv')
# We add a 'S' before each subject number
pygaze_fixations['Participant'] = 'S' + pygaze_fixations['Participant'].astype(str)
# We remove the '.BMP' from the Task column and cast them to int32
pygaze_fixations['Task'] = pygaze_fixations['Task'].str.replace('.BMP', '')
pygaze_fixations['Task'] = pygaze_fixations['Task'].astype("int32")

# We combine the two dataframes into one by combining the columns where the values of Participant and Task are the same
results_pygaze = pd.merge(fixationsAll, pygaze_fixations, how='inner', left_on=['Subject', 'Image'], right_on=['Participant', 'Task'])
results_pygaze


Unnamed: 0,Subject,Sex,Age,Image,CategoryName,list_size,Likert_list_size,working_size,Likert_working_size,total_size,Likert_total_size,StimulusType,Participant,Task,Fixation Count,Total Fixation Duration [ms],Average Fixation Duration [ms]
0,S151,M,21.0,8,List,6.5,1,16.0,4.0,12.0,1.0,ainsert,S151,8,33,2436.1,73.821212
1,S151,M,21.0,25,List,13.0,3,6.0,1.0,13.0,1.0,ainsert,S151,25,38,4037.3,106.244737
2,S151,M,21.0,28,List,14.0,3,8.0,2.0,14.0,2.0,ainsert,S151,28,69,6723.4,97.440580
3,S151,M,21.0,21,List,14.0,3,10.0,2.0,14.0,2.0,ainsert,S151,21,25,3620.5,144.820000
4,S151,M,21.0,11,List,17.0,4,13.0,3.0,17.0,3.0,ainsert,S151,11,36,4988.3,138.563889
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2590,S204,M,24.0,43,Tree,9.0,2,9.0,3.0,1.0,2.0,traversal,S204,43,48,4237.7,88.285417
2591,S204,M,24.0,49,Tree,9.0,2,9.0,3.0,1.0,2.0,traversal,S204,49,39,4738.2,121.492308
2592,S204,M,24.0,40,Tree,12.0,2,12.0,3.0,12.0,2.0,traversal,S204,40,78,7107.6,91.123077
2593,S204,M,24.0,55,Tree,13.0,2,13.0,3.0,13.0,2.0,traversal,S204,55,46,5105.2,110.982609


Ogama Analysis

Import data automatically into Ogama by manipulating the database

In [2]:
database_path = 'your-database-path.db'

drop_all_subject_tables_sharafi(database_path)

for i in range(151, 205):
	# If the folder i exists (some participants are missing)
	path = 'data/StudySharafi/formatted-raw-data/' + str(i)
	if os.path.isdir(path):
		# Import the data into the database
		import_data_into_ogama_sharafi(path + '/ogama.txt', 'S' + str(i), database_path)
		print('Imported data for subject S' + str(i))

False
your-database-path.db False


Exception: Database does not exist

Now enter Ogama, load the experiment that is using this database and run the fixation algorithm.

In [7]:
total_results = pd.DataFrame()

for i in range(151, 205):
	# If the folder i exists
	path = 'data/StudySharafi/formatted-raw-data/' + str(i)
	if os.path.isdir(path):
		# Append to the total_results dataframe
		total_results = pd.concat([total_results, calculate_results_for_subject_sharafi('S' + str(i), database_path)])

# We combine the two dataframes into one by combining the columns where the values of Participant and Task are the same
results_ogama = pd.merge(fixationsAll, total_results, how='inner', left_on=['Subject', 'Image'], right_on=['Subject', 'TrialID'])
# Rename the columns Fixation Count, Total Fixation Duration and Average Fixation Duration
results_ogama = results_ogama.rename(columns={'Total Fixation Count': 'Fixation Count', 'Total Fixation Duration': 'Total Fixation Duration [ms]', 'Average Fixation Duration': 'Average Fixation Duration [ms]'})

results_ogama

Unnamed: 0,Subject,Sex,Age,Image,CategoryName,list_size,Likert_list_size,working_size,Likert_working_size,total_size,Likert_total_size,StimulusType,Fixation Count,Total Fixation Duration [ms],Average Fixation Duration [ms]
0,S151,M,21.0,8,List,6.5,1,16.0,4.0,12.0,1.0,ainsert,13,2335,179.615385
1,S151,M,21.0,25,List,13.0,3,6.0,1.0,13.0,1.0,ainsert,15,4088,272.533333
2,S151,M,21.0,28,List,14.0,3,8.0,2.0,14.0,2.0,ainsert,39,5059,129.717949
3,S151,M,21.0,21,List,14.0,3,10.0,2.0,14.0,2.0,ainsert,21,5893,280.619048
4,S151,M,21.0,11,List,17.0,4,13.0,3.0,17.0,3.0,ainsert,23,5239,227.782609
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2521,S204,M,24.0,43,Tree,9.0,2,9.0,3.0,1.0,2.0,traversal,24,3839,159.958333
2522,S204,M,24.0,49,Tree,9.0,2,9.0,3.0,1.0,2.0,traversal,32,5124,160.125000
2523,S204,M,24.0,40,Tree,12.0,2,12.0,3.0,12.0,2.0,traversal,53,7424,140.075472
2524,S204,M,24.0,55,Tree,13.0,2,13.0,3.0,13.0,2.0,traversal,30,6360,212.000000


Results Analysis

In [9]:
def show_results_list_mental_tree(results):
	# We calculate the mean and the standard deviation of the fixation count for each CategoryName
	# We first calculate the mean for each participant

	results_mean_per_subject = results.groupby(['CategoryName', 'Subject']).mean(numeric_only=True)[["Fixation Count", "Total Fixation Duration [ms]"]]
	results_std_per_subject = results.groupby(['CategoryName', 'Subject']).std(numeric_only=True)[["Fixation Count", "Total Fixation Duration [ms]"]]

	results_mean = results_mean_per_subject.groupby(['CategoryName']).mean(numeric_only=True)[["Fixation Count", "Total Fixation Duration [ms]"]]
	results_std = results_std_per_subject.groupby(['CategoryName']).mean(numeric_only=True)[["Fixation Count", "Total Fixation Duration [ms]"]]


	total_fixation_times = results.groupby(['CategoryName', 'Subject'])['Total Fixation Duration [ms]'].sum()

	# Apply Pairwise Comparisons Using Non-Parametric Wilcox Tests (α = 0.05)
	# TODO: check if there is a way to use wilcox 

	stat_list_vs_mental_fix_count, p_list_vs_mental_fix_count = mannwhitneyu(results_mean_per_subject.loc["List"]['Fixation Count'], results_mean_per_subject.loc["Mental"]['Fixation Count'])

	# Use mannwhitneyu for the other comparison
	stat_tree_vs_mental_fix_count, p_tree_vs_mental_fix_count = wilcoxon(results_mean_per_subject.loc["Tree"]['Fixation Count'], results_mean_per_subject.loc["Mental"]['Fixation Count'])

	stat_list_vs_mental_fix_time, p_list_vs_mental_fix_time = mannwhitneyu(total_fixation_times.loc["List"], total_fixation_times.loc["Mental"])

	# Use mannwhitneyu for the other comparison
	stat_tree_vs_mental_fix_time, p_tree_vs_mental_fix_time = wilcoxon(total_fixation_times.loc["Tree"], total_fixation_times.loc["Mental"])

	# We create a table to show the results

	# We divide the fixation time by 1000 to get the fixation time in seconds
	results_mean['Total Fixation Duration [ms]'] = results_mean['Total Fixation Duration [ms]']/1000
	results_std['Total Fixation Duration [ms]'] = results_std['Total Fixation Duration [ms]']/1000

	data = [

		[
			'Fixation Time (s)', 
			f"{'%.2f' % (total_fixation_times.loc['List']/1000).mean()}({'%.2f' % (total_fixation_times.loc['List']/1000).std()})",
			f"{'%.2f' % (total_fixation_times.loc['Mental']/1000).mean()}({'%.2f' % (total_fixation_times.loc['Mental']/1000).std()})",
			f"{'%.2f' % (total_fixation_times.loc['Tree']/1000).mean()}({'%.2f' % (total_fixation_times.loc['Tree']/1000).std()})",
			'%.3f' % p_list_vs_mental_fix_time,
			'%.3f' % p_tree_vs_mental_fix_time
		],
		[
			'Fixation Count', 
			f"{'%.0f' % results_mean.loc['List']['Fixation Count']} ({'%.0f' % results_std.loc['List']['Fixation Count']})", 
			f"{'%.0f' % results_mean.loc['Mental']['Fixation Count']} ({'%.0f' % results_std.loc['Mental']['Fixation Count']})", 
			f"{'%.0f' % results_mean.loc['Tree']['Fixation Count']} ({'%.0f' % results_std.loc['Tree']['Fixation Count']})", 
			'%.3f' % p_list_vs_mental_fix_count, 
			'%.3f' % p_tree_vs_mental_fix_count
		]

		
	]
	data_df = pd.DataFrame(data)
	data_df.columns = ['', 'List', 'Mental', 'Tree', 'List vs Mental p', 'Tree vs Mental p']
	return data_df


In [13]:
def show_results_demographic(results):

	results_mean_per_subject = results.groupby(['CategoryName', 'Subject']).mean(numeric_only=True)[["Fixation Count", "Total Fixation Duration [ms]"]]
	results_std_per_subject = results.groupby(['CategoryName', 'Subject']).std(numeric_only=True)[["Fixation Count", "Total Fixation Duration [ms]"]]

	# Group the results by the different values of Sex
	results_sex = results.groupby("Sex")[["Fixation Count", "Average Fixation Duration [ms]"]]
	results_sex_mean = results_sex.mean(numeric_only=True)
	results_sex_std = results_sex.std(numeric_only=True)

	# Group the results by ages between 18-22 and 23-27
	results_age_group_mean = results.groupby(['CategoryName', 'Subject']).mean(numeric_only=True)[["Fixation Count", "Average Fixation Duration [ms]", "Age"]]
	results_age = results_age_group_mean.groupby(pd.cut(results_age_group_mean["Age"], [18, 22, 27]))[["Fixation Count", "Average Fixation Duration [ms]"]]
	results_age_mean = results_age.mean(numeric_only=True)
	results_age_std = results_age.std(numeric_only=True)

	# Perform non-parametric Wilcoxon tests (α =0.05) 
	stat_gender_fix_count, p_gender_fix_count = wilcoxon(results_sex_mean.loc["M"]['Fixation Count'], results_sex_mean.loc["F"]['Fixation Count'])
	stat_gender_fix_time, p_gender_fix_time = wilcoxon(results_sex_mean.loc["M"]['Average Fixation Duration [ms]'], results_sex_mean.loc["F"]['Average Fixation Duration [ms]'])

	stat_age_fix_count, p_age_fix_count = mannwhitneyu(results_age_group_mean[results_age_group_mean['Age'] <= 22]['Fixation Count'], results_age_group_mean[results_age_group_mean['Age'] > 22]['Fixation Count'])
	stat_age_fix_time, p_age_fix_time = mannwhitneyu(results_age_group_mean[results_age_group_mean['Age'] <= 22]['Average Fixation Duration [ms]'], results_age_group_mean[results_age_group_mean['Age'] > 22]['Average Fixation Duration [ms]'])

	# We create a table to show the results
	data = [
		[
			'Avg. Fix. Duration (ms)', 
			f"{'%.2f' % results_sex_mean.loc['M']['Average Fixation Duration [ms]']}({'%.2f' % results_sex_std.loc['M']['Average Fixation Duration [ms]']})",
			f"{'%.2f' % results_sex_mean.loc['F']['Average Fixation Duration [ms]']}({'%.2f' % results_sex_std.loc['F']['Average Fixation Duration [ms]']})",
			f"{'%.2f' % results_age_mean.loc[pd.Interval(18, 22)]['Average Fixation Duration [ms]']}({'%.2f' % results_age_std.loc[pd.Interval(18, 22)]['Average Fixation Duration [ms]']})",
			f"{'%.2f' % results_age_mean.loc[pd.Interval(22, 27)]['Average Fixation Duration [ms]']}({'%.2f' % results_age_std.loc[pd.Interval(22, 27)]['Average Fixation Duration [ms]']})",
			'%.3f' % p_gender_fix_time,
			'%.3f' % p_age_fix_time
		],
		[
			'Fixation Count', 
			f"{'%.0f' % results_sex_mean.loc['M']['Fixation Count']} ({'%.0f' % results_sex_std.loc['M']['Fixation Count']})", 
			f"{'%.0f' % results_sex_mean.loc['F']['Fixation Count']} ({'%.0f' % results_sex_std.loc['F']['Fixation Count']})", 
			f"{'%.0f' % results_age_mean.loc[pd.Interval(18, 22)]['Fixation Count']} ({'%.0f' % results_age_std.loc[pd.Interval(18, 22)]['Fixation Count']})", 
			f"{'%.0f' % results_age_mean.loc[pd.Interval(22, 27)]['Fixation Count']} ({'%.0f' % results_age_std.loc[pd.Interval(22, 27)]['Fixation Count']})", 
			'%.3f' % p_gender_fix_count, 
			'%.3f' % p_age_fix_count
		]
	]

	data_df = pd.DataFrame(data)
	data_df.columns = ['', 'Men', 'Women', '18-22', '23-27', 'Gender p', 'Age p']
	return data_df


Import the fixations from the database and calculate our results

In [None]:
data_df = show_results_list_mental_tree(results_pygaze)
data_df

CategoryName  Subject
List          S151       157152.0
              S153       183849.4
              S154       188985.1
              S155       373423.8
              S156       100368.1
                           ...   
Tree          S200        77974.4
              S201       230751.5
              S202        60479.5
              S203        97952.2
              S204       165780.5
Name: Total Fixation Duration [ms], Length: 92, dtype: float64
Subject
S151    157152.0
S153    183849.4
S154    188985.1
S155    373423.8
S156    100368.1
S157     93552.9
S159     62329.7
S160    299862.8
S161     77742.8
S162    298542.0
S163    234851.4
S165    176929.3
S167    196654.5
S168    102585.8
S169    187850.4
S171      7241.5
S172     93115.5
S173    308305.5
S174    237502.8
S175    216507.6
S176    197226.9
S177    252634.3
S179    387926.8
S182     73396.2
S183    216424.4
S200     84835.5
S201    246592.3
S202     61411.5
S203     95082.3
S204    230525.4
Name: Total Fixation Du



Unnamed: 0,Unnamed: 1,List,Mental,Tree,List vs Mental p,Tree vs Mental p
0,Fixation Time (s),181.45(96.70),71.70(47.94),170.85(109.38),0.0,0.0
1,Fixation Count,49 (27),21 (14),47 (24),0.0,0.0


In [17]:
data_df = show_results_list_mental_tree(results_ogama)
data_df

Unnamed: 0,Unnamed: 1,List,Mental,Tree,List vs Mental p,Tree vs Mental p
0,Fixation Time (s),217.87(90.37),83.98(52.45),203.77(104.47),0.0,0.0
1,Fixation Count,38 (21),17 (11),36 (19),0.0,0.0


![Results Study 4 1](images/results_study_4_1.png)

In [16]:
data_df = show_results_demographic(results_pygaze)
data_df

Unnamed: 0,Unnamed: 1,Men,Women,18-22,23-27,Gender p,Age p
0,Avg. Fix. Duration (ms),106.46(62.30),131.38(91.41),112.87(41.39),123.72(62.95),1.0,0.625
1,Fixation Count,35 (29),40 (29),38 (21),38 (20),1.0,0.625


In [14]:
data_df = show_results_demographic(results_ogama)
data_df

Unnamed: 0,Unnamed: 1,Men,Women,18-22,23-27,Gender p,Age p
0,Avg. Fix. Duration (ms),278.12(682.78),241.62(280.83),285.34(266.43),261.67(180.86),1.0,0.295
1,Fixation Count,28 (24),32 (23),29 (17),30 (17),1.0,0.619


![Results Study 4 2](images/results_study_4_2.png)