In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import scipy.stats as stats
from run_stats import *

##### Smell Rating

In [None]:
# Load the 'Surveys' sheet of the experiment workbook.
survey_data = pd.read_excel("./Experimental_Data_revised.xlsx", sheet_name='Surveys')

In [None]:
# Split the 'RATING' survey column into the three groups, then run the
# three-group statistics on them under the label 'Smell Rating'.
rating_groups = make_three_group_lists(survey_data, 'RATING')
fish, lemon, neutral = rating_groups
ratings = three_lists_stats(fish, lemon, neutral, 'Smell Rating')

In [None]:
# Split the 'CPP_Experience' survey column into the three groups, then run the
# three-group statistics on them under the label 'CPP'.
experience_groups = make_three_group_lists(survey_data, 'CPP_Experience')
fish, lemon, neutral = experience_groups
yrs_experience = three_lists_stats(fish, lemon, neutral, 'CPP')

In [None]:
# Split the 'GPA' survey column into the three groups, then run the
# three-group statistics on them under the label 'GPA'.
gpa_groups = make_three_group_lists(survey_data, 'GPA')
fish, lemon, neutral = gpa_groups
gpa = three_lists_stats(fish, lemon, neutral, 'GPA')

In [None]:
# Split the 'AGE' survey column into the three groups, then run the
# three-group statistics on them under the label 'AGE'.
age_groups = make_three_group_lists(survey_data, 'AGE')
fish, lemon, neutral = age_groups
age = three_lists_stats(fish, lemon, neutral, 'AGE')

## RQ1: COGNITIVE EFFORT

#### Variables

In [None]:
# Column names handed to calculate_stats for every task below.
# Presumably '_fc' = fixation count and '_avfd' = average fixation duration
# (calculate_stats' results are read under those two labels) — confirm.
relevant_columns = [
    # *_fc columns, in neutral / lemon / fish order
    'neutral_fc',
    'lemon_fc',
    'fish_fc',
    # *_avfd columns, same order
    'neutral_avfd',
    'lemon_avfd',
    'fish_avfd',
]

### CODE COMPREHENSION

In [None]:
# Code-comprehension metrics workbook (found in the 'tobii metrics' folder).
code_summary_path = "tobii_metrics/code_summary.xlsx"
cc_df = pd.read_excel(code_summary_path)

# Run the shared stats helper on the fixation columns listed in relevant_columns.
output_comp = calculate_stats(cc_df, relevant_columns)

In [None]:
# Display the top-level keys of the calculate_stats result; the next cell
# indexes it by 'Fixation Count' and 'Average Fixation Duration'.
output_comp.keys()

In [None]:
# Trim the outliers identified for the code-comprehension metrics, then re-run
# the three-group statistics on the trimmed lists.
# NOTE(review): the slices drop values by position, so they remove the stated
# max/min values only if calculate_stats returns each list sorted ascending —
# confirm against run_stats.
# NOTE(review): this cell mutates output_comp in place, so running it twice
# trims the lists twice; re-run the loading cell first.

# fixation count: lemon - max value, neutral - top two values
comp_fc = output_comp['Fixation Count']
comp_fc['lemon'] = comp_fc['lemon'][:-1]
comp_fc['neutral'] = comp_fc['neutral'][:-2]
# calculating stats
fish = pd.Series(comp_fc['fish'])
lemon = pd.Series(comp_fc['lemon'])
neutral = pd.Series(comp_fc['neutral'])
filtered_fc = three_lists_stats(fish, lemon, neutral, 'Fixation Count')
# three_lists_stats keys its result by the name passed in, so unwrap that entry
# before storing it. Storing the whole return value would nest the stats one
# level deeper than in the code-writing and reading-comprehension cells.
output_comp['Fixation Count'] = filtered_fc['Fixation Count']


# average fixation duration: fish - max and min values
comp_avfd = output_comp['Average Fixation Duration']
comp_avfd['fish'] = comp_avfd['fish'][1:-1]
# calculating stats
fish = pd.Series(comp_avfd['fish'])
lemon = pd.Series(comp_avfd['lemon'])
neutral = pd.Series(comp_avfd['neutral'])
filtered_fd = three_lists_stats(fish, lemon, neutral, 'Average Fixation Duration')
output_comp['Average Fixation Duration'] = filtered_fd['Average Fixation Duration']



### CODE WRITING

In [None]:
# Code-writing fixation counts (found in the writingFixations folder).
writing_counts_path = "writingFixations/all_itrace_counts.csv"
w_df = pd.read_csv(writing_counts_path)

# Run the shared stats helper on the fixation columns listed in relevant_columns.
output_writing = calculate_stats(w_df, relevant_columns)


In [None]:
# Trim the outliers identified for the code-writing metrics, then re-run the
# three-group statistics on the trimmed lists.
# NOTE(review): the slices drop values by position, so they remove the stated
# max/min values only if each list is sorted ascending — confirm.

# fixation count - lemon: min value
writing_fc = output_writing['Fixation Count']
writing_fc['lemon'] = writing_fc['lemon'][1:]

# calculating stats
fish, lemon, neutral = (
    pd.Series(writing_fc[group]) for group in ('fish', 'lemon', 'neutral')
)
filtered_fc = three_lists_stats(fish, lemon, neutral, 'Fixation Count')
output_writing['Fixation Count'] = filtered_fc['Fixation Count']

# average fixation duration - fish: max value, lemon: 2 max values, neutral: 2 max values
writing_avfd = output_writing['Average Fixation Duration']
writing_avfd['fish'] = writing_avfd['fish'][:-1]
writing_avfd['lemon'] = writing_avfd['lemon'][:-2]
writing_avfd['neutral'] = writing_avfd['neutral'][:-2]

# calculating stats
fish, lemon, neutral = (
    pd.Series(writing_avfd[group]) for group in ('fish', 'lemon', 'neutral')
)
filtered_fd = three_lists_stats(fish, lemon, neutral, 'Average Fixation Duration')
output_writing['Average Fixation Duration'] = filtered_fd['Average Fixation Duration']


### Reading Comprehension

In [None]:
# Reading-comprehension (prose) metrics workbook.
prose_summary_path = "tobii_metrics/prose_summary.xlsx"
prose_df = pd.read_excel(prose_summary_path)

# Run the shared stats helper on the fixation columns listed in relevant_columns.
output_prose = calculate_stats(prose_df, relevant_columns)


In [None]:
# Trim the outliers identified for the reading-comprehension fixation counts,
# then re-run the three-group statistics on the trimmed lists.
# NOTE(review): the slices drop values by position, so they remove the stated
# max/min values only if each list is sorted ascending — confirm.

# fixation count - neutral: max value, fish: max and min values
prose_fc = output_prose['Fixation Count']
prose_fc['neutral'] = prose_fc['neutral'][:-1]
prose_fc['fish'] = prose_fc['fish'][1:-1]

# calculating stats
fish, lemon, neutral = (
    pd.Series(prose_fc[group]) for group in ('fish', 'lemon', 'neutral')
)
filtered_fc = three_lists_stats(fish, lemon, neutral, 'Fixation Count')
output_prose['Fixation Count'] = filtered_fc['Fixation Count']


### SACCADES

In [None]:
# Load the 'Tasks' sheet of the experiment workbook.
task_data = pd.read_excel("Experimental_Data_revised.xlsx", sheet_name="Tasks")

In [None]:
# Row positions of each condition in the Tasks sheet. The CONDITION column uses
# 'bad' / 'good' / 'neutral'; these are bound to the fish / lemon / neutral
# names used throughout this notebook.
# np.where with a single argument returns a 1-tuple holding the position array.
fish_idx = np.where(task_data['CONDITION'] == 'bad')
lemon_idx = np.where(task_data['CONDITION'] == 'good')
neutral_idx = np.where(task_data['CONDITION'] == 'neutral')

# The indices above are positions, not index labels, so look the IDs up with
# .iloc. Passing the np.where tuple to label-based .loc is only correct when
# the index labels equal row positions, and depends on pandas accepting a
# 1-tuple of arrays as a row key.
participant_ids = task_data['ID']
fish_ppl = list(participant_ids.iloc[fish_idx[0]])
lemon_ppl = list(participant_ids.iloc[lemon_idx[0]])
neutral_ppl = list(participant_ids.iloc[neutral_idx[0]])

In [None]:
# Load the saccade data saved under midprocessing/ for each task.
# NOTE: pickle.load can execute arbitrary code from the file, so only load
# pickles that were produced by this project.
with open("midprocessing/ccd_saccades.pkl", mode='rb') as ccd_file:
    ccd_saccades = pickle.load(ccd_file)

with open("midprocessing/prose_saccades.pkl", mode='rb') as prose_file:
    prose_saccades = pickle.load(prose_file)

with open("midprocessing/writing_saccades.pkl", mode='rb') as writing_file:
    writing_saccades = pickle.load(writing_file)

In [None]:
# Run the three-group statistics on each task's saccade data.
# three_lists_stats keys its result by the name passed in, so each result is
# unwrapped straight away and the per-task name ends up holding the inner entry.
# NOTE(review): prose_saccades and writing_saccades are rebound from the loaded
# pickle data to the stats result here, so this cell is not a clean re-run
# without re-loading the pickles first.
prose_label = 'Saccades: Prose Comprehension'
prose_saccades = three_lists_stats(
    fish_list=pd.Series(prose_saccades['fish']),
    lemon_list=pd.Series(prose_saccades['lemon']),
    neut_list=pd.Series(prose_saccades['neutral']),
    name=prose_label,
)[prose_label]

code_label = 'CODE SACCADES'
code_saccades = three_lists_stats(
    fish_list=pd.Series(ccd_saccades['fish']),
    lemon_list=pd.Series(ccd_saccades['lemon']),
    neut_list=pd.Series(ccd_saccades['neutral']),
    name=code_label,
)[code_label]

writing_label = 'WRITING SACCADES'
writing_saccades = three_lists_stats(
    fish_list=pd.Series(writing_saccades['fish']),
    lemon_list=pd.Series(writing_saccades['lemon']),
    neut_list=pd.Series(writing_saccades['neutral']),
    name=writing_label,
)[writing_label]

In [None]:
# Trim the outliers identified for the code-reading and code-writing saccade
# counts, then re-run the three-group statistics on the trimmed lists.
# NOTE(review): the slices drop values by position, so they remove the stated
# max/min values only if each list is sorted ascending — confirm.

# code_saccades - lemon: remove max, neutral: remove max
code_saccades['lemon'] = code_saccades['lemon'][:-1]
code_saccades['neutral'] = code_saccades['neutral'][:-1]

# calculating stats
fish = pd.Series(code_saccades['fish'])
lemon = pd.Series(code_saccades['lemon'])
neutral = pd.Series(code_saccades['neutral'])
filtered_code = three_lists_stats(fish, lemon, neutral, 'Saccades: Code Reading')
# code_saccades is already the unwrapped stats entry, so replace it with the
# filtered entry. Writing the result under a nested 'CODE SACCADES' key would
# leave the top-level statistics stale from the untrimmed data.
code_saccades = filtered_code['Saccades: Code Reading']

# writing_saccades - lemon: remove top 2 and bottom 2, neutral: remove max
writing_saccades['lemon'] = writing_saccades['lemon'][2:-2]
writing_saccades['neutral'] = writing_saccades['neutral'][:-1]

# calculating stats
fish = pd.Series(writing_saccades['fish'])
lemon = pd.Series(writing_saccades['lemon'])
neutral = pd.Series(writing_saccades['neutral'])
filtered_writing = three_lists_stats(fish, lemon, neutral, 'Code Writing Saccades')
# Same as above: rebind to the filtered entry rather than nesting it.
writing_saccades = filtered_writing['Code Writing Saccades']


In [None]:
# Box plot of total saccade counts per condition for the code-comprehension task.
# NOTE(review): plt and sns are not imported by name in the import cell;
# presumably they arrive through `from run_stats import *` — confirm.
data = code_saccades

# Seaborn styling must be applied before the figure is created.
sns.set(style="whitegrid")

fig, ax = plt.subplots(figsize=(8, 6))
box = ax.boxplot(
    [data['lemon'], data['neutral'], data['fish']],
    labels=['Lemon', 'Neutral', 'Fish'],
    patch_artist=True,
    boxprops={'facecolor': 'lightgray', 'color': 'black'},
    medianprops={'color': 'black'},
    whiskerprops={'color': 'black'},
    capprops={'color': 'black'},
)

# One fill colour per box, in the same Lemon / Neutral / Fish order.
colors = ['#F0E68C', '#708090', '#FFB3BA']
for box_patch, fill_color in zip(box['boxes'], colors):
    box_patch.set_facecolor(fill_color)

# Title, axis labels and tick sizes
ax.set_title("Code Comprehension: Total Saccade Counts", fontsize=16, fontweight='bold')
ax.set_xlabel("Condition", fontsize=18)
ax.set_ylabel("Saccade Total", fontsize=18)
ax.tick_params(axis='both', which='major', labelsize=16)

# Dashed gridlines
ax.grid(True, linestyle='--', alpha=0.7)

# Lemon-vs-fish t-test.
# NOTE(review): t_stat / p_val are not used below; the "*" annotation is
# hard-coded rather than derived from this result.
t_stat, p_val = stats.ttest_ind(data['lemon'], data['fish'])

# Significance bracket spanning the Lemon (x=1) and Fish (x=3) boxes
x1, x2 = 1, 3
col = 'black'
h = 400
y = max(max(data['lemon']), max(data['fish'])) + 600
ax.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
ax.text((x1 + x2) * .5, y + h, "*", ha='center', va='bottom', fontsize=16, color=col)

ax.set_ylim([1000, 8500])

# Remove top and right spines for a cleaner look
sns.despine()
# plt.savefig("figures/saccade_code_box_plot.png", dpi=150)
plt.show()

In [None]:
# Box plot of total saccade counts per condition for the prose-comprehension task.
# NOTE(review): plt and sns are not imported by name in the import cell;
# presumably they arrive through `from run_stats import *` — confirm.
data = prose_saccades

# Seaborn styling must be applied before the figure is created.
sns.set(style="whitegrid")

fig, ax = plt.subplots(figsize=(8, 6))
box = ax.boxplot(
    [data['lemon'], data['neutral'], data['fish']],
    labels=['Lemon', 'Neutral', 'Fish'],
    patch_artist=True,
    boxprops={'facecolor': 'lightgray', 'color': 'black'},
    medianprops={'color': 'black'},
    whiskerprops={'color': 'black'},
    capprops={'color': 'black'},
)

# One fill colour per box, in the same Lemon / Neutral / Fish order.
colors = ['#F0E68C', '#708090', '#FFB3BA']
for box_patch, fill_color in zip(box['boxes'], colors):
    box_patch.set_facecolor(fill_color)

# Title, axis labels and tick sizes
ax.set_title("Prose Comprehension: Total Saccade Counts", fontsize=16, fontweight='bold')
ax.set_xlabel("Condition", fontsize=18)
ax.set_ylabel("Saccade Total", fontsize=18)
ax.tick_params(axis='both', which='major', labelsize=16)

# Dashed gridlines
ax.grid(True, linestyle='--', alpha=0.7)

# Lemon-vs-fish t-test.
# NOTE(review): t_stat / p_val are not used below; the "**" annotation is
# hard-coded rather than derived from this result.
t_stat, p_val = stats.ttest_ind(data['lemon'], data['fish'])

# Significance bracket spanning the Lemon (x=1) and Fish (x=3) boxes
x1, x2 = 1, 3
col = 'black'
h = 400
y = max(max(data['lemon']), max(data['fish'])) + 1200
ax.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
ax.text((x1 + x2) * .5, y + h, "**", ha='center', va='bottom', fontsize=16, color=col)

ax.set_ylim([1000, 6000])

# Remove top and right spines for a cleaner look
sns.despine()
# plt.savefig("figures/saccade_prose_box_plot.png", dpi=150)
plt.show()

In [None]:
# Box plot of total saccade counts per condition for the code-writing task.
# NOTE(review): plt and sns are not imported by name in the import cell;
# presumably they arrive through `from run_stats import *` — confirm.
data = writing_saccades

# Seaborn styling must be applied before the figure is created.
sns.set(style="whitegrid")

fig, ax = plt.subplots(figsize=(8, 6))
box = ax.boxplot(
    [data['lemon'], data['neutral'], data['fish']],
    labels=['Lemon', 'Neutral', 'Fish'],
    patch_artist=True,
    boxprops={'facecolor': 'lightgray', 'color': 'black'},
    medianprops={'color': 'black'},
    whiskerprops={'color': 'black'},
    capprops={'color': 'black'},
)

# One fill colour per box, in the same Lemon / Neutral / Fish order.
colors = ['#F0E68C', '#708090', '#FFB3BA']
for box_patch, fill_color in zip(box['boxes'], colors):
    box_patch.set_facecolor(fill_color)

# Title, axis labels and tick sizes
ax.set_title("Code Writing: Total Saccade Counts", fontsize=16, fontweight='bold')
ax.set_xlabel("Condition", fontsize=18)
ax.set_ylabel("Saccade Total", fontsize=18)
ax.tick_params(axis='both', which='major', labelsize=16)

# Dashed gridlines
ax.grid(True, linestyle='--', alpha=0.7)

# Lemon-vs-fish t-test.
# NOTE(review): t_stat / p_val are computed but not used anywhere in this cell;
# no significance annotation is drawn on this plot.
t_stat, p_val = stats.ttest_ind(data['lemon'], data['fish'])

ax.set_ylim([1000, 6000])

# Remove top and right spines for a cleaner look
sns.despine()
# plt.savefig("figures/saccade_writing_box_plot.png", dpi=150)
plt.show()