This notebook examines the bugs and warnings, if any, that occurred during the crowding experiment.

In [1]:
# Optional one-time setup: uncomment only if seaborn is missing from this kernel's environment.
# !pip install seaborn

In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import seaborn as sns
import scipy.stats as stats

In [3]:
# Set up files
# os.listdir returns entries in arbitrary (filesystem-dependent) order, so the
# file lists are sorted to make the processing order reproducible across machines.
dir_firstSess = '../data/corr_firstSess'
files_firstSess = sorted(f for f in os.listdir(dir_firstSess) if f.endswith('.csv'))

dir_secondSess = '../data/corr_secondSess'
files_secondSess = sorted(f for f in os.listdir(dir_secondSess) if f.endswith('.csv'))

# One CSV per completed session, so the file counts are the session counts.
numTotalSessions = len(files_firstSess) + len(files_secondSess)

print('Number of total sessions: ', numTotalSessions)
print('Number of subjects who completed the second session: ', len(files_secondSess))

Number of total sessions:  360
Number of subjects who completed the second session:  171


# Data cleaning

## Acquire thresholds for all tasks

### Functions:

In [4]:
def getThresholds(mydir, files, condition_names, num_trials_per_staircase=35,
                    exclude_trial_count_bool=True, exclude_questSD=True,
                    max_questSD=0.1):
    """Extract one log threshold per condition from each session CSV.

    For every file, the single non-missing ``questMeanAtEndOfTrialsLoop`` value
    of each requested condition is taken as that condition's threshold. The
    threshold is replaced by NaN when it fails an enabled exclusion check.

    Parameters
    ----------
    mydir : str
        Directory containing the session CSV files.
    files : sequence of str
        File names (relative to ``mydir``) to process. May be empty.
    condition_names : sequence of str
        Values of the ``conditionName`` column to extract thresholds for.
    num_trials_per_staircase : int, default 35
        Minimum number of rows whose ``trialGivenToQuest`` is True required to
        keep a threshold.
    exclude_trial_count_bool : bool, default True
        If True, a threshold is set to NaN and a quality-check row is recorded
        when the condition has too few trials given to QUEST or has any
        non-missing entry in the ``Warning`` column (if that column exists).
    exclude_questSD : bool, default True
        If True, a threshold is set to NaN when the condition's first
        non-missing ``questSDAtEndOfTrialsLoop`` is not below ``max_questSD``.
    max_questSD : float, default 0.1
        Exclusive upper bound on the QUEST SD used by ``exclude_questSD``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``all_data_df`` has one row per file with columns ``prolificID`` and one
        column per condition name. ``all_checks_df`` has one row per threshold
        rejected by the trial-count/warning check. Both frames keep their
        columns even when they have no rows.

    Raises
    ------
    AssertionError
        If a condition does not have exactly one ``questMeanAtEndOfTrialsLoop``
        value, or if its ``trialGivenToQuest`` rows do not add up.
    """
    check_columns = ['fileName', 'prolificID', 'conditionName',
                     'numTrialsMissing', 'numLinesMissing', 'warning_bool']

    all_data = []
    all_checks = []

    for file_name in files:
        mainOutput = pd.read_csv(os.path.join(mydir, file_name))

        prolificID = mainOutput['ProlificParticipantID'].dropna().iloc[0]
        subj_logThresholds = {'prolificID': prolificID}

        for condition_name in condition_names:
            condition_data = mainOutput[mainOutput['conditionName'] == condition_name]

            end_of_loop_means = condition_data['questMeanAtEndOfTrialsLoop'].dropna()
            assert len(end_of_loop_means) == 1, (
                f'{file_name}: expected exactly one questMeanAtEndOfTrialsLoop '
                f'value for {condition_name}, found {len(end_of_loop_means)}')
            subj_logThresholds[condition_name] = end_of_loop_means.iloc[0]

            if exclude_trial_count_bool:
                # Count trials sent to quest
                trial_sent = condition_data['trialGivenToQuest']
                # the last line is the line for reporting the threshold
                num_total_count = len(trial_sent) - 1
                # Compare on the string form so that both bool and text values match.
                num_trial_sent = sum(str(this_trial) == 'True' for this_trial in trial_sent)
                num_trial_not_sent = sum(str(this_trial) == 'False' for this_trial in trial_sent)
                # Missing entries, not counting the threshold-report line.
                num_missing_line = int(trial_sent.isna().sum()) - 1
                assert num_total_count == num_trial_sent + num_trial_not_sent + num_missing_line, (
                    f'{file_name}: trialGivenToQuest rows do not add up for {condition_name}')

                trial_sent_bool = num_trial_sent >= num_trials_per_staircase

                # Any non-missing entry in the Warning column counts as a warning.
                if 'Warning' in condition_data.columns:
                    warning_bool = bool(condition_data['Warning'].notna().any())
                else:
                    warning_bool = False

                if not trial_sent_bool or warning_bool:
                    subj_logThresholds[condition_name] = np.nan
                    all_checks.append({
                        'fileName': file_name,
                        'prolificID': prolificID,
                        'conditionName': condition_name,
                        'numTrialsMissing': num_trial_not_sent,
                        'numLinesMissing': num_missing_line,
                        'warning_bool': warning_bool,
                    })

            if exclude_questSD:
                questSD = condition_data['questSDAtEndOfTrialsLoop'].dropna().iloc[0]
                # `not (a < b)` also rejects a NaN SD, matching the original check.
                if not questSD < max_questSD:
                    subj_logThresholds[condition_name] = np.nan

        all_data.append(subj_logThresholds)

    # Build the frames once, after the loop. Explicit columns keep the schema
    # stable even when there are no files or no rejected thresholds.
    data_columns = list(dict.fromkeys(['prolificID', *condition_names]))
    all_data_df = pd.DataFrame(all_data, columns=data_columns)
    all_checks_df = pd.DataFrame(all_checks, columns=check_columns)

    return all_data_df, all_checks_df
            


In [8]:
# Session 1: extract thresholds for the four block-1 / block-2 conditions.
thresholds_names_sess1 = [
    'crowding_R8_block1', 'crowding_L8_block1',
    'crowding_R8_block2', 'crowding_L8_block2',
]

# getThresholds returns a pair, so df_firstSess[0] holds the thresholds
# and df_firstSess[1] holds the quality-check records.
df_firstSess = getThresholds(
    mydir=dir_firstSess,
    files=files_firstSess,
    condition_names=thresholds_names_sess1,
    exclude_trial_count_bool=True,
    exclude_questSD=True,
)

In [6]:
# Session 2: extract thresholds for the four block-3 / block-4 conditions.
thresholds_names_sess2 = [
    'crowding_R8_block3', 'crowding_L8_block3',
    'crowding_R8_block4', 'crowding_L8_block4',
]

# getThresholds returns a pair, so df_secondSess[0] holds the thresholds
# and df_secondSess[1] holds the quality-check records.
df_secondSess = getThresholds(
    mydir=dir_secondSess,
    files=files_secondSess,
    condition_names=thresholds_names_sess2,
    exclude_trial_count_bool=True,
    exclude_questSD=True,
)


In [9]:
# Stack the quality-check tables (element 1 of each getThresholds result)
# from both sessions, then order the rows by participant.
df_both_sessions_warning = (
    pd.concat([df_firstSess[1], df_secondSess[1]], ignore_index=True)
    .sort_values(by='prolificID')
    .reset_index(drop=True)
)

In [10]:
# Each row is one flagged threshold; distinct IDs and file names give the
# number of affected participants and session files.
print('Number of thresholds with not enough trials: ',
      df_both_sessions_warning.shape[0])
print('Number of unique subjects with not enough trials: ',
      df_both_sessions_warning['prolificID'].nunique(dropna=False))
print('Number of unique sessions with not enough trials: ',
      df_both_sessions_warning['fileName'].nunique(dropna=False))



Number of thresholds with not enough trials:  63
Number of unique subjects with not enough trials:  26
Number of unique sessions with not enough trials:  28


In [11]:
# Save the flagged-threshold table to the current working directory, without the row index.
df_both_sessions_warning.to_csv(path_or_buf='df_both_sessions_warning.csv', index=False)

## Report
The output shows that, among all thresholds/staircases with fewer than 35 trials sent to QUEST, none of the incidents occurred because a letter fell off the screen. In fact, no one who participated in this experiment received any warning messages in either session.

Only one subject ('67486ef5b88595ed8d20fe7c') experienced this (a staircase with fewer than 35 trials sent to QUEST) in both sessions 1 and 2.