In [3]:
import pandas as pd
import os
import numpy as np
import re
import matplotlib.pyplot as plt
from statsmodels.stats.anova import AnovaRM
from scipy import stats

make functions to grab subject info and also walk through all files within our data folder and look for behavioral data csv files

In [4]:
# this function gets the subject and run from the file name of the behavioral data file 
def extract_subject_and_run(input_str):
    # Define the regex patterns
    subject_pattern = re.compile(r'SF\d+')
    run_pattern = re.compile(r'run\d+')

    # Search for the patterns in the string
    subject_match = subject_pattern.search(input_str)
    run_match = run_pattern.search(input_str)

    # Extract the matched strings
    subject_number = subject_match.group(0) if subject_match else None
    run_number = run_match.group(0) if run_match else None
    
    return subject_number, run_number

#this function makes a list of our behavioral data filepaths
def find_csv_files(root_folder):
    file_paths = []
    
    # Check if root_folder exists
    if not os.path.exists(root_folder):
        print(f"Error: {root_folder} does not exist!")
        return file_paths
    
    # Walk through root_folder
    for foldername, subfolders, filenames in os.walk(root_folder):
        for file in filenames:
            if file.endswith('.csv') and not file.startswith('.'):  # Avoid hidden files
                file_path = os.path.join(foldername, file)
                # print(f"Appending CSV file: {file_path}")
                file_paths.append(file_path)
    
    return file_paths

use the functions we just made, and create a raw data dataframe

In [9]:
# Specify the main folder
main_folder = '/Users/jinjiang-macair/Library/CloudStorage/Box-Box/CNRISpring24_StabFlexTeam/Data'

# Find .csv files recursively
file_paths = find_csv_files(main_folder)

# initialize empty dataframe to store raw data
raw_data = pd.DataFrame()

for filePath in file_paths:
    df = pd.read_csv(filePath)

    # Extract subject and run information
    subject, run = extract_subject_and_run(filePath)
    df['subject'] = subject
    df['run'] = run
    
    # Append the raw data to the raw data dataframe
    raw_data = pd.concat([raw_data, df])

raw_data

Unnamed: 0,stim,ITI,cuedTask,taskSequence,congruency,corrResp,blockType,trials.thisRepN,trials.thisTrialN,trials.thisN,...,date,expName,psychopyVersion,frameRate,frameDur,Unnamed: 39,subject,thisRow.t,notes,Unnamed: 41
0,,,,,,,,,,,...,2023-06-09_15h26.28.663,stability_flexibility_tradeoff,2022.2.5,59.962859,0.016667,,SF05,,,
1,,,,,,,,,,,...,2023-06-09_15h26.28.663,stability_flexibility_tradeoff,2022.2.5,59.962859,0.016667,,SF05,,,
2,,,,,,,,,,,...,2023-06-09_15h26.28.663,stability_flexibility_tradeoff,2022.2.5,59.962859,0.016667,,SF05,,,
3,1.0,1.5,p,n,c,1.0,C,0.0,0.0,0.0,...,2023-06-09_15h26.28.663,stability_flexibility_tradeoff,2022.2.5,59.962859,0.016667,,SF05,,,
4,8.0,1.0,p,r,c,2.0,C,0.0,1.0,1.0,...,2023-06-09_15h26.28.663,stability_flexibility_tradeoff,2022.2.5,59.962859,0.016667,,SF05,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,3.0,2.0,m,s,c,1.0,A,0.0,124.0,124.0,...,2024-02-06_13h27.38.199,stability_flexibility_tradeoff,2022.2.5,60.014031,0.016667,,SF39,,,
128,7.0,2.0,m,r,i,2.0,A,0.0,125.0,125.0,...,2024-02-06_13h27.38.199,stability_flexibility_tradeoff,2022.2.5,60.014031,0.016667,,SF39,,,
129,3.0,1.0,m,r,c,1.0,A,0.0,126.0,126.0,...,2024-02-06_13h27.38.199,stability_flexibility_tradeoff,2022.2.5,60.014031,0.016667,,SF39,,,
130,2.0,1.0,m,r,i,1.0,A,0.0,127.0,127.0,...,2024-02-06_13h27.38.199,stability_flexibility_tradeoff,2022.2.5,60.014031,0.016667,,SF39,,,


In [10]:
file_paths

['/Users/jinjiang-macair/Library/CloudStorage/Box-Box/CNRISpring24_StabFlexTeam/Data/20230609.SF05.SF05/Behavioral/SF05_stability_flexibility_tradeoff_run4_2023-06-09_15h26.28.663.csv',
 '/Users/jinjiang-macair/Library/CloudStorage/Box-Box/CNRISpring24_StabFlexTeam/Data/20230609.SF05.SF05/Behavioral/SF05_stability_flexibility_tradeoff_run3_2023-06-09_15h16.48.879.csv',
 '/Users/jinjiang-macair/Library/CloudStorage/Box-Box/CNRISpring24_StabFlexTeam/Data/20230609.SF05.SF05/Behavioral/SF05_stability_flexibility_tradeoff_run2_2023-06-09_15h05.35.381.csv',
 '/Users/jinjiang-macair/Library/CloudStorage/Box-Box/CNRISpring24_StabFlexTeam/Data/20230609.SF05.SF05/Behavioral/SF05_stability_flexibility_tradeoff_run1_2023-06-09_14h45.12.881.csv',
 '/Users/jinjiang-macair/Library/CloudStorage/Box-Box/CNRISpring24_StabFlexTeam/Data/20231006.SF21.SF21/Behavioral/SF21_stability_flexibility_tradeoff_run2_2023-10-06_14h09.04.530.csv',
 '/Users/jinjiang-macair/Library/CloudStorage/Box-Box/CNRISpring24_Sta

this is just to print out the column names

In [11]:
raw_data.columns.values.tolist()

['stim',
 'ITI',
 'cuedTask',
 'taskSequence',
 'congruency',
 'corrResp',
 'blockType',
 'trials.thisRepN',
 'trials.thisTrialN',
 'trials.thisN',
 'trials.thisIndex',
 'main2_start',
 'key_pressed',
 'key_RT',
 'routine_end',
 'scanner_start',
 'run',
 'fixation_start',
 'fixation_start_rel',
 'fixation_end_rel',
 'stimulus_start',
 'stimulus_start_rel',
 'stimulus_end_rel',
 'acc',
 'corrResp.1',
 'feedback_start',
 'feedback_start_rel',
 'feedback_end_rel',
 'blockType.1',
 'blockBreak_start',
 'blockBreak_start_rel',
 'blockBreak_end_rel',
 'Participant',
 'Run (1-4)',
 'date',
 'expName',
 'psychopyVersion',
 'frameRate',
 'frameDur',
 'Unnamed: 39',
 'subject',
 'thisRow.t',
 'notes',
 'Unnamed: 41']

now you get the average accuracy and reaction time for each subject :)  
here's the pandas documentation: https://pandas.pydata.org/pandas-docs/stable/index.html  
but also ask chatpgt and stackoverflow for help  
HINT: try the groupby function from pandas

get accuracy and rt

accuracy df is just all of the accurate trials

In [13]:
accuracy_df = (raw_data['acc'] == 1)

In [14]:
# Filter the rows where 'acc' is 1
correct_trials = raw_data[accuracy_df]

# Step 1: Overall average acc and key_RT for each subject
overall_avg_acc = raw_data.groupby('subject')['acc'].mean()
overall_avg_key_RT = correct_trials.groupby('subject')['key_RT'].mean() #only get RT on correct trials

# Convert each Series to a DataFrame
overall_avg_acc_df = overall_avg_acc.to_frame(name='acc')
overall_avg_key_RT_df = overall_avg_key_RT.to_frame(name='key_RT')

overall_avg = pd.concat([overall_avg_acc_df, overall_avg_key_RT_df], axis=1)

In [26]:
overall_avg

Unnamed: 0_level_0,acc,key_RT
subject,Unnamed: 1_level_1,Unnamed: 2_level_1
SF01,0.876953,0.77504
SF02,0.875,0.917387
SF03,0.960938,0.769149
SF04,0.912109,0.873968
SF05,0.734375,0.924129
SF06,0.988281,0.814678
SF07,0.943359,0.893543
SF08,0.958984,0.819765
SF09,0.962891,0.780295
SF10,0.878906,0.873355


congruency effect

In [22]:
# Calculate the average RT for correct trials, grouped by subject and congruency
congruency_avg = correct_trials.groupby(['subject', 'congruency'])['key_RT'].mean()

# Unstack the congruency level of the index to get 'i' and 'c' as separate columns
congruency_avg_unstacked = congruency_avg.unstack()

In [23]:
congruency_avg

subject  congruency
SF01     c             0.725651
         i             0.833299
SF02     c             0.883324
         i             0.954634
SF03     c             0.695559
                         ...   
SF37     i             0.948813
SF39     c             0.959107
         i             0.934942
SF40     c             0.764731
         i             0.806893
Name: key_RT, Length: 76, dtype: float64

In [24]:
congruency_avg_unstacked

congruency,c,i
subject,Unnamed: 1_level_1,Unnamed: 2_level_1
SF01,0.725651,0.833299
SF02,0.883324,0.954634
SF03,0.695559,0.84705
SF04,0.833409,0.917219
SF05,0.88276,0.978777
SF06,0.775361,0.854622
SF07,0.836868,0.952859
SF08,0.770506,0.872558
SF09,0.730931,0.832758
SF10,0.86114,0.887191


In [28]:
 # Calculate the congruency effect as the difference between incongruent and congruent RTs
 # and add this column to our overall avg dataframe
overall_avg['congruency_effect'] = congruency_avg_unstacked['i'] - congruency_avg_unstacked['c']

# our overall avg dataframe now has a 'congruency_effect' column for each subject
overall_avg

Unnamed: 0_level_0,acc,key_RT,congruency_effect
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SF01,0.876953,0.77504,0.107648
SF02,0.875,0.917387,0.071311
SF03,0.960938,0.769149,0.151491
SF04,0.912109,0.873968,0.08381
SF05,0.734375,0.924129,0.096017
SF06,0.988281,0.814678,0.079261
SF07,0.943359,0.893543,0.115991
SF08,0.958984,0.819765,0.102053
SF09,0.962891,0.780295,0.101826
SF10,0.878906,0.873355,0.026051


### get listwide proportion congruent effect
first get the avg rt for each block type for both incongruent and incongruent  

block proportions:  
A: 0.25 congruent, 0.25 switch.  
B: 0.25 congruent, 0.75 switch.  
C: 0.75 congruent, 0.25 switch.  
D: 0.75 congruent, 0.75 switch.  

In [37]:
# Step 3: LWPC effect
# Group the data and then unstack
congruency_and_block_type = correct_trials.groupby(['subject', 'blockType', 'congruency'])['key_RT'].mean()
congruency_and_block_type_unstacked = congruency_and_block_type.unstack(level=['blockType', 'congruency']) #this df has RT by block type and congruency, can use for plotting the block types against each other for congruency

In [38]:
congruency_and_block_type

subject  blockType  congruency
SF01     A          c             0.744833
                    i             0.812455
         B          c             0.825798
                    i             0.887126
         C          c             0.663771
                                    ...   
SF40     B          i             0.815193
         C          c             0.750663
                    i             0.791934
         D          c             0.773234
                    i             0.792327
Name: key_RT, Length: 304, dtype: float64

In [39]:
congruency_and_block_type_unstacked

blockType,A,A,B,B,C,C,D,D
congruency,c,i,c,i,c,i,c,i
subject,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
SF01,0.744833,0.812455,0.825798,0.887126,0.663771,0.747537,0.74617,0.826215
SF02,0.8952,0.941414,0.916627,0.967559,0.845152,0.902734,0.907385,1.017206
SF03,0.684137,0.837116,0.712594,0.821194,0.658829,0.873606,0.729831,0.935863
SF04,0.828355,0.898364,0.92997,0.930391,0.795794,0.904905,0.840577,0.95238
SF05,0.860924,0.980014,0.958929,0.996739,0.862242,0.944501,0.888325,0.950173
SF06,0.84941,0.825586,0.772321,0.888333,0.77032,0.849756,0.756701,0.844394
SF07,0.869958,0.953604,0.975295,0.961167,0.774597,0.935221,0.841606,0.943581
SF08,0.82186,0.827629,0.841475,0.893882,0.748619,0.904992,0.752165,0.916787
SF09,0.762968,0.812676,0.791063,0.860154,0.711162,0.777816,0.72123,0.867585
SF10,0.802838,0.855362,0.905449,0.912434,0.8203,0.824532,0.909195,0.99402


actually calculate lwpc effect

congruency effect in high congruency blocks - congruency effect in low congruency block  
1. blocks C and D are high congruency  
2. blocks A and B are low congruency  
3. So we can treat blocks C and D as one big high congruency block and blocks A and B as one big low congruency block  
4. Thus we can find the congruency effect for the combined high congruency block and subtract the congruency effect for the combined low congruency block  
(C_i + D_i - C_c - D_c) - (A_i + B_i - A_c - B_c)

In [42]:
incongruent_avg_high_con_blocks = congruency_and_block_type_unstacked['C']['i'] + congruency_and_block_type_unstacked['D']['i']
congruent_avg_high_con_blocks = congruency_and_block_type_unstacked['C']['c'] + congruency_and_block_type_unstacked['D']['c']
incongruent_avg_low_con_blocks = congruency_and_block_type_unstacked['A']['i'] + congruency_and_block_type_unstacked['B']['i']
congruent_avg_low_con_blocks = congruency_and_block_type_unstacked['A']['c'] + congruency_and_block_type_unstacked['B']['c']

overall_avg['lwpc_effect'] = (incongruent_avg_high_con_blocks - congruent_avg_high_con_blocks) - (incongruent_avg_low_con_blocks - congruent_avg_low_con_blocks)


In [43]:
overall_avg

Unnamed: 0_level_0,acc,key_RT,congruency_effect,lwpc_effect
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SF01,0.876953,0.77504,0.107648,0.034861
SF02,0.875,0.917387,0.071311,0.070257
SF03,0.960938,0.769149,0.151491,0.159229
SF04,0.912109,0.873968,0.08381,0.150484
SF05,0.734375,0.924129,0.096017,-0.012793
SF06,0.988281,0.814678,0.079261,0.074941
SF07,0.943359,0.893543,0.115991,0.193081
SF08,0.958984,0.819765,0.102053,0.26282
SF09,0.962891,0.780295,0.101826,0.09421
SF10,0.878906,0.873355,0.026051,0.029548


In [45]:
# Select only numeric columns from the DataFrame (basically skip the subjects column cuz its all strings and cant take the average of strings)
overall_avg_numeric = overall_avg.select_dtypes(include=[np.number])

# Calculate the mean across subjects for each numeric column
means = overall_avg_numeric.mean()

# Calculate the standard error of the mean (SEM) for each numeric column
sem = overall_avg_numeric.sem()

print("Means:\n", means)
print("\nStandard Errors:\n", sem)

Means:
 acc                  0.876285
key_RT               0.854841
congruency_effect    0.096497
lwpc_effect          0.100008
dtype: float64

Standard Errors:
 acc                  0.017599
key_RT               0.012852
congruency_effect    0.008521
lwpc_effect          0.017255
dtype: float64
