In [1]:
import pandas as pd 
import numpy as np 
import pathlib as path
import matplotlib.pyplot as plt
import random
from PIL import Image
from random import sample

In [82]:


PATH_TO_DATA = path.Path(r'E:\working mem represntations pilot\downloaded data') #this is where all downladed data exists (after extracting the zip file)
PATH_TO_PROCESSED = path.Path(r'E:\working mem represntations pilot\processed data') #this will define the name of the location to save the processed csv files (note that it will also create a "batch" directory)




batch_name='batch1' #defines the name of the current subfolder to work on

PATH_TO_BATCH=PATH_TO_DATA / batch_name

#check how many participants are in the folder: 
participants_list=[file.name for file in PATH_TO_BATCH.iterdir() if '.csv' in file.name]
participants_list
print(f'# participants in batch: {len(participants_list)}')

# participants in batch: 4


In [85]:

def process_worker_results(PATH_TO_BATCH,subject_name):
    cur_sub=pd.read_csv(PATH_TO_BATCH / subject_name)
    sub_demographics=cur_sub[['workID',	'Age'	,'Gender']].iloc[0]

    #extract demo related information: 

    #if participant did not click on the arrow during the demo a 'demo_encoding_response.rt' colmumn wont exists: thus we will create one and fill it with nans (so everything will be consistent with other participants)
    if not('demo_encoding_response.rt' in cur_sub.columns):
        cur_sub['demo_encoding_response.rt']=np.nan
    demo_columns=['demo_encoding_loop.thisTrialN','DemoImage','DemoCorrect','demo_encoding_response.rt']

    sub_demo_information=cur_sub[demo_columns]
    empty_inds=sub_demo_information.loc[sub_demo_information.isnull().apply(lambda x: all(x), axis=1)].index
    #drop irrelevant columns: 
    sub_demo_information=sub_demo_information.drop(index=empty_inds)


    #extract the demo test columns: 
    demo_test_columns=['demo_test_response.keys','demo_test_response.corr','demo_test_response.rt','demo_test_loop.thisTrialN','DemoImage1','DemoImage2','DemoCorrectTest']
    sub_demo_test_information=cur_sub[demo_test_columns]
    empty_inds=sub_demo_test_information.loc[sub_demo_test_information.isnull().apply(lambda x: all(x), axis=1)].index
    #drop irrelevant columns: 
    sub_demo_test_information=sub_demo_test_information.drop(index=empty_inds)

    demo_df=pd.concat([sub_demo_information.reset_index(),sub_demo_test_information.reset_index()],axis=1)


    #extract real experiment related information: 
    encoding_related_columns=['test_encoding_response.keys','test_encoding_response.corr','trials.thisTrialN','target_image','pair','layer','correct','test_encoding_response.rt','key_resp_end.keys']
    sub_encoding_information=cur_sub[encoding_related_columns]
    #encoding section ends with a key press of the space key: so seperate this phase by finding this space key row
    end_of_section_ind=np.where(sub_encoding_information['key_resp_end.keys']=='space')[0][0]
    sub_encoding_information=sub_encoding_information.iloc[0:end_of_section_ind]
    #remove all the rows that precede the real encoding phase: 
    empty_inds=sub_encoding_information.loc[sub_encoding_information.isnull().apply(lambda x: all(x), axis=1)].index
    sub_encoding_information=sub_encoding_information.drop(index=empty_inds).reset_index()

    #extract real experiment TEST related information: 
    test_related_columns=['layer','correct','test_test_response.keys','test_test_response.corr','test_test_response.rt','trials_2.thisRepN','trials_2.thisTrialN','trials_2.thisN','trials_2.thisIndex','trials_2.ran','image1','image2']
    sub_test_information=cur_sub[test_related_columns].iloc[end_of_section_ind+2:-1]


    subject_dictionary=dict()
    subject_dictionary['demographics']=sub_demographics
    subject_dictionary['demo_df']=demo_df
    subject_dictionary['encoding_df']=sub_encoding_information
    subject_dictionary['test_df']=sub_test_information

    return subject_dictionary



In [95]:
all_subjects_demo_df=pd.DataFrame()
all_subjects_encoding_df=pd.DataFrame()
all_subjects_test_df=pd.DataFrame()
all_subjects_biographics_df=pd.DataFrame()

for subject_name in participants_list:
    curr_subject_dictionary=process_worker_results(PATH_TO_BATCH,subject_name)
    curr_demo_df=curr_subject_dictionary['demo_df']
    curr_demo_df['subject']=subject_name
    curr_test_df=curr_subject_dictionary['test_df']
    curr_test_df['subject']=subject_name
    curr_encoding_df=curr_subject_dictionary['encoding_df']
    curr_encoding_df['subject']=subject_name
    curr_demographics_df=curr_subject_dictionary['demographics']
    curr_demographics_df['subject']=subject_name

    all_subjects_demo_df=pd.concat([all_subjects_demo_df,curr_demo_df],axis=0,ignore_index=True)
    all_subjects_encoding_df=pd.concat([all_subjects_encoding_df,curr_encoding_df],axis=0,ignore_index=True)
    all_subjects_test_df=pd.concat([all_subjects_test_df,curr_test_df],axis=0,ignore_index=True)
    all_subjects_biographics_df=pd.concat([all_subjects_biographics_df,pd.DataFrame(curr_demographics_df).T],axis=0,ignore_index=True)


all_subjects_demo_df.to_csv(PATH_TO_PROCESSED / 'all_subjects_demo_df.csv')
all_subjects_encoding_df.to_csv(PATH_TO_PROCESSED / 'all_subjects_encoding_df.csv')
all_subjects_test_df.to_csv(PATH_TO_PROCESSED / 'all_subjects_test_df.csv')
all_subjects_biographics_df.to_csv(PATH_TO_PROCESSED / 'all_subjects_biographics_df.csv')

In [96]:
all_subjects_biographics_df

Unnamed: 0,workID,Age,Gender,subject
0,555,30.0,him/they,555_2022-09-15_12h04.41.467.csv
1,,,,2022-09-13_16h02.16.028.csv
2,maya,,,maya_2022-09-15_12h10.00.662.csv
3,nitzan,,,nitzan_2022-09-15_12h24.28.998.csv


In [97]:
all_subjects_demo_df

Unnamed: 0,index,demo_encoding_loop.thisTrialN,DemoImage,DemoCorrect,demo_encoding_response.rt,index.1,demo_test_response.keys,demo_test_response.corr,demo_test_response.rt,demo_test_loop.thisTrialN,DemoImage1,DemoImage2,DemoCorrectTest,subject
0,4,0.0,flower1.jpg,,,12.0,left,1.0,5.1422,0.0,flower1.jpg,flower1_pair.jpg,left,555_2022-09-15_12h04.41.467.csv
1,5,1.0,flower2.jpg,,,13.0,right,1.0,0.3427,1.0,flower2_pair.jpg,flower2.jpg,right,555_2022-09-15_12h04.41.467.csv
2,6,2.0,left.jpg,left,,14.0,left,1.0,0.1999,2.0,flower3.jpg,flower3_pair.jpg,left,555_2022-09-15_12h04.41.467.csv
3,7,3.0,flower3.jpg,,,15.0,right,0.0,0.0918,3.0,flower4.jpg,flower4_pair.jpg,left,555_2022-09-15_12h04.41.467.csv
4,8,4.0,flower4.jpg,,,16.0,left,0.0,0.0432,4.0,flower5_pair.jpg,flower5.jpg,right,555_2022-09-15_12h04.41.467.csv
5,9,5.0,flower5.jpg,,,17.0,right,0.0,0.0942,5.0,flower6.jpg,flower6_pair.jpg,left,555_2022-09-15_12h04.41.467.csv
6,10,6.0,flower6.jpg,,,,,,,,,,,555_2022-09-15_12h04.41.467.csv
7,4,0.0,flower1.jpg,,,12.0,left,1.0,12.4292,0.0,flower1.jpg,flower1_pair.jpg,left,2022-09-13_16h02.16.028.csv
8,5,1.0,flower2.jpg,,,13.0,left,0.0,2.371,1.0,flower2_pair.jpg,flower2.jpg,right,2022-09-13_16h02.16.028.csv
9,6,2.0,left.jpg,left,0.642,14.0,right,0.0,0.7846,2.0,flower3.jpg,flower3_pair.jpg,left,2022-09-13_16h02.16.028.csv


In [99]:
all_subjects_encoding_df

Unnamed: 0,index,test_encoding_response.keys,test_encoding_response.corr,trials.thisTrialN,target_image,pair,layer,correct,test_encoding_response.rt,key_resp_end.keys,subject
0,20,,1.0,0.0,2901941401-292798.jpg,2898618327-366317.jpg,1.0,,,,555_2022-09-15_12h04.41.467.csv
1,21,,1.0,1.0,2964222378-71332.jpg,2981240427-370099.jpg,3.0,,,,555_2022-09-15_12h04.41.467.csv
2,22,,1.0,2.0,2981241414-145842.jpg,2427873848-47312.jpg,3.0,,,,555_2022-09-15_12h04.41.467.csv
3,23,,1.0,3.0,2901947423-70311.jpg,2860312398-62572.jpg,1.0,,,,555_2022-09-15_12h04.41.467.csv
4,24,,1.0,4.0,2874299429-139694.jpg,2986447306-296584.jpg,1.0,,,,555_2022-09-15_12h04.41.467.csv
...,...,...,...,...,...,...,...,...,...,...,...
254,80,,1.0,60.0,2864907432-362042.jpg,2864901420-212147.jpg,1.0,,,,nitzan_2022-09-15_12h24.28.998.csv
255,81,,1.0,61.0,2964222378-71332.jpg,2901933382-367209.jpg,2.0,,,,nitzan_2022-09-15_12h24.28.998.csv
256,82,,1.0,62.0,2901947423-70311.jpg,2238577590-185467.jpg,3.0,,,,nitzan_2022-09-15_12h24.28.998.csv
257,83,,1.0,63.0,2986451363-73599.jpg,2860302315-210704.jpg,2.0,,,,nitzan_2022-09-15_12h24.28.998.csv


In [98]:
all_subjects_test_df

Unnamed: 0,layer,correct,test_test_response.keys,test_test_response.corr,test_test_response.rt,trials_2.thisRepN,trials_2.thisTrialN,trials_2.thisN,trials_2.thisIndex,trials_2.ran,image1,image2,subject
0,1.0,left,right,0.0,0.8423,0.0,0.0,0.0,0.0,1.0,2460547467-349422.jpg,2898618440-216545.jpg,555_2022-09-15_12h04.41.467.csv
1,2.0,right,left,0.0,0.6252,0.0,1.0,1.0,1.0,1.0,2898618440-216547.jpg,2446759803-49249.jpg,555_2022-09-15_12h04.41.467.csv
2,1.0,right,right,1.0,0.5758,0.0,2.0,2.0,2.0,1.0,2984626312-370849.jpg,2883268412-365898.jpg,555_2022-09-15_12h04.41.467.csv
3,2.0,right,left,0.0,0.5506,0.0,3.0,3.0,3.0,1.0,2460547508-51572.jpg,2883273443-68193.jpg,555_2022-09-15_12h04.41.467.csv
4,1.0,left,right,0.0,0.4692,0.0,4.0,4.0,4.0,1.0,2901941401-292798.jpg,2898618327-366317.jpg,555_2022-09-15_12h04.41.467.csv
...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2.0,left,right,0.0,0.0683,0.0,55.0,55.0,55.0,1.0,2874302418-65978.jpg,2238481198-19823.jpg,nitzan_2022-09-15_12h24.28.998.csv
236,3.0,left,left,1.0,0.0835,0.0,56.0,56.0,56.0,1.0,2452380832-348593.jpg,2237858942-299356.jpg,nitzan_2022-09-15_12h24.28.998.csv
237,2.0,left,right,0.0,0.0671,0.0,57.0,57.0,57.0,1.0,2981231403-71642.jpg,2874305405-214397.jpg,nitzan_2022-09-15_12h24.28.998.csv
238,1.0,right,left,0.0,0.0390,0.0,58.0,58.0,58.0,1.0,2901942428-69919.jpg,2901939379-143480.jpg,nitzan_2022-09-15_12h24.28.998.csv
