In [1]:
# default_exp datasets

# PEERS Dataset
The Penn Electrophysiology of Encoding and Retrieval Study (PEERS) is a multi-session experiment looking at scalp EEG during free recall and recognition. 

The data has been used in several publications including:

- Lohnas, L. J., Polyn, S. M., and Kahana, M. J. Expanding the scope of memory search: Intralist and interlist effects in free recall. Psychological Review. 
- Long, N. M., Danoff, M. S., and Kahana, M. J. Recall dynamics reveal the retrieval of emotional context. Psychonomic Bulletin and Review. 
- Healey, M. K., Crutchley, P., and Kahana, M. J. (2014). Individual differences in memory search and their relation to intelligence. Journal of Experimental Psychology: General, 143(4), 1553–1569
- Healey, M. K. and Kahana, M. J. (2014). Is memory search governed by universal principles or idiosyncratic strategies? Journal of Experimental Psychology: General, 143, 575–596. 
- Lohnas, L. J. and Kahana, M. J. (2014). Compound cuing in free recall. Journal of Experimental Psychology: Learning, Memory and Cognition, 40(1), 12-24.
- Long, N. M., Burke, J. F., and Kahana, M. J. (2014). Subsequent memory effect in intracranial and scalp eeg. NeuroImage, 84, 488–494.  

In [1]:
import os
import pandas as pd
from tqdm import trange
from psifr import fr
import numpy as np

def prepare_peers_data(data_path):
    
    # build list of subject directories that excludes older subjects
    with open(os.path.join(data_path, 'PEERS_older_adult_subject_list.txt')) as f:
        older_subjects = [each for each in f.read().split('\n')[:-1]]

    with open(os.path.join(data_path, 'healkaha2015_subjects.txt')) as f:
        selected_subjects = np.unique(np.array([int(each) for each in f.read().split('\n')[:-1]])).tolist()
        
    subject_dirs = []
    for each in os.listdir(data_path):
        try:
            int(each[-3:])
            if int(each[-3:]) in selected_subjects:
                subject_dirs.append(each)
        except:
            pass

    # throw error if we don't find as many directories as we've selected subjects
    #assert(len(subject_dirs) == len(selected_subjects))
    
    #TODO: use separate columns for encoding and recall response/rts
    table_columns = [
        'subject', 'list', 'trial_type', 'position', 'item', 'item_string_index',
        'item_string', 'subject_id', 'older', 'session', 'session_list', 'task', 'response', 'response_time']
    table = []
        
    # loop through subjects
    for subject_index in trange(len(subject_dirs)):
        subject_dir = subject_dirs[subject_index]
        subject_id = int(subject_dir[-3:])
        older = 1 if subject_dir[-3:] in older_subjects else 0
        subject_list_index = -1
    
        # loop through sessions
        session_dirs = [
            each for each in os.listdir(os.path.join(data_path, subject_dir)) 
            if each[:7] == 'session']
        
        for session_index, session_dir in enumerate(session_dirs):

            # # skip practice runs
            # if session_index == 0:
            #     continue

            # load session log file
            session_log_path = os.path.join(data_path, subject_dir, session_dir, 'session.log')
            try:
                with open(session_log_path) as f:
                    session_log = f.read().split('\n')[:-1]
            except FileNotFoundError:
                continue

            # identify study events in session log file
            subject_list_index += 1
            prior_list_index = -1
            study_position = 0
            presentation = []
            recalled = []
            presentation_entries = []
            recall_entries = []
            for line_index, line in enumerate(session_log):

                if '\t' not in line:
                    continue

                line = line.split('\t')
                event_type = line[2]
                if (event_type != 'FR_PRES') and (event_type != 'SESS_END'):
                    continue
                
                # if we just transitioned to a new list_index, we need to reset the study position
                # we also need to add the previous list's corresponding recall events
                if event_type != 'SESS_END':
                    current_list_index = int(line[3]) + 1
                if (current_list_index != prior_list_index) or (event_type == 'SESS_END'):
                    
                    try:
                        with open(os.path.join(
                            data_path, subject_dir, session_dir, f'{prior_list_index-1}.par')) as f:
                            recall_events = f.read().split('\n')[:-1]
                            
                        recall_entries = []
                        recall_index = 0
                        for recall_event in recall_events:
                            recall_event = recall_event.split('\t')

                            response_time = int(recall_event[0])
                            item_string_index = int(recall_event[1])
                            item_string = recall_event[2]

                            if (item_string_index in presentation) and (item_string_index not in recalled):
                                recall_index += 1
                                entry = [
                                    subject_index, subject_list_index, 'recall', recall_index, 
                                    presentation.index(item_string_index)+1, item_string_index, 
                                    item_string, subject_id, older, session_index, prior_list_index, task, '-1', response_time
                                ]
                                recalled.append(item_string_index)
                                recall_entries.append(entry)

                    except FileNotFoundError:
                        recall_entries = []

                    if len(presentation_entries) > 0:
                        table += presentation_entries
                        table += recall_entries

                    if event_type == 'SESS_END':
                        break

                    study_position = 0
                    subject_list_index += 1
                    presentation = []
                    recalled = []
                    presentation_entries = []
                    recall_entries = []

                prior_list_index = current_list_index
                item_string = line[4]
                item_string_index = int(line[5])
                task = int(line[6])
                response = int(line[7])
                response_time = int(line[8])
                study_position += 1
                presentation.append(item_string_index)
                entry = [
                    subject_index, subject_list_index, 'study', study_position, 
                    presentation.index(item_string_index)+1, item_string_index, 
                    item_string, subject_id, older, session_index, current_list_index, task, response, response_time
                    ]
                presentation_entries.append(entry)
            
    data = pd.DataFrame(table, columns=table_columns)
    return fr.merge_free_recall(data, list_keys=table_columns[5:-2])

In [2]:
try:
    assert(False)
    print('loading PEERS data...')
    events = pd.read_csv('../../data/PEERS.csv')
except:
    print('preparing PEERS data...')
    events = prepare_peers_data('../../data/ltpFR')
    events.to_csv('../../data/PEERS.csv', index=False)

events

  0%|          | 0/126 [00:00<?, ?it/s]

preparing PEERS data...


100%|██████████| 126/126 [03:15<00:00,  1.55s/it]


Unnamed: 0,subject,list,item,input,output,study,recall,repeat,intrusion,item_string_index,item_string,subject_id,older,session,session_list,task
0,0,1,1,1.0,6.0,True,True,0,False,1062,PIE,63,0,0,1,-1
1,0,1,2,2.0,7.0,True,True,0,False,219,CAKE,63,0,0,1,-1
2,0,1,3,3.0,,True,False,0,False,779,KITE,63,0,0,1,-1
3,0,1,4,4.0,,True,False,0,False,148,BONE,63,0,0,1,-1
4,0,1,5,5.0,,True,False,0,False,668,GUARDIAN,63,0,0,1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620670,125,305,3,,3.0,False,True,0,True,568,FINGERNAIL,244,0,19,12,1
620671,125,305,4,,4.0,False,True,0,True,936,NAIL,244,0,19,12,1
620672,125,305,2,,5.0,False,True,0,True,1332,SNAIL,244,0,19,12,1
620673,125,305,9,,7.0,False,True,0,True,325,CLOCK,244,0,19,12,1


## Tests

In [24]:
from compmemlearn.analyses import fast_spc
from compmemlearn.datasets import events_metadata

query= 'task == -1 & session < 9'
trials, list_lengths, presentations, trial_details = events_metadata(events, query)

print(len(pd.unique(events.query(query).subject)))
np.shape(trials), np.shape(presentations)

126


((2228, 16), (2228, 16))

In [20]:
fast_spc(trials, 16)

array([0.58670171, 0.55809317, 0.5234729 , 0.50274908, 0.48359616,
       0.46704127, 0.45538034, 0.45139267, 0.44664975, 0.45332608,
       0.4605764 , 0.47658752, 0.51374539, 0.56047973, 0.66264878,
       0.75950094])

In [21]:
from compmemlearn.analyses import fast_pfr

fast_pfr(trials, 16)

array([0.07742735, 0.01767265, 0.0119026 , 0.00942541, 0.0084587 ,
       0.00800556, 0.00933478, 0.00942541, 0.01123799, 0.01389644,
       0.02153948, 0.02791372, 0.05525346, 0.07211045, 0.14917528,
       0.3695245 ])

In [13]:
from compmemlearn.analyses import fast_crp

fast_crp(trials, 16)

array([0.07282639, 0.04835114, 0.04615385, 0.04428743, 0.04364447,
       0.04858654, 0.04837388, 0.05303902, 0.05283498, 0.0572185 ,
       0.05923743, 0.06654916, 0.08025741, 0.10246541, 0.23808468,
       0.        , 0.37221208, 0.12036436, 0.08848488, 0.0736083 ,
       0.06388771, 0.06117692, 0.05778673, 0.05494857, 0.05081594,
       0.04623411, 0.0455164 , 0.0443447 , 0.04673968, 0.04366257,
       0.04914005])

## Notes

It recruits both younger adults (16-30) and older adults (60-90). For now, we exclusively use the free recall trials performed by younger adult participants. A file `._PEERS_older_adult_subject_list.txt` identifies which subjects are older.

`ltpFR_lsa.mat` identifies LSA values used for the semantic analyses. I'd have to open it in MATLAB to check out.

Within the ltpFR directory, there is a subdirectory for each subject in PEERS (e.g. `ltpFR/LTP063`). Within each subjects directory there is a subdirectory for each of their experimental sessions (e.g., `ltpFR/LTP063/session_0`). Within each session directory you will find two types of files: `.par` and `.log`

`.log `files record details of every stimuli presented to the subject and can be used to determine which words were presented on each list. Each column gives a different piece of information:  
1 - clock time  
2 - NA  
3 - event type   
4 - list number  
5 - word string 
6 - word number. Corresponds to 1:1638 in word pool.  
7 - task. Ranges between -1 and 1 in FR_PRES events. -1 probably indicates no task. 0 and 1 are distinct tasks.  
8 - response. Ranges between -1 and 3 or receives value -999 in FR_PRES events. When -1, task is -1, so I guess -1 means "null" while -999 means "no response".  
9 - RT. Positive when task is 0 or 1.  
 

`.par` files record details of participants vocal responses. `0.par` - `15.par` **are for free recall lists**. `ffr.par` is for the final free recall period. `r0.par` - `r15.par` **are for the recognition task**. 

Within these files...
- The first column gives the reaction time, in ms relative to the beginning of the recall period. 
- The second column gives item number of the recalled period (corresponds to columns/rows of lsa.mat). 
- The third column gives the word itself.

Potential persistent problems with this code:
- Some indices are skipped
- Item indices aren't "the first input position of this item"
- Inconsistent number of trials per subject
- We are discarding intrusions (that's normal, I think)

### Example File Contents
Example `0.par` file:
```
2050	701	HELICOPTER
3671	969	OBOE
6176	475	DISHWASHER
8426	785	KNIFE
18054	1411	SUPPER
40029	1062	PIE
41620	219	CAKE
```

Example `ffr.par` file:
```
2080	492	DRAGON
4185	1255	SCREWDRIVER
5185	-1	VV
6769	452	DESK
10072	-1	SCENT
13349	116	BEETLE
15225	188	BUGGY
19685	22	ANGEL
21451	1263	SEASHORE
23771	1627	WRITER
26944	1581	WAITRESS
28590	299	CHILD
30910	1634	YOUTH
35434	1442	TEAPOT
37165	1439	TEA
39395	424	CUP
40867	930	MUG
42997	419	CUB
44668	315	CLAM
47522	971	OCTOPUS
50854	1302	SICKLE
52361	872	MARROW
54930	38	ARM
56022	809	LEG
57100	1579	WAIST
59095	589	FOLLOWER
60243	409	CRIMINAL
61161	989	OUTLAW
66758	1415	SUSPECT
68609	277	CHAMBER
77329	220	CALCULATOR
78287	1216	RULER
80941	888	MESSAGE
81689	479	DOCUMENT
82916	540	ESSAY
90280	488	DORM
91362	111	BEDROOM
98705	1598	WEB
102522	1224	SALAD
103522	-1	VV
105345	1352	SPINACH
108678	1083	PLANT
110923	168	BRANCH
112285	841	LOG
120690	-1	VV
122541	109	BEAVER
123324	1107	PORCUPINE
137837	758	JOINT
140101	809	LEG
142875	1250	SCISSORS
152758	334	COB
169001	680	HAMMER
172563	366	CONTAINER
173700	817	LID
181053	1227	SALMON
189634	1373	STAR
194862	177	BROOK
204560	1532	TYPIST
209204	-1	SCENT
217156	512	EAGLE
220015	129	BIRD
233749	123	BEVERAGE
291303	1294	SHOULDER
296062	1320	SKULL
```

Example `r0.par` file:
```
2140	1	Y
3427	7	5
5143	1	Y
6989	7	5
10261	2	N
12975	7	5
16557	2	N
18912	6	4
22544	2	N
25108	7	5
28146	1	Y
30017	7	5
33239	2	N
36701	6	4
39959	1	Y
41840	6	4
45057	2	N
46769	5	3
49921	1	Y
51667	6	4
55593	1	Y
57464	5	3
59979	1	Y
61375	7	5
63885	1	Y
65356	6	4
68434	1	Y
70066	5	3
72435	1	Y
73897	7	5
76805	1	Y
78367	6	4
80931	1	Y
82537	6	4
84992	1	Y
86528	7	5
88938	1	Y
90439	7	5
93093	1	Y
94093	-1	VV
95328	5	3
98296	2	N
100247	6	4
```

`Math.log` files seem to identify discrete sessions. 

```
1297282002223	0	B	Logging Begins
1297281991179	0	E	Logging Ends
1297283068667	0	B	Logging Begins
1297288000957	0	E	Logging Ends
```

`session.log` files are too long to paste here...

```
1297282002222	0	B	Logging Begins
1297282002223	0	SESS_START	1
1297281991179	0	E	Logging Ends
1297283068667	0	B	Logging Begins
1297283068667	0	SESS_START	1
1297283143161	1	FR_PRES	0	PIE	1062	-1	-1	-1297283143161	white	../fonts/Verdana.ttf	upper
1297283147018	1	FR_PRES	0	CAKE	219	-1	-1	-1297283147018	white	../fonts/Verdana.ttf	upper
1297283151099	1	FR_PRES	0	KITE	779	-1	-1	-1297283151099	white	../fonts/Verdana.ttf	upper
1297283155056	1	FR_PRES	0	BONE	148	-1	-1	-1297283155056	white	../fonts/Verdana.ttf	upper
1297283158887	1	FR_PRES	0	GUARDIAN	668	-1	-1	-1297283158887	white	../fonts/Verdana.ttf	upper
1297283162928	1	FR_PRES	0	THIMBLE	1458	-1	-1	-1297283162928	white	../fonts/Verdana.ttf	upper
1297283166851	1	FR_PRES	0	HAYSTACK	694	-1	-1	-1297283166851	white	../fonts/Verdana.ttf	upper
1297283170783	1	FR_PRES	0	ZOO	1637	-1	-1	-1297283170783	white	../fonts/Verdana.ttf	upper
1297283174907	1	FR_PRES	0	DISHWASHER	475	-1	-1	-1297283174907	white	../fonts/Verdana.ttf	upper
1297283178722	1	FR_PRES	0	TAPE	1433	-1	-1	-1297283178722	white	../fonts/Verdana.ttf	upper
1297283182579	1	FR_PRES	0	MONSTER	912	-1	-1	-1297283182579	white	../fonts/Verdana.ttf	upper
1297283186669	1	FR_PRES	0	SWAMP	1416	-1	-1	-1297283186669	white	../fonts/Verdana.ttf	upper
1297283190809	1	FR_PRES	0	KNIFE	785	-1	-1	-1297283190809	white	../fonts/Verdana.ttf	upper
1297283194741	1	FR_PRES	0	SUPPER	1411	-1	-1	-1297283194741	white	../fonts/Verdana.ttf	upper
1297283198823	1	FR_PRES	0	HELICOPTER	701	-1	-1	-1297283198823	white	../fonts/Verdana.ttf	upper
1297283202655	1	FR_PRES	0	OBOE	969	-1	-1	-1297283202655	white	../fonts/Verdana.ttf	upper
1297283208472	1	REC_START
1297283292932	0	REST
1297283294440	1	FR_PRES	1	RECIPE	1175	-1	-1	-1297283294440	white	../fonts/Verdana.ttf	upper
1297283298289	1	FR_PRES	1	KNOT	788	-1	-1	-1297283298289	white	../fonts/Verdana.ttf	upper
1297283302503	1	FR_PRES	1	GRADUATE	653	-1	-1	-1297283302503	white	../fonts/Verdana.ttf	upper
1297283306685	1	FR_PRES	1	PEACH	1031	-1	-1	-1297283306685	white	../fonts/Verdana.ttf	upper
1297283310567	1	FR_PRES	1	SUPERVISOR	1410	-1	-1	-1297283310567	white	../fonts/Verdana.ttf	upper
1297283314715	1	FR_PRES	1	BUCKET	184	-1	-1	-1297283314715	white	../fonts/Verdana.ttf	upper
1297283318831	1	FR_PRES	1	PRUNE	1134	-1	-1	-1297283318831	white	../fonts/Verdana.ttf	upper
1297283322954	1	FR_PRES	1	OWNER	993	-1	-1	-1297283322954	white	../fonts/Verdana.ttf	upper
1297283327020	1	FR_PRES	1	BOSS	152	-1	-1	-1297283327020	white	../fonts/Verdana.ttf	upper
1297283331152	1	FR_PRES	1	CITRUS	313	-1	-1	-1297283331152	white	../fonts/Verdana.ttf	upper
1297283335118	1	FR_PRES	1	PEAR	1033	-1	-1	-1297283335118	white	../fonts/Verdana.ttf	upper
1297283339008	1	FR_PRES	1	WASHCLOTH	1591	-1	-1	-1297283339008	white	../fonts/Verdana.ttf	upper
1297283343081	1	FR_PRES	1	GOWN	652	-1	-1	-1297283343081	white	../fonts/Verdana.ttf	upper
1297283347122	1	FR_PRES	1	POTATO	1115	-1	-1	-1297283347122	white	../fonts/Verdana.ttf	upper
1297283351070	1	FR_PRES	1	REPORT	1184	-1	-1	-1297283351070	white	../fonts/Verdana.ttf	upper
1297283355244	1	FR_PRES	1	TYPEWRITER	1531	-1	-1	-1297283355244	white	../fonts/Verdana.ttf	upper
1297283360866	1	REC_START
1297283445254	0	REST
1297283446760	1	FR_PRES	2	CLOVE	329	-1	-1	-1297283446760	white	../fonts/Verdana.ttf	upper
1297283450725	1	FR_PRES	2	FRIAR	605	-1	-1	-1297283450725	white	../fonts/Verdana.ttf	upper
1297283454832	1	FR_PRES	2	CROW	414	-1	-1	-1297283454832	white	../fonts/Verdana.ttf	upper
1297283458997	1	FR_PRES	2	WINGS	1614	-1	-1	-1297283458997	white	../fonts/Verdana.ttf	upper
1297283462913	1	FR_PRES	2	MACHINE	852	-1	-1	-1297283462913	white	../fonts/Verdana.ttf	upper
1297283466786	1	FR_PRES	2	NICKEL	950	-1	-1	-1297283466786	white	../fonts/Verdana.ttf	upper
1297283470743	1	FR_PRES	2	POPE	1105	-1	-1	-1297283470743	white	../fonts/Verdana.ttf	upper
1297283474600	1	FR_PRES	2	AGENT	10	-1	-1	-1297283474600	white	../fonts/Verdana.ttf	upper
1297283478699	1	FR_PRES	2	PUPPY	1144	-1	-1	-1297283478699	white	../fonts/Verdana.ttf	upper
1297283482564	1	FR_PRES	2	DIAPER	459	-1	-1	-1297283482564	white	../fonts/Verdana.ttf	upper
1297283486464	1	FR_PRES	2	MAID	856	-1	-1	-1297283486464	white	../fonts/Verdana.ttf	upper
1297283490429	1	FR_PRES	2	DETERGENT	455	-1	-1	-1297283490429	white	../fonts/Verdana.ttf	upper
1297283494303	1	FR_PRES	2	REFEREE	1178	-1	-1	-1297283494303	white	../fonts/Verdana.ttf	upper
1297283498151	1	FR_PRES	2	CHAMBER	277	-1	-1	-1297283498151	white	../fonts/Verdana.ttf	upper
1297283502025	1	FR_PRES	2	SUSPECT	1415	-1	-1	-1297283502025	white	../fonts/Verdana.ttf	upper
1297283506207	1	FR_PRES	2	PRAIRIE	1118	-1	-1	-1297283506207	white	../fonts/Verdana.ttf	upper
1297283512072	1	REC_START
1297283615335	0	REST
...
```
