In [47]:
import pandas as pd
import numpy as np
import cmlreaders as cml
import ptsa
import matplotlib.pyplot as plt
import seaborn as sns
import sys
from copy import copy
from cmlreaders import CMLReader, get_data_index

%load_ext autoreload
%autoreload
import pybeh
from pybeh.temp_fact import temp_fact
from pybeh.crp import crp
from pybeh.make_recalls_matrix import make_recalls_matrix

from pybeh.temp_fact import temp_percentile_rank

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [40]:
# code based on Ethan method:

def get_recall_clustering(recall_cluster_values, recall_serial_pos):
    """Percentile-rank clustering score for every recall transition.

    For each transition between two successive recalls, the distance actually
    travelled (measured between the items' ``recall_cluster_values``) is
    ranked against the distances from the just-recalled item to every item
    that is still available. The score is ``1 - percentile / 100`` using
    ``kind='mean'``, so shorter-than-typical transitions score closer to 1.

    Parameters
    ----------
    recall_cluster_values : array-like
        One entry per presented item (e.g. serial positions for temporal
        clustering). Entries may be scalars or 1-D feature vectors (rows).
        Non-finite entries are treated as unavailable.
    recall_serial_pos : sequence of int
        0-based indices into ``recall_cluster_values`` in the order recalled,
        e.g. ``[1, 12, 3, 5, 9, 6]``.

    Returns
    -------
    list of float
        One score per transition, i.e. ``len(recall_serial_pos) - 1`` entries
        (empty when fewer than two recalls are given).

    Notes
    -----
    The caller's ``recall_cluster_values`` is never modified.
    NOTE(review): repeated recalls of the same item are not handled; the
    origin item would already be marked unavailable.
    """
    import itertools

    from scipy.spatial.distance import euclidean
    from scipy.stats import percentileofscore

    def _distance(a, b):
        # SciPy >= 1.9 rejects 0-d inputs, so promote scalars to length-1
        # vectors; 1-D rows pass through unchanged.
        return euclidean(np.atleast_1d(a), np.atleast_1d(b))

    # np.array(...) makes a private float copy that can be NaN-marked below.
    values = np.array(recall_cluster_values, dtype=float)

    def _available(idx):
        # An item is available until it has been NaN-marked as recalled.
        return bool(np.all(np.isfinite(values[idx])))

    all_possible_trans = list(itertools.combinations(range(len(values)), 2))
    all_pcts = []
    for ridx in range(len(recall_serial_pos) - 1):  # every recall except the last
        current = recall_serial_pos[ridx]
        upcoming = recall_serial_pos[ridx + 1]

        # Distances from the current item to each still-available item.
        # Unavailable items are skipped explicitly rather than by catching
        # whatever exception the distance call might raise.
        dists = np.array(
            [_distance(values[i], values[j])
             for i, j in all_possible_trans
             if current in (i, j) and _available(i) and _available(j)],
            dtype=float,
        )
        dists = dists[np.isfinite(dists)]

        true_trans = _distance(values[current], values[upcoming])
        # kind='mean' averages the strict and weak ranks, so ties count half.
        # (kind='strict' would let a lag of 1 yield the 1.00 percentile.)
        pctrank = 1. - percentileofscore(dists, true_trans, kind='mean') / 100.
        all_pcts.append(pctrank)  # percentile rank within this list

        # The item just left can no longer be a transition target.
        values[current] = np.nan

    return all_pcts

# code from David:

# def get_itemno_matrices(df, itemno_values='itemno', list_index=['subject', 'session', 'list'], pres_columns='serialpos'):
def get_itemno_matrices(df, itemno_values='item_num', list_index=['subject', 'session', 'list'], pres_columns='serialpos'):
    """Build presented-item and recalled-item matrices from an events frame.

    Expects as input a dataframe (df) for one subject. The input frame is
    left untouched; all casting happens on a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Events containing a ``type`` column with ``"WORD"`` (presentation)
        and ``"REC_WORD"`` (recall) rows, plus the columns named below.
    itemno_values : str
        Column holding the item number; cast to int (so it must not contain
        missing values).
    list_index : list of str
        Columns that together identify one list; one output row per list.
    pres_columns : str
        Column holding the presentation serial position; cast to int.

    Returns
    -------
    pres_itemnos : numpy.ndarray
        Item numbers, one row per list and one column per serial position.
    rec_itemnos : numpy.ndarray
        Item numbers, one row per list and one column per output position;
        positions past the end of a list's recalls are filled with 0.
    pres_itemnos_df, rec_itemnos_df : pandas.DataFrame
        The pivoted frames the two arrays were taken from, with the
        ``list_index`` columns in front.
    """
    index_cols = list(list_index)  # local copy; the default list is shared

    # astype returns a new frame, so the caller's df keeps its dtypes and the
    # cast is guaranteed to take effect (an in-place .loc assignment may
    # retain the original column dtype).
    events = df.astype({itemno_values: int, pres_columns: int})

    word_evs = events.query('type == "WORD"')
    rec_evs = events.query('type == "REC_WORD"')
    # Output position = running count of recalls within each list (0-based).
    # assign() builds a new frame instead of writing into a query() result.
    rec_evs = rec_evs.assign(outpos=rec_evs.groupby(index_cols).cumcount())

    pres_itemnos_df = pd.pivot_table(word_evs, values=itemno_values,
                                     index=index_cols,
                                     columns=pres_columns).reset_index()
    rec_itemnos_df = pd.pivot_table(rec_evs, values=itemno_values,
                                    index=index_cols,
                                    columns='outpos', fill_value=0).reset_index()

    # Drop the leading index columns to leave only the item-number grid.
    n_index_cols = len(index_cols)
    pres_itemnos = pres_itemnos_df.iloc[:, n_index_cols:].values
    rec_itemnos = rec_itemnos_df.iloc[:, n_index_cols:].values
    return pres_itemnos, rec_itemnos, pres_itemnos_df, rec_itemnos_df

def pd_crp(df, lag_num=5):
    """Lag-CRP curve for one subject's events, returned as a tidy frame.

    Expects as input a dataframe (df) for one subject. The events are turned
    into presented/recalled item matrices, converted to a pybeh recalls
    matrix, and passed to ``pybeh.crp.crp`` with every list labelled as the
    same dummy subject.

    Returns a DataFrame with columns ``prob`` (element 0 of the pybeh result,
    presumably the single dummy subject's curve) and ``lag`` (the integers
    from ``-lag_num`` to ``lag_num`` inclusive).
    """
    presented, recalled, *_ = get_itemno_matrices(df)
    recalls_matrix = pybeh.make_recalls_matrix.make_recalls_matrix(presented, recalled)

    # One identical label per list so pybeh treats all lists as one subject.
    n_lists = recalls_matrix.shape[0]
    dummy_subjects = np.array(['_'] * n_lists)

    crp_by_subject = pybeh.crp.crp(
        recalls=recalls_matrix,
        subjects=dummy_subjects,
        listLength=presented.shape[1],
        lag_num=lag_num,
    )
    lags = np.arange(-lag_num, lag_num + 1)
    return pd.DataFrame({'prob': crp_by_subject[0], 'lag': lags})

def pd_temp_fact(df, skip_first_n=0):
    """Temporal clustering factor for one subject's events.

    Expects as input a dataframe (df) for one subject. The events are turned
    into presented/recalled item matrices, converted to a pybeh recalls
    matrix, and passed to ``pybeh.temp_fact.temp_fact`` with every list
    labelled as the same dummy subject. ``skip_first_n`` is forwarded to
    pybeh unchanged.

    Returns element 0 of the pybeh result (presumably the single dummy
    subject's score).
    """
    presented, recalled, *_ = get_itemno_matrices(df)
    recalls_matrix = pybeh.make_recalls_matrix.make_recalls_matrix(presented, recalled)

    # One identical label per list so pybeh treats all lists as one subject.
    n_lists = recalls_matrix.shape[0]
    factor_by_subject = pybeh.temp_fact.temp_fact(
        recalls=recalls_matrix,
        subjects=np.array(['a'] * n_lists),
        listLength=presented.shape[1],
        skip_first_n=skip_first_n,
    )
    return factor_by_subject[0]

In [41]:
# Load the task events for the selected subject/experiment into one frame.
subs = ['R1299T']

df = get_data_index("r1") # all RAM subjects
exp = 'FR1'
sub_df = df[(df.subject.isin(subs))  & (df.experiment == exp)] # all sessions for subs
# Positional slice: keeps only the second matching row of the index
# (the recorded output shows this is session 1).
sub_df = sub_df[1:2]
sub_df

# Read each remaining session's task events with its own localization/montage.
all_events = []
for _, df_sess in sub_df.iterrows():
    sess_events = cml.CMLReader(subject=df_sess['subject'], experiment=exp, session=df_sess['session'],
                                localization=df_sess['localization'], montage=df_sess['montage']).load('task_events')
    all_events.append(sess_events)

# Stack the per-session event frames; the original per-session indices are kept.
all_sessions_df = pd.concat(all_events)

Unnamed: 0,Recognition,all_events,contacts,experiment,import_type,localization,math_events,montage,original_experiment,original_session,pairs,ps4_events,session,subject,subject_alias,system_version,task_events
1590,,protocols/r1/subjects/R1299T/experiments/FR1/s...,protocols/r1/subjects/R1299T/localizations/0/m...,FR1,build,0,protocols/r1/subjects/R1299T/experiments/FR1/s...,0,,,protocols/r1/subjects/R1299T/localizations/0/m...,,1,R1299T,R1299T,3.1,protocols/r1/subjects/R1299T/experiments/FR1/s...


In [42]:
# Keep only word presentations and recalls. The explicit copy makes
# all_trial_df an independent frame, so the .loc assignment below does not
# write into a view of all_sessions_df (SettingWithCopyWarning).
all_trial_df = all_sessions_df.query('type == ["WORD", "REC_WORD"]').copy()
# all_trial_df.loc[all_trial_df['itemno'].isnull(), 'itemno'] = -1 # pyFR columns I suppose
# Missing item numbers become -1 so the column can later be cast to int.
all_trial_df.loc[all_trial_df['item_num'].isnull(), 'item_num'] = -1
# all_trial_df.drop_duplicates(subset=['subject', 'session', 'list', 'itemno', 'type', 'rectime', 'eegoffset'], inplace=True)

# Check if both presentations and recalls are present for each list by
# counting the distinct event types per list. Despite its name,
# both_pres_rec_df holds the lists with FEWER than two types, i.e. the lists
# that are missing either presentations or recalls.
both_pres_rec_df = all_trial_df[['subject', 'session', 'list', 'type']].drop_duplicates().groupby(
    ['subject', 'session', 'list']).count().reset_index().query('type < 2')
# Only include lists if both presentations and recalls are present:
# anti-join via an outer merge with indicator, keeping rows found only on the left.
all_trial_df = all_trial_df.merge(both_pres_rec_df[['subject', 'session', 'list']], how='outer', indicator=True)
all_trial_df = all_trial_df.query('_merge == "left_only"')
all_trial_df

Unnamed: 0,eegoffset,eegfile,exp_version,experiment,intrusion,is_stim,item_name,item_num,list,montage,...,recognized,rectime,rejected,serialpos,session,stim_list,stim_params,subject,type,_merge
0,75888,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,ATTIC,-1,-1,0,...,-999,-999,-999,1,1,False,[],R1299T,WORD,left_only
1,78663,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,BEAM,-1,-1,0,...,-999,-999,-999,2,1,False,[],R1299T,WORD,left_only
2,81333,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,CAMEL,-1,-1,0,...,-999,-999,-999,3,1,False,[],R1299T,WORD,left_only
3,84014,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,CHEST,-1,-1,0,...,-999,-999,-999,4,1,False,[],R1299T,WORD,left_only
4,86654,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,COTTON,-1,-1,0,...,-999,-999,-999,5,1,False,[],R1299T,WORD,left_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
385,2697399,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,STOOL,253,25,0,...,-999,-999,-999,9,1,False,[],R1299T,WORD,left_only
386,2699941,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,BATH,13,25,0,...,-999,-999,-999,10,1,False,[],R1299T,WORD,left_only
387,2702489,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,CHEEK,51,25,0,...,-999,-999,-999,11,1,False,[],R1299T,WORD,left_only
388,2704915,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,DART,71,25,0,...,-999,2033,-999,12,1,False,[],R1299T,WORD,left_only


In [43]:
# Pick a single list to compare the two clustering methods on.
list_num = 5
list_df = all_trial_df.loc[all_trial_df['list'] == list_num]  # >-1
list_df

Unnamed: 0,eegoffset,eegfile,exp_version,experiment,intrusion,is_stim,item_name,item_num,list,montage,...,recognized,rectime,rejected,serialpos,session,stim_list,stim_params,subject,type,_merge
72,638514,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,DOCK,76,5,0,...,-999,1792,-999,1,1,False,[],R1299T,WORD,left_only
73,640987,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,CLOWN,59,5,0,...,-999,-999,-999,2,1,False,[],R1299T,WORD,left_only
74,643555,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,PEAR,177,5,0,...,-999,-999,-999,3,1,False,[],R1299T,WORD,left_only
75,646050,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,TRAY,280,5,0,...,-999,-999,-999,4,1,False,[],R1299T,WORD,left_only
76,648482,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,HORSE,128,5,0,...,-999,-999,-999,5,1,False,[],R1299T,WORD,left_only
77,650995,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,THUMB,270,5,0,...,-999,-999,-999,6,1,False,[],R1299T,WORD,left_only
78,653402,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,BEAR,17,5,0,...,-999,-999,-999,7,1,False,[],R1299T,WORD,left_only
79,655834,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,TEETH,267,5,0,...,-999,8417,-999,8,1,False,[],R1299T,WORD,left_only
80,658491,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,DEER,72,5,0,...,-999,12507,-999,9,1,False,[],R1299T,WORD,left_only
81,661215,R1299T_FR1_1_27Apr17_2015,1.0.0,FR1,-999,False,BEE,19,5,0,...,-999,5043,-999,10,1,False,[],R1299T,WORD,left_only


## ETHAN METHOD

In [44]:
# Number of presented words in the selected list.
num_words = len(list_df.loc[list_df['type'] == 'WORD'])
# Recalled serial positions, shifted to 0-based indexing (minus 1 since ethan goes from 0-index).
serial_positions = list_df.loc[list_df['type'] == 'REC_WORD', 'serialpos'].values - 1

# Temporal clustering: each item's "value" is simply its 0-based serial position.
cluster_values = get_recall_clustering(np.arange(num_words), serial_positions)
cluster_values
np.mean(cluster_values)

[0.2272727272727273, 0.7, 0.8888888888888888, 0.875]

0.672790404040404

## PYBEH method

In [48]:
# Recalled serial positions for the selected list, as stored in the events
# (1-based, unlike the 0-based indices used for the Ethan method).
list_df[list_df.type=='REC_WORD'].serialpos.values
# pybeh temporal factor for the same list, for comparison with the Ethan method.
# NOTE(review): the recorded output below stops in ipdb inside pybeh's
# temp_percentile_rank, which points to a set_trace() left in the installed
# pybeh copy; remove it before re-running.
list_df.groupby(['list']).apply(pd_temp_fact).reset_index()

array([ 1, 10,  8,  9, 11])

> [0;32m/home1/john/anaconda3/envs/env1/lib/python3.7/site-packages/pybeh/temp_fact.py[0m(101)[0;36mtemp_percentile_rank[0;34m()[0m
[0;32m    100 [0;31m    [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 101 [0;31m    [0;32mif[0m [0mlen[0m[0;34m([0m[0mmatches[0m[0;34m)[0m [0;34m>[0m [0;36m0[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    102 [0;31m        [0;31m# Get the number of possible transitions that were more distant than the actual transition[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  l


[1;32m     96 [0m    [0mpossible[0m [0;34m=[0m [0msorted[0m[0;34m([0m[0mpossible[0m[0;34m)[0m[0;34m[[0m[0;34m:[0m[0;34m:[0m[0;34m-[0m[0;36m1[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[1;32m     97 [0m[0;34m[0m[0m
[1;32m     98 [0m    [0;31m# Get indices of the one or more possible transitions with the same distance as the actual transition[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[1;32m     99 [0m    [0mmatches[0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0mwhere[0m[0;34m([0m[0mpossible[0m [0;34m==[0m [0mactual[0m[0;34m)[0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[1;32m    100 [0m    [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;32m--> 101 [0;31m    [0;32mif[0m [0mlen[0m[0;34m([0m[0mmatches[0m[0;34m)[0m [0;34m>[0m [0;36m0[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[1;32m    102 [0m        [0;31m# Get the number

ipdb>  possible


[11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]


ipdb>  actual


9


ipdb>  type(actual)


<class 'numpy.int64'>


ipdb>  type(possible)


<class 'list'>


ipdb>  matches


array([2])


ipdb>  l


[1;32m    107 [0m    [0;32melse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[1;32m    108 [0m        [0mptile_rank[0m [0;34m=[0m [0;32mNone[0m[0;34m[0m[0;34m[0m[0m
[1;32m    109 [0m[0;34m[0m[0m
[1;32m    110 [0m    [0;32mreturn[0m [0mptile_rank[0m[0;34m[0m[0;34m[0m[0m



ipdb>  b 110


Breakpoint 1 at /home1/john/anaconda3/envs/env1/lib/python3.7/site-packages/pybeh/temp_fact.py:110


ipdb>  c


> [0;32m/home1/john/anaconda3/envs/env1/lib/python3.7/site-packages/pybeh/temp_fact.py[0m(110)[0;36mtemp_percentile_rank[0;34m()[0m
[0;32m    108 [0;31m        [0mptile_rank[0m [0;34m=[0m [0;32mNone[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    109 [0;31m[0;34m[0m[0m
[0m[1;31m1[0;32m-> 110 [0;31m    [0;32mreturn[0m [0mptile_rank[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  rank


2.0


ipdb>  possible


[11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]


ipdb>  c


0.2
> [0;32m/home1/john/anaconda3/envs/env1/lib/python3.7/site-packages/pybeh/temp_fact.py[0m(101)[0;36mtemp_percentile_rank[0;34m()[0m
[0;32m    100 [0;31m    [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 101 [0;31m    [0;32mif[0m [0mlen[0m[0;34m([0m[0mmatches[0m[0;34m)[0m [0;34m>[0m [0;36m0[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    102 [0;31m        [0;31m# Get the number of possible transitions that were more distant than the actual transition[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  matches


array([6, 7])


ipdb>  possible


[8, 7, 6, 5, 4, 3, 2, 2, 1, 1]


ipdb>  c


0.7222222222222222
> [0;32m/home1/john/anaconda3/envs/env1/lib/python3.7/site-packages/pybeh/temp_fact.py[0m(101)[0;36mtemp_percentile_rank[0;34m()[0m
[0;32m    100 [0;31m    [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 101 [0;31m    [0;32mif[0m [0mlen[0m[0;34m([0m[0mmatches[0m[0;34m)[0m [0;34m>[0m [0;36m0[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    102 [0;31m        [0;31m# Get the number of possible transitions that were more distant than the actual transition[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  possible


[6, 5, 4, 4, 3, 3, 2, 1, 1]


ipdb>  c


0.9375
> [0;32m/home1/john/anaconda3/envs/env1/lib/python3.7/site-packages/pybeh/temp_fact.py[0m(101)[0;36mtemp_percentile_rank[0;34m()[0m
[0;32m    100 [0;31m    [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 101 [0;31m    [0;32mif[0m [0mlen[0m[0;34m([0m[0mmatches[0m[0;34m)[0m [0;34m>[0m [0;36m0[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    102 [0;31m        [0;31m# Get the number of possible transitions that were more distant than the actual transition[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  possible


[7, 6, 5, 4, 3, 3, 2, 2]


ipdb>  c


0.9285714285714286


Unnamed: 0,list,0
0,5,0.697073


In [8]:
## Explanation of difference ##

Ethan way (get_recall_clustering): 
    
possible_serialpos = [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.] # just length of word list
actual_trans = [0, 9,  7,  8, 10] # order patient said
1) 
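poss_dist = [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.] # distances from item 0 to each other item
actual_dist = dist=9: 8 of the 11 possible distances are shorter and 1 ties, so with kind='mean' the score is 1-(8.5/11) = 22.7th percentile (kind='strict' would give 1-(8/11) = 27.3th, and would let a lag of 1 reach the 1.0 percentile)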


pybeh way (temp_percentile_rank):
    
possibles_range = range(1,13) 
recalls = [ 1, 10,  8,  9, 11]
1) 
possibles = [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11] # distances from selected
actual = dist=9, so 2 possible transitions are more distant (rank 2); reported relative to the 10 other possible transitions = 2/10 = 20th percentile
2) 
possibles = [8, 7, 6, 5, 4, 3, 2, 1, 1, 2]
actual = dist=2 so 2 matches with mean 6.5th of 10 but report those more distant of remaining 9 = 6.5/9 = 72.2th percentile
3) 
possibles = [6, 5, 4, 3, 2, 1, 1, 3, 4]
actual = dist=1 so 2 matches with mean 7.5th of 9 but report those more distant of remaining 8 = 7.5/8 = 93.8th percentile
4)
possibles = [7, 6, 5, 4, 3, 3, 2, 2]
actual = dist=2 so 6 more distant and mean two remaining so 6.5/7 remaining non-picked = 92.86th percentile

SyntaxError: invalid syntax (<ipython-input-8-18ad30968641>, line 3)