# Setup

In [47]:
# import packages
import numpy as np
import os
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns

In [51]:
## define path to input datasets (tidy format)
## NOTE: an alternate data location was previously assigned here and then immediately
## overwritten, so it never took effect. It is recorded for reference only:
##   recog:   /home/jefan/neurosketch_compmem/neurosketch_voxelmat_freesurfer_recog
##   drawing: /home/jefan/neurosketch_compmem/neurosketch_voxelmat_freesurfer_drawing
path_to_recog = '/home/jgunn/neurosketch/recmatrices'
path_to_draw = '/home/jgunn/neurosketch/drawmatrices'

## ROIs analyzed in this notebook. A longer 16-ROI list (additionally containing
## IFG, rostMFG, caudMFG, precentral, SMG, STG) used to be assigned first and was
## then overwritten by this 10-ROI list, so only these ten were ever in effect.
roi_list = np.array(["V1", "V2", "LOC", "IT", "fusiform", "parahippo",  "PRC",  "ento", "hipp", 'mOFC'])

## display labels with embedded line breaks; note this still covers the 16-ROI set,
## so it is longer than roi_list
roi_formatted = np.array(["V1", "V2", "LOC", "IT", "fusiform", "para\nhippo",  "PRC",  "ento", "hipp", 'mOFC', 'IFG', 'rost\nMFG', 'caud\nMFG', 'pre\ncentral', 'SMG', 'STG'])

## recognition runs: .csv files are treated as metadata, .npy files as feature matrices
RECOG_METAS = sorted(fname for fname in os.listdir(path_to_recog) if fname.rpartition('.')[2] == 'csv')
RECOG_FEATS = sorted(fname for fname in os.listdir(path_to_recog) if fname.rpartition('.')[2] == 'npy')
## subject ID = text before the first underscore of each feature filename
RECOG_SUBS = np.array([fname.partition('_')[0] for fname in RECOG_FEATS])

## unique subject IDs that have at least one recognition feature file
recog_sub_list = np.unique(RECOG_SUBS)

def preprocess_recog(RECOG_METAS, RECOG_FEATS):
    """Keep only filenames whose stem has exactly four '_'-separated fields.

    The stem is the part of the filename before its first '.'.

    Parameters
    ----------
    RECOG_METAS, RECOG_FEATS : iterable of str
        Candidate metadata / feature filenames.

    Returns
    -------
    (M, F) : tuple of two lists
        The filtered metadata and feature filenames, in their input order.
    """
    def has_four_fields(fname):
        # four fields <=> exactly three underscores in the stem
        stem = fname.split('.')[0]
        return stem.count('_') == 3

    M = list(filter(has_four_fields, RECOG_METAS))
    F = list(filter(has_four_fields, RECOG_FEATS))
    return M,F

## drop recognition files whose filename stem does not have exactly four '_'-separated fields
## (RECOG_SUBS / recog_sub_list above were computed from the unfiltered feature list)
RECOG_METAS, RECOG_FEATS = preprocess_recog(RECOG_METAS, RECOG_FEATS)

In [52]:
## drawing runs: .csv files are treated as metadata, .npy files as feature matrices
DRAW_METAS = sorted(fname for fname in os.listdir(path_to_draw) if fname.rpartition('.')[2] == 'csv')
DRAW_FEATS = sorted(fname for fname in os.listdir(path_to_draw) if fname.rpartition('.')[2] == 'npy')
## subject ID = text before the first underscore of each feature filename
DRAW_SUBS = np.array([fname.partition('_')[0] for fname in DRAW_FEATS])
## unique subject IDs that have at least one drawing feature file
draw_sub_list = np.unique(DRAW_SUBS)

In [53]:
## subject IDs present in both the recognition and drawing file lists,
## with subject '1207162' explicitly left out
sub_list = [s for s in np.intersect1d(recog_sub_list,draw_sub_list) if s != '1207162']
print('Number of subs: {}'.format(len(sub_list)))

Number of subs: 30


In [54]:
## filter file list so only contains the sessions that have full datasets
def extract_good_sessions(DRAW_METAS,DRAW_FEATS,RECOG_METAS,RECOG_FEATS,subjects=None):
    """Restrict the four file lists to files belonging to the retained subjects.

    The subject ID is read from the filename: the second '_'-separated field
    for metadata files and the first '_'-separated field for feature files.

    Parameters
    ----------
    DRAW_METAS, DRAW_FEATS, RECOG_METAS, RECOG_FEATS : list of str
        Candidate filenames for each phase / file type.
    subjects : iterable of str, optional
        Subject IDs to keep. Defaults to the module-level `sub_list`, which is
        what this function always used before the parameter was added.

    Returns
    -------
    tuple of four lists
        Filtered (DRAW_METAS, DRAW_FEATS, RECOG_METAS, RECOG_FEATS), each in
        its input order.

    Raises
    ------
    IndexError
        If a metadata filename contains no underscore.
    """
    if subjects is None:
        subjects = sub_list
    keep = set(subjects)  # constant-time membership instead of scanning a list per file

    def _select(files, field):
        # keep files whose `field`-th underscore-delimited token is a retained subject
        return [fname for fname in files if fname.split('_')[field] in keep]

    _DRAW_METAS = _select(DRAW_METAS, 1)
    _DRAW_FEATS = _select(DRAW_FEATS, 0)
    _RECOG_METAS = _select(RECOG_METAS, 1)
    _RECOG_FEATS = _select(RECOG_FEATS, 0)
    return _DRAW_METAS, _DRAW_FEATS, _RECOG_METAS, _RECOG_FEATS

## keep only files that belong to subjects in sub_list
DRAW_METAS, DRAW_FEATS, RECOG_METAS, RECOG_FEATS = extract_good_sessions(
    DRAW_METAS, DRAW_FEATS, RECOG_METAS, RECOG_FEATS)

## per-file subject label (first filename field + '_neurosketch') and ROI (second filename field)
RECOG_SUBS = np.array(['{}_neurosketch'.format(fname.split('_')[0]) for fname in RECOG_FEATS])
RECOG_ROIS = np.array([fname.split('_')[1] for fname in RECOG_FEATS])

DRAW_SUBS = np.array(['{}_neurosketch'.format(fname.split('_')[0]) for fname in DRAW_FEATS])
DRAW_ROIS = np.array([fname.split('_')[1] for fname in DRAW_FEATS])

In [55]:
#### Helper data loader functions
def cleanup_df(df):
    """Return a copy of `df` without any column whose name contains 'Unnamed'.

    The input frame is not modified.
    """
    unnamed_cols = [col for col in df.columns if 'Unnamed' in col]
    return df.drop(labels=unnamed_cols, axis=1)

def flatten(x):
    """Concatenate the items of every sub-iterable of `x` into one flat list (one level deep)."""
    flat = []
    for sublist in x:
        flat.extend(sublist)
    return flat

def get_prob_timecourse(iv,DM,version='4way'):
    """Average classifier probabilities into target, foil and control timecourses.

    The two trained objects are the first two values of the sorted unique
    `DM.label` values (t1, t2). For each, rows with that label are grouped by
    `iv` and the relevant probability column is averaged within each group;
    the resulting per-label curves are then averaged together.

    * target  -- `t1_prob` on t1-labelled rows and `t2_prob` on t2-labelled rows
    * foil    -- `t2_prob` on t1-labelled rows and `t1_prob` on t2-labelled rows
    * control -- depends on `version`:
        '4way': `c1_prob` and `c2_prob` on both t1- and t2-labelled rows
        '3way': `c_prob` on both t1- and t2-labelled rows
        '2way': all zeros (same length as foil)

    NOTE(review): this presumes `t1_prob` / `t2_prob` refer to the trained
    objects in the same sorted-label order used here -- confirm against the
    code that wrote those columns.

    Parameters
    ----------
    iv : str
        Name of the column in `DM` to group by (e.g. 'trial_num', 'time_point').
    DM : pandas.DataFrame
        Must contain `label`, `iv`, `t1_prob`, `t2_prob` and the control
        column(s) required by `version`.
    version : {'4way', '3way', '2way'}
        Which set of control columns to use.

    Returns
    -------
    target, foil, control : numpy.ndarray
        Three 1-D arrays with one value per group of `iv`.

    Raises
    ------
    ValueError
        If `version` is not one of the supported values (previously this fell
        through and failed with UnboundLocalError at the return statement).
    IndexError
        If `DM` contains fewer than two distinct labels.
    """
    control_cols_by_version = {'4way': ['c1_prob', 'c2_prob'],
                               '3way': ['c_prob'],
                               '2way': []}
    if version not in control_cols_by_version:
        raise ValueError("version must be one of '4way', '3way', '2way'; got {!r}".format(version))

    trained_objs = np.unique(DM.label.values)
    t1 = trained_objs[0]
    t2 = trained_objs[1]

    # filter and group once per trained object; every curve below reuses these
    by_t1 = DM[DM.label==t1].groupby(iv)
    by_t2 = DM[DM.label==t2].groupby(iv)

    def _mean_course(pairs):
        # one row per (grouped rows, column) pair, then average across rows
        return np.vstack([grouped[col].mean().values for grouped, col in pairs]).mean(0)

    target = _mean_course([(by_t1, 't1_prob'), (by_t2, 't2_prob')]) ## target timecourse
    foil = _mean_course([(by_t1, 't2_prob'), (by_t2, 't1_prob')]) ## foil timecourse

    control_cols = control_cols_by_version[version]
    if control_cols:
        control = _mean_course([(grouped, col)
                                for grouped in (by_t1, by_t2)
                                for col in control_cols]) ## control timecourse
    else:
        # '2way' has no control class; return zeros so callers can still unpack three arrays
        control = np.zeros(len(foil))

    return target, foil, control

In [56]:
## classifier version and preprocessing tag select which timeseries CSV is loaded
version = '4way'
tag = 'raw'
## read the drawing-phase classifier timeseries and strip any 'Unnamed' columns
ALLDM = cleanup_df(pd.read_csv('./logistic_timeseries_drawing_neural_{}_{}.csv'.format(version,tag)))
ALLDM

Unnamed: 0,TR_num,bed_prob,bench_prob,c1_prob,c2_prob,chair_prob,label,roi,run_num,subj,t1_prob,t2_prob,table_prob,time_point,trial_num
0,10,0.785915,0.013766,0.010464,0.189855,0.010464,bed,V1,1,110171,0.785915,0.013766,0.189855,1,0
1,11,0.943442,0.012621,0.012706,0.031231,0.012706,bed,V1,1,110171,0.943442,0.012621,0.031231,2,0
2,12,0.223747,0.118222,0.020710,0.637321,0.020710,bed,V1,1,110171,0.223747,0.118222,0.637321,3,0
3,13,0.018421,0.429356,0.000032,0.552191,0.000032,bed,V1,1,110171,0.018421,0.429356,0.552191,4,0
4,14,0.399306,0.000777,0.054215,0.545702,0.054215,bed,V1,1,110171,0.399306,0.000777,0.545702,5,0
5,15,0.024783,0.126185,0.625932,0.223099,0.625932,bed,V1,1,110171,0.024783,0.126185,0.223099,6,0
6,16,0.279504,0.174926,0.469470,0.076099,0.469470,bed,V1,1,110171,0.279504,0.174926,0.076099,7,0
7,17,0.416950,0.000396,0.235187,0.347467,0.235187,bed,V1,1,110171,0.416950,0.000396,0.347467,8,0
8,18,0.254005,0.254568,0.191382,0.300044,0.191382,bed,V1,1,110171,0.254005,0.254568,0.300044,9,0
9,19,0.062705,0.041253,0.200048,0.695994,0.200048,bed,V1,1,110171,0.062705,0.041253,0.695994,10,0


# Implementation

Here's the plan. For each ROI:
1. Generate a subject-by-trial_num matrix where each cell is `t`, `f`, `t-f`, or `c`, taken from the output of `analysis_helpers.get_prob_timecourse` for the associated trial, subject, and ROI.
2. Generate a vector with one entry per subject, holding each subject's pre-post change measure, in the same subject order as the rows of the matrix.
3. Compute `stats.pearsonr()` between each column of the subject-by-trial_num matrix and the pre-post change vector. The resulting vector of correlations (one per trial) is the time course we're looking to understand for this ROI.

In [57]:
# Plan, for each ROI:
# 1. Build a subject-by-trial_num matrix for each of `t`, `f`, `t-f` and `c`, taken from the output of
#    `get_prob_timecourse` for that subject/ROI pairing.
# 2. Build a vector with one entry per subject holding that subject's pre-post change measure.
# 3. Correlate (`stats.pearsonr`) each column of the matrix with the prepost vector; the resulting
#    per-trial correlations are the time course of interest for this ROI.

subs = np.unique(ALLDM.subj.values)
numTrials = 20
prepost = pd.read_csv('neural_changes_by_surfroi_and_subject.csv')
variants = ['t', 'f', 't-f', 'c']

# create the output directory once instead of re-checking before every figure
if not os.path.exists('./plots/roi/drawrecogcourse'):
    os.makedirs('./plots/roi/drawrecogcourse')

trial_idx = np.arange(numTrials)

for this_roi in roi_list:

    # 1. Per-subject timecourses. These do not depend on the variant, so they are computed
    # once per ROI and shared by all variants (previously recomputed for every variant).
    courses = {'t': [], 'f': [], 'c': []}
    for sub in subs:
        inds = (ALLDM['roi']==this_roi) & (ALLDM['subj']==sub)
        t,f,c = get_prob_timecourse('trial_num',ALLDM[inds])
        courses['t'].append(t)
        courses['f'].append(f)
        courses['c'].append(c)

    # subject-by-trial matrices, one per variant
    matrices = dict((key, np.vstack(rows)) for key, rows in courses.items())
    matrices['t-f'] = matrices['t'] - matrices['f']

    # 2. One pre-post change value per subject.
    # NOTE(review): this relies on the rows of `prepost` being in the same subject order as
    # `subs` (np.unique of ALLDM.subj); nothing here verifies that -- confirm against the CSV.
    recog = prepost['tradiff_{}'.format(this_roi)].values-prepost['condiff_{}'.format(this_roi)].values
    assert len(recog) == len(subs), 'prepost has {} rows but there are {} subjects'.format(len(recog), len(subs))

    for variant in variants:
        contrasts = matrices[variant]

        # 3. Correlation between each trial's column and the prepost vector, then the
        # linear trend of that correlation across trials.
        corcourse = [stats.pearsonr(contrasts[:,i],recog)[0] for i in range(numTrials)]
        r,p = stats.pearsonr(trial_idx,corcourse)

        fig = plt.figure(figsize=(8,4))
        plt.plot(corcourse, 'ro', label='data')
        # least-squares line through the per-trial correlations
        plt.plot(trial_idx, np.polyval(np.polyfit(trial_idx, corcourse, 1), trial_idx))
        plt.ylabel('corr({}, prepost_difference)'.format(variant))
        plt.xlabel('trial_num')
        plt.title('ROI: {}  r={}  p={}'.format(this_roi,np.round(r,5),np.round(p,5)))

        plt.tight_layout()
        plt.savefig('./plots/roi/drawrecogcourse/draw_recog_course_{}_{}.pdf'.format(this_roi,variant))
        plt.close(fig)

Let's try a variant that computes the `t` and `f` correlations with prepost separately and plots them together on the same graph.

In [58]:
# Variant of the per-trial correlation analysis: for each ROI, build the subject-by-trial_num
# matrices of target (`t`) and competitor/foil (`f`) probabilities from `get_prob_timecourse`,
# correlate each trial's column with the per-subject prepost change vector, and draw both
# correlation time courses (with their linear fits) on a single figure.

subs = np.unique(ALLDM.subj.values)
numTrials = 20
prepost = pd.read_csv('neural_changes_by_surfroi_and_subject.csv')

# create the output directory once instead of re-checking before every figure
if not os.path.exists('./plots/roi/drawrecogcourse'):
    os.makedirs('./plots/roi/drawrecogcourse')

trial_idx = np.arange(numTrials)

for this_roi in roi_list:

    # 1. subject-by-trial_num matrices of target and foil probabilities
    T = []
    F = []
    for sub in subs:
        inds = (ALLDM['roi']==this_roi) & (ALLDM['subj']==sub)
        t,f,c = get_prob_timecourse('trial_num',ALLDM[inds])
        T.append(t)
        F.append(f)
    T = np.vstack(T)
    F = np.vstack(F)

    # 2. One pre-post change value per subject.
    # NOTE(review): this relies on the rows of `prepost` being in the same subject order as
    # `subs` (np.unique of ALLDM.subj); nothing here verifies that -- confirm against the CSV.
    recog = prepost['tradiff_{}'.format(this_roi)].values-prepost['condiff_{}'.format(this_roi)].values

    # 3. Per-trial correlation with the prepost vector, for target and competitor separately.
    fig = plt.figure(figsize=(8,4))

    course_means = {}
    for series_label, matrix, marker_fmt, line_fmt in [('target', T, 'ro', 'r'),
                                                       ('competitor', F, 'cv', 'c')]:
        corcourse = [stats.pearsonr(matrix[:,i],recog)[0] for i in range(numTrials)]
        course_means[series_label] = np.mean(np.array(corcourse))
        plt.plot(corcourse, marker_fmt, label=series_label)
        # least-squares line through the per-trial correlations
        plt.plot(trial_idx, np.polyval(np.polyfit(trial_idx, corcourse, 1), trial_idx), line_fmt)
    meanT = course_means['target']
    meanF = course_means['competitor']

    # The label used to format `variant`, a leftover global from the previous cell's loop
    # (always 'c' by then), which mislabelled the axis and made this cell depend on hidden
    # state. Both plotted series are classifier probabilities, hence the fixed label.
    plt.ylabel('corr(prob, prepost_difference)')
    plt.xlabel('trial_num')
    plt.title('ROI: {}  mean(t): {}  mean(f): {}'.format(this_roi, meanT, meanF))
    plt.legend()

    plt.tight_layout()
    plt.savefig('./plots/roi/drawrecogcourse/draw_recog_course_{}.pdf'.format(this_roi))
    plt.close(fig)

Let's try redoing the original draw_contrast vs. prepost differentiation analysis, focusing on the target and competitor scores.

In [59]:
## Accumulators for the subject-level summary table built at the end of this cell.
## One entry is appended to each list per (roi, subject) pair.
sub_tf = []
sub_tc = []
sub_fc = []
roi = []
## maps the grouping-variable name to the column label used in the long-form frame
lookup = dict(zip(['trial_num','run_num','time_point'],['repetition','run','TR']))

subs = np.unique(ALLDM.subj.values)
ivs = ['time_point'] ## other options 'run_num','trial_num',

## do you want to render the CONDITION-wise plots -- trained vs. foil vs control
## or the DIFFERENCE plots -- trained - foil vs foil - control?
## NOTE: render_cond is assigned here but not read anywhere else in this cell.
render_cond = 0

for this_iv in ivs:
    for this_roi in roi_list:

        ## Per-ROI accumulators. After the subject loop, T/F/C/DTF/DTC/DFC are 1-D arrays
        ## holding every subject's timecourse concatenated end to end (np.hstack).
        T = []
        F = []
        C = []
        Sub = []
        for sub in subs:
            inds =(ALLDM['roi']==this_roi) & (ALLDM['subj']==sub) 
            t,f,c = get_prob_timecourse(this_iv,ALLDM[inds],version=version)
#             t,f,c = get_prob_timecourse_alt(this_iv,ALLDM[inds])                    
            ## NOTE: despite their names (and the 'target-control' / 'foil-control' labels used
            ## below), DTC and DFC hold the raw target (t) and foil (f) timecourses; the control
            ## timecourse is NOT subtracted. Only DTF is an actual difference (t - f).
            ## NOTE(review): presumably deliberate, to look at target and competitor scores on
            ## their own -- confirm before relabelling or "fixing".
            ## C is accumulated but not used again in this cell.
            if len(T)==0:
                T = t
                F = f
                C = c
                DTF = t-f               
                DTC = t
                DFC = f
            else:
                T = np.hstack((T,t))
                F = np.hstack((F,f))        
                C = np.hstack((C,c)) 
                DTF = np.hstack((DTF,t-f))                
                DTC = np.hstack((DTC,t))
                DFC = np.hstack((DFC,f))
            ## one subject label per element of this subject's timecourse
            Sub.append([sub]*len(t))   
          
        ## make longform version of dataframe to use in tsplot (difference btw conditions)                    
        ## Layout: three consecutive blocks (one per condition label), each of length len(T),
        ## i.e. all subjects' timecourses back to back. `t` here is the last subject's
        ## timecourse, so this assumes every subject has the same number of points.
        Trial = np.tile(np.arange(len(t)),len(subs)*3)
        Condition = np.repeat(['target-foil','target-control','foil-control'],len(T))
        Sub = np.tile(np.array(flatten(Sub)),3)
        Prob = np.hstack((DTF,DTC,DFC))        
        ## sanity checks: all four long-form columns must have the same length
        assert len(Trial)==len(Condition)
        assert len(Sub)==len(Prob)
        assert len(Condition)==len(Sub)
        ## the four arrays are stacked as rows and then transposed into columns
        x = pd.DataFrame([Prob,Trial,Condition,Sub])
        x = x.transpose()
        x.columns = ['probability',lookup[this_iv],'condition','sub']
        
        ## per-subject mean of each condition's values over the grouping variable
        for this_sub in subs:
            sub_tf.append(x[(x['condition']=='target-foil') & (x['sub']==this_sub)]['probability'].mean())
            sub_tc.append(x[(x['condition']=='target-control') & (x['sub']==this_sub)]['probability'].mean())  
            sub_fc.append(x[(x['condition']=='foil-control') & (x['sub']==this_sub)]['probability'].mean()) 
            roi.append(this_roi)
            
## make dataframe with subject-level difference scores
## (one row per (roi, subject); rows are in roi_list order, then `subs` order within each ROI)
d = pd.DataFrame([sub_tf,sub_tc,sub_fc,roi])
d = d.transpose()
d.columns = ['target-foil','target-control','foil-control','roi']
## building the frame from mixed rows leaves the score columns non-numeric; cast them to float
d = d.astype({'target-foil':'float64','target-control':'float64','foil-control':'float64'})

In [49]:
prepost = pd.read_csv('neural_changes_by_surfroi_and_subject.csv')

## relate each drawing-phase score to recognition differentiation: one scatter plot per (score, ROI)
roi_list = ['V1', 'V2', 'LOC', 'IT', 'fusiform', 'parahippo', 'PRC', 'ento','hipp', 'mOFC']

for spec in ['target-control', 'foil-control']:
    ## the text before the hyphen ('target' / 'foil') names the score in axis labels and filenames
    spec_name = spec[:spec.find('-')]

    for this_roi in roi_list:
        ## x-axis: this ROI's subject-level drawing score for the current column of `d`
        ## (an earlier alternative used target-control minus foil-control)
        roi_rows = d[d['roi']==this_roi]
        draw = roi_rows[spec].values
        ## y-axis: trained-minus-control change in recognition
        ## (an earlier alternative used the tradiff column on its own)
        tradiff = prepost['tradiff_{}'.format(this_roi)].values
        condiff = prepost['condiff_{}'.format(this_roi)].values
        recog = tradiff-condiff

        ## two-column frame for seaborn
        z = pd.DataFrame([draw,recog]).transpose()
        z.columns=['draw','recog']

        ## scatter with regression fit, annotated with the Pearson r and p
        fig = plt.figure(figsize=(6,6))
        sns.set_context('poster')
        sns.regplot(x="draw", y="recog", data=z)
        r,p = stats.pearsonr(draw,recog)
        plt.title('ROI: {}  r={}  p={}'.format(this_roi,np.round(r,5),np.round(p,5)))
        plt.xlabel('drawing: {}'.format(spec_name))
        plt.ylabel('recog: post-pre differentiation')

        ## save under ./plots/roi/drawrecog, creating the directory on first use
        if not os.path.exists('./plots/roi/drawrecog'):
            os.makedirs('./plots/roi/drawrecog')
        plt.tight_layout()
        plt.savefig('./plots/roi/drawrecog/draw_recog_scatter_{}_{}.pdf'.format(this_roi, spec_name))
        plt.close(fig)