# Setup

In [21]:
# import packages
import numpy as np
import os
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context('notebook', font_scale=1.5)

In [22]:
## define path to input datasets (tidy format)
## NOTE: machine-specific absolute paths. An earlier revision of this cell pointed at
## '/home/jefan/neurosketch_compmem/neurosketch_voxelmat_freesurfer_{recog,drawing}'.
path_to_recog = '/home/jgunn/neurosketch/recmatrices'
path_to_draw = '/home/jgunn/neurosketch/drawmatrices'

## plot-friendly ROI labels (long names broken over two lines); this covers the
## full 16-ROI set, of which `roi_list` below uses the first 10.
roi_formatted = np.array([
    "V1", "V2", "LOC", "IT", "fusiform", "para\nhippo", "PRC", "ento", "hipp",
    'mOFC', 'IFG', 'rost\nMFG', 'caud\nMFG', 'pre\ncentral', 'SMG', 'STG',
])

## ROIs analysed in this notebook
roi_list = np.array([
    "V1", "V2", "LOC", "IT", "fusiform",
    "parahippo", "PRC", "ento", "hipp", 'mOFC',
])

## get raw file list for recognition runs
## (metadata files are selected by a 'csv' extension, feature matrices by 'npy')
_recog_files = os.listdir(path_to_recog)
RECOG_METAS = sorted(filter(lambda fname: fname.split('.')[-1]=='csv', _recog_files))
RECOG_FEATS = sorted(filter(lambda fname: fname.split('.')[-1]=='npy', _recog_files))

## the text before the first underscore of each feature filename is used as the subject ID
RECOG_SUBS = np.array([fname.split('_')[0] for fname in RECOG_FEATS])

recog_sub_list = np.unique(RECOG_SUBS)

def preprocess_recog(RECOG_METAS, RECOG_FEATS):
    """Keep only the filenames whose stem has exactly four '_'-separated fields.

    The stem is the text before the first '.' in the filename.

    Returns
    -------
    (M, F) : tuple of lists
        The filtered metadata and feature filename lists, in their input order.
    """
    def _has_four_fields(fname):
        stem = fname.split('.')[0]
        return len(stem.split('_')) == 4

    M = list(filter(_has_four_fields, RECOG_METAS))
    F = list(filter(_has_four_fields, RECOG_FEATS))
    return M,F

## keep only the recognition files whose name stem has four '_'-separated fields
## (note: `recog_sub_list` above was computed from the unfiltered feature list)
RECOG_METAS, RECOG_FEATS = preprocess_recog(RECOG_METAS, RECOG_FEATS)

In [23]:
## get raw file list for drawing runs
## (metadata files are selected by a 'csv' extension, feature matrices by 'npy')
_draw_files = os.listdir(path_to_draw)
DRAW_METAS = sorted(filter(lambda fname: fname.split('.')[-1]=='csv', _draw_files))
DRAW_FEATS = sorted(filter(lambda fname: fname.split('.')[-1]=='npy', _draw_files))

## the text before the first underscore of each feature filename is used as the subject ID
DRAW_SUBS = np.array([fname.split('_')[0] for fname in DRAW_FEATS])
draw_sub_list = np.unique(DRAW_SUBS)

In [24]:
## subject IDs that appear in BOTH the recognition and the drawing file lists
sub_list = np.intersect1d(recog_sub_list,draw_sub_list)
#sub_list = [s for s in sub_list if s != '1207162']
n_subs = len(sub_list)
print('Number of subs: {}'.format(n_subs))

Number of subs: 31


In [25]:
## filter file list so only contains the sessions that have full datasets
def extract_good_sessions(DRAW_METAS,DRAW_FEATS,RECOG_METAS,RECOG_FEATS,subjects=None):
    """Restrict the four file lists to files belonging to the given subjects.

    The subject ID is read from the '_'-separated filename fields: field 1 for
    metadata files and field 0 for feature files.
    NOTE(review): this implies meta names carry a leading prefix field before
    the subject ID -- confirm against the actual filenames.

    Parameters
    ----------
    DRAW_METAS, DRAW_FEATS, RECOG_METAS, RECOG_FEATS : list of str
        Filenames to filter. Input order is preserved.
    subjects : iterable of str, optional
        Subject IDs to keep. Defaults to the notebook-level `sub_list`, which
        keeps existing four-argument calls working unchanged.

    Returns
    -------
    tuple of four lists
        Filtered (DRAW_METAS, DRAW_FEATS, RECOG_METAS, RECOG_FEATS).
    """
    if subjects is None:
        # backward-compatible fallback to the global defined earlier in the notebook
        subjects = sub_list
    keep = set(subjects)  # O(1) membership instead of scanning the array per file

    def _select(files, field):
        return [fname for fname in files if fname.split('_')[field] in keep]

    _DRAW_METAS = _select(DRAW_METAS, 1)
    _DRAW_FEATS = _select(DRAW_FEATS, 0)
    _RECOG_METAS = _select(RECOG_METAS, 1)
    _RECOG_FEATS = _select(RECOG_FEATS, 0)
    return _DRAW_METAS, _DRAW_FEATS, _RECOG_METAS, _RECOG_FEATS

DRAW_METAS, DRAW_FEATS, RECOG_METAS, RECOG_FEATS = extract_good_sessions(
    DRAW_METAS, DRAW_FEATS, RECOG_METAS, RECOG_FEATS)

## split each feature filename on '_' once: field 0 is used as the subject ID
## (suffixed with '_neurosketch') and field 1 as the ROI name
_recog_fields = [fname.split('_') for fname in RECOG_FEATS]
RECOG_SUBS = np.array([fields[0]+'_neurosketch' for fields in _recog_fields])
RECOG_ROIS = np.array([fields[1] for fields in _recog_fields])

_draw_fields = [fname.split('_') for fname in DRAW_FEATS]
DRAW_SUBS = np.array([fields[0]+'_neurosketch' for fields in _draw_fields])
DRAW_ROIS = np.array([fields[1] for fields in _draw_fields])

In [26]:
#### Helper data loader functions
def cleanup_df(df):
    """Return a new frame without any column whose name contains 'Unnamed'.

    The input frame is left untouched.
    """
    unnamed_cols = []
    for col in df.columns:
        if 'Unnamed' in col:
            unnamed_cols.append(col)
    return df.drop(columns=unnamed_cols)

def flatten(x):
    """Concatenate the items of every sub-iterable of `x` into one flat list (one level deep)."""
    flat = []
    for sublist in x:
        for item in sublist:
            flat.append(item)
    return flat

def get_prob_timecourse(iv,DM,version='4way'):
    """Average the `*_prob` columns of `DM` into target, foil and control timecourses.

    The two labels used are the first two entries of the sorted unique values
    of `DM.label`, called t1 and t2 here. For each label, the requested column
    is averaged within each level of `iv` (levels in sorted order).

    * target  -- mean of (t1 rows' 't1_prob', t2 rows' 't2_prob')
    * foil    -- mean of (t1 rows' 't2_prob', t2 rows' 't1_prob')
    * control -- '4way': mean of 'c1_prob' and 'c2_prob' over t1 and t2 rows;
                 '3way': mean of 'c_prob' over t1 and t2 rows;
                 '2way': all zeros.

    NOTE(review): this assumes 't1_prob'/'t2_prob' were written in the same
    sorted-label order that `np.unique` yields here -- confirm upstream.

    Parameters
    ----------
    iv : str
        Column of `DM` to group by (e.g. 'run_num', 'trial_num', 'time_point').
    DM : pandas.DataFrame
        Must contain 'label', `iv`, and the probability columns named above
        for the chosen `version`.
    version : {'4way', '3way', '2way'}

    Returns
    -------
    (target, foil, control) : tuple of 1-D numpy arrays, one value per `iv` level.

    Raises
    ------
    ValueError
        If `version` is not recognised (previously an UnboundLocalError at the
        return statement), or if `DM` has fewer than two distinct labels.
    """
    valid_versions = ('4way', '3way', '2way')
    if version not in valid_versions:
        raise ValueError('version must be one of {}, got {!r}'.format(valid_versions, version))

    trained_objs = np.unique(DM.label.values)
    if len(trained_objs) < 2:
        raise ValueError('DM must contain at least two distinct labels, got {}'.format(list(trained_objs)))
    t1 = trained_objs[0]
    t2 = trained_objs[1]

    def _mean_by_iv(label, column):
        # mean of `column` at each level of `iv`, using only the rows with this label
        return DM[DM.label==label].groupby(iv)[column].mean().values

    # target and foil are computed identically in every version
    target = np.vstack((_mean_by_iv(t1,'t1_prob'),
                        _mean_by_iv(t2,'t2_prob'))).mean(0) ## average over the two labels
    foil = np.vstack((_mean_by_iv(t1,'t2_prob'),
                      _mean_by_iv(t2,'t1_prob'))).mean(0)

    if version=='4way':
        control = np.vstack((_mean_by_iv(t1,'c1_prob'),
                             _mean_by_iv(t1,'c2_prob'),
                             _mean_by_iv(t2,'c1_prob'),
                             _mean_by_iv(t2,'c2_prob'))).mean(0)
    elif version=='3way':
        control = np.vstack((_mean_by_iv(t1,'c_prob'),
                             _mean_by_iv(t2,'c_prob'))).mean(0)
    else:
        # '2way': there is no control column, so return a zero placeholder of matching length
        control = np.zeros(len(foil))

    return target, foil, control

In [27]:
## classifier variant passed to get_prob_timecourse, plus the tag used in the
## filename of the alternative input below
version = '4way'
tag = 'logged'
#ALLDM = pd.read_csv('./logistic_timeseries_drawing_neural_{}_{}.csv'.format(version,tag))

## read the table and strip any 'Unnamed' columns
ALLDM = cleanup_df(pd.read_csv('./formatted_partial_sketch.csv'))
len(ALLDM)

32200

In [28]:
## preview the first rows of the loaded table
ALLDM.head()

Unnamed: 0,trial_num,subj,label,time_point,bed_prob,bench_prob,chair_prob,table_prob,t1_prob,t2_prob,c1_prob,c2_prob,run_num
0,0,119174,bench,1,0.026822,0.055895,0.916243,0.00104,-2.884274,-0.087474,-3.618527,-6.868781,1
1,0,119174,bench,2,0.934225,0.026225,0.039085,0.000465,-3.641038,-3.24201,-0.068038,-7.673234,1
2,0,119174,bench,3,0.006035,0.97077,0.002714,0.020481,-0.029666,-5.90943,-5.110116,-3.888245,1
3,0,119174,bench,4,0.009842,0.978927,0.003784,0.007447,-0.021298,-5.577057,-4.621096,-4.899969,1
4,0,119174,bench,5,0.019298,0.967674,0.001463,0.011564,-0.03286,-6.527103,-3.947737,-4.459819,1


# Implementation

Here's the plan. For each ROI...
1. Generate a subject-by-trial_num matrix where each cell is either `t`, `f`, or `t-f` from the output of `analysis_helpers.get_prob_timecourse` for the associated trial and subject (and roi) pairing. 
2. Generate a subject_num length vector consisting of each subject's pre-post change measure in the same order as they are in the matrix.
3. The vector defined by taking the `stats.pearsonr()` between each column of the subject-by-trial_num matrix and the prepost change vector is the time course we're looking to understand for this ROI.

In [29]:
# Plan. For each ROI...
# 1. Build a subject-by-level matrix (levels of `this_iv`) where each cell is `t`, `f`, `t-f` or `c`
#    taken from `get_prob_timecourse` for that subject.
# 2. Build a vector of each subject's pre-post change measure, in the same subject order as the matrix rows.
# 3. Correlate (`stats.pearsonr`) each column of the matrix with the pre-post vector; the resulting
#    vector of r values is the time course of interest for this ROI.
# NOTE(review): the timecourses come from ALLDM, which is not subset by ROI in this cell;
# only the pre-post measure is ROI-specific.

# some subjects missing in prepost
prepost = pd.read_csv('neural_changes_by_surfroi_and_subject.csv')
prepost['IDs'] = [int(each[:each.find('_')]) for each in prepost['IDs'].values]
missing_subj = np.setdiff1d(np.unique(ALLDM['subj']), prepost['IDs'])
mask = ALLDM['subj'].isin(missing_subj)
ALLDM = ALLDM[~mask]
subs = np.unique(ALLDM.subj.values)  # sorted, so matrix rows below are in ascending subject order

# `sort_values` returns a new frame, so the result must be assigned back; otherwise the rows of
# `prepost` stay in file order and do not line up with `subs`. Also drop any prepost subject that
# has no rows in ALLDM. (ALLDM itself needs no sorting: it is only accessed through per-subject masks.)
prepost = prepost[prepost['IDs'].isin(subs)].sort_values('IDs').reset_index(drop=True)
assert np.array_equal(prepost['IDs'].values, subs), 'prepost rows are not aligned with subs'

variants = ['t', 'f', 't-f', 'c']
ivs = ['run_num','trial_num']#,'time_point']
#this_iv = 'run_num'

# create the output directory once, not once per figure
plot_dir = './plots/roi/sketchdiffcourse'
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)

for this_iv in ivs:

    # 1. Subject-by-level matrices. These depend only on the IV, so they are computed once here
    #    instead of once per (variant, ROI) combination.
    T, F, C = [], [], []
    for sub in subs:
        t,f,c = get_prob_timecourse(this_iv,ALLDM[ALLDM['subj']==sub])
        T.append(t)
        F.append(f)
        C.append(c)
    T = np.vstack(T)
    F = np.vstack(F)
    C = np.vstack(C)
    scores_by_variant = {'t': T, 'f': F, 't-f': T-F, 'c': C}

    for variant in variants:
        scores = scores_by_variant[variant]
        num_levels = np.shape(scores)[1]

        for this_roi in roi_list:

            # 2. Pre-post change measure per subject, in the same (sorted) order as the matrix rows.
            recog = prepost['tradiff_{}'.format(this_roi)].values-prepost['condiff_{}'.format(this_roi)].values

            # 3. Correlate each column with the pre-post vector, then test for a linear trend
            #    of those correlations across levels.
            corcourse = [stats.pearsonr(scores[:,i],recog)[0] for i in range(num_levels)]
            r,p = stats.pearsonr(np.arange(num_levels),corcourse)

            fig = plt.figure(figsize=(8,4))
            plt.plot(corcourse, 'ro', label='data')
            plt.axhline(y=0.0,linestyle='dashed')
            plt.ylim((-.6,.6))
            plt.ylabel('corr({}, prepost_difference)'.format(variant))
            plt.xlabel(this_iv)
            plt.title('ROI: {}  r={}  p={}'.format(this_roi,np.round(r,5),np.round(p,5)))

            plt.tight_layout()
            plt.savefig('./plots/roi/sketchdiffcourse/sketchdiffcourse_{}_{}_{}.pdf'.format(this_roi,this_iv,variant))
            plt.close(fig)

Let's try a variant that plots the t and f correlations with prepost as separate series on the same graph.

In [30]:
numTrials = 20

# some subjects missing in prepost
prepost = pd.read_csv('neural_changes_by_surfroi_and_subject.csv')
prepost['IDs'] = [int(each[:each.find('_')]) for each in prepost['IDs'].values]
missing_subj = np.setdiff1d(np.unique(ALLDM['subj']), prepost['IDs'])
mask = ALLDM['subj'].isin(missing_subj)
ALLDM = ALLDM[~mask]
subs = np.unique(ALLDM.subj.values)  # sorted, so matrix rows below are in ascending subject order

# `sort_values` returns a new frame, so the result must be assigned back; otherwise the rows of
# `prepost` stay in file order and do not line up with `subs`. Also drop any prepost subject that
# has no rows in ALLDM.
prepost = prepost[prepost['IDs'].isin(subs)].sort_values('IDs').reset_index(drop=True)
assert np.array_equal(prepost['IDs'].values, subs), 'prepost rows are not aligned with subs'

ivs = ['run_num','trial_num']#,'time_point']
#this_iv = 'run_num'

# create the output directory once, not once per figure
plot_dir = './plots/roi/sketchdiffcourse'
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)

for this_iv in ivs:

    # 1. Subject-by-level matrices of target (T) and competitor (F) scores from
    #    `get_prob_timecourse`. They do not depend on the ROI, so compute them once per IV.
    T, F = [], []
    for sub in subs:
        t,f,c = get_prob_timecourse(this_iv,ALLDM[ALLDM['subj']==sub])
        T.append(t)
        F.append(f)
    T = np.vstack(T)
    F = np.vstack(F)
    num_ivs = np.shape(T)[1]
    levels = np.arange(num_ivs)

    for this_roi in roi_list:

        # 2. Pre-post change measure per subject, in the same (sorted) order as the matrix rows.
        recog = prepost['tradiff_{}'.format(this_roi)].values-prepost['condiff_{}'.format(this_roi)].values

        # 3. Correlate each column of the score matrix with the pre-post vector, and draw the
        #    correlations plus a least-squares line for target and competitor on one figure.
        fig = plt.figure(figsize=(8,4))

        mean_corr = {}
        for name, scores, marker, colour in (('target', T, 'ro', 'r'),
                                             ('competitor', F, 'cv', 'c')):
            corcourse = [stats.pearsonr(scores[:,i],recog)[0] for i in range(num_ivs)]
            mean_corr[name] = np.mean(np.array(corcourse))
            plt.plot(corcourse, marker, label=name)
            plt.plot(levels, np.polyval(np.polyfit(levels, corcourse, 1), levels), colour)

        # The figure shows both series, so the y label must not use `variant`
        # (a stale loop variable from the previous cell); the x axis is the current IV.
        plt.ylabel('corr({}, prepost_difference)'.format('t or f'))
        plt.xlabel(this_iv)
        plt.title('ROI: {}  mean(t): {}  mean(f): {}'.format(this_roi,
                                                             np.round(mean_corr['target'],5),
                                                             np.round(mean_corr['competitor'],5)))
        plt.legend()

        plt.tight_layout()
        plt.savefig('./plots/roi/sketchdiffcourse/sketchdiffcourse_{}_{}.pdf'.format(this_roi, this_iv))
        plt.close(fig)

Let's try redoing the original draw_contrast vs. prepost differentiation analysis, focusing on the target and competitor scores.

In [31]:
## per-subject mean scores, filled in below (one entry per subject per IV, in `subs` order)
sub_t = []
sub_f = []
sub_c = []
sub_tf = []
lookup = dict(zip(['trial_num','run_num','time_point'],['repetition','run','TR']))
ivs = ['time_point'] ## other options 'run_num','trial_num',

# some subjects missing in prepost
prepost = pd.read_csv('neural_changes_by_surfroi_and_subject.csv')
prepost['IDs'] = [int(each[:each.find('_')]) for each in prepost['IDs'].values]
missing_subj = np.setdiff1d(np.unique(ALLDM['subj']), prepost['IDs'])
mask = ALLDM['subj'].isin(missing_subj)
ALLDM = ALLDM[~mask]
subs = np.unique(ALLDM.subj.values)  # sorted; `d` below has one row per subject in this order

# `sort_values` returns a new frame, so the result must be assigned back; otherwise the rows of
# `prepost` stay in file order and do not line up with `subs` (and hence with `d`). Also drop any
# prepost subject that has no rows in ALLDM.
prepost = prepost[prepost['IDs'].isin(subs)].sort_values('IDs').reset_index(drop=True)
assert np.array_equal(prepost['IDs'].values, subs), 'prepost rows are not aligned with subs'

## do you want to render the CONDITION-wise plots -- trained vs. foil vs control
## or the DIFFERENCE plots -- trained - foil vs foil - control?
render_cond = 0

conditions = ['target','competitor','control','draw_contrast']

for this_iv in ivs:
    T, F, C, DTF = [], [], [], []
    Trial, Sub = [], []
    for sub in subs:
        t,f,c = get_prob_timecourse(this_iv,ALLDM[ALLDM['subj']==sub],version=version)
        T.append(t)
        F.append(f)
        C.append(c)
        DTF.append(t-f)
        # level index and subject ID for every entry of this subject's timecourse
        # (built per subject, so subjects need not all have the same number of levels)
        Trial.append(np.arange(len(t)))
        Sub.append([sub]*len(t))
    T = np.hstack(T)
    F = np.hstack(F)
    C = np.hstack(C)
    DTF = np.hstack(DTF)

    ## make longform version of dataframe to use in tsplot (difference btw conditions);
    ## the four condition blocks are stacked in the order given by `conditions`
    Trial = np.tile(np.hstack(Trial),len(conditions))
    Condition = np.repeat(conditions,len(T))
    Sub = np.tile(np.array(flatten(Sub)),len(conditions))
    Prob = np.hstack((T,F,C,DTF))
    assert len(Trial)==len(Condition)==len(Sub)==len(Prob)
    # build from a dict so each column keeps its own dtype (numeric probabilities, not `object`)
    x = pd.DataFrame({'probability': Prob,
                      lookup[this_iv]: Trial,
                      'condition': Condition,
                      'sub': Sub},
                     columns=['probability',lookup[this_iv],'condition','sub'])

    ## per-subject mean of each condition, re-ordered to match `subs`
    sub_means = (x.groupby(['sub','condition'])['probability']
                  .mean()
                  .unstack('condition')
                  .reindex(subs))
    sub_t.extend(sub_means['target'].values)
    sub_f.extend(sub_means['competitor'].values)
    sub_c.extend(sub_means['control'].values)
    sub_tf.extend(sub_means['draw_contrast'].values)

## make dataframe with subject-level difference scores
d = pd.DataFrame({'target': sub_t,
                  'competitor': sub_f,
                  'control': sub_c,
                  'draw_contrast': sub_tf},
                 columns=conditions)
d = d.astype({'target':'float64','competitor':'float64','control':'float64','draw_contrast':'float64'})

In [32]:
## make dataframe to relate drawing contrast to recognition differentiation
## (full 16-ROI set; this rebinds the notebook-level `roi_list` used by the cells above)
roi_list = ['V1', 'V2', 'LOC', 'IT', 'fusiform', 'parahippo',
            'PRC', 'ento', 'hipp', 'mOFC', 'IFG', 'rostMFG',
            'caudMFG', 'precentral', 'SMG', 'STG']

## `d` has one row per subject in `subs` (ascending) order, so take the prepost rows for exactly
## those subjects in that same order before pairing the two.
prepost_aligned = prepost[prepost['IDs'].isin(subs)].sort_values('IDs').reset_index(drop=True)
assert np.array_equal(prepost_aligned['IDs'].values, subs), 'prepost rows are not aligned with subs'

## one-off setup, hoisted out of the plotting loop
plot_dir = './plots/roi/sketchdiff'
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)
sns.set_context('poster')

for spec in ['target','competitor','control','draw_contrast']:
    draw = d[spec].values  # per-subject drawing score; the same for every ROI
    for this_roi in roi_list:
        recog = prepost_aligned['tradiff_{}'.format(this_roi)].values-prepost_aligned['condiff_{}'.format(this_roi)].values
        #recog = prepost['trained_{}'.format(this_roi)].values-prepost['control_{}'.format(this_roi)].values

        z = pd.DataFrame({'draw': draw, 'recog': recog}, columns=['draw','recog'])

        ## plot
        fig = plt.figure(figsize=(6,6))
        sns.regplot(x="draw",
                    y ="recog",
                    data=z)
        r,p = stats.pearsonr(draw,recog)
        plt.title('ROI: {}  r={}  p={}'.format(this_roi,np.round(r,5),np.round(p,5)))
        plt.xlabel('drawing: {}'.format(spec))
        plt.ylabel('recog: post-pre differentiation')
        plt.tight_layout()
        plt.savefig('./plots/roi/sketchdiff/sketchdiff_scatter_{}_{}.pdf'.format(this_roi, spec))
        plt.close(fig)