In [None]:
from __future__ import division

import numpy as np
import os
from glob import glob

from PIL import Image
from copy import deepcopy

from sklearn import linear_model, datasets, neighbors
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import svm

%matplotlib inline
from scipy.misc import imread, imresize
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
sns.set_context('poster')
colors = sns.color_palette("cubehelix", 5)

import pandas as pd

import scipy.stats as stats
from scipy.stats import norm
import sklearn

### define project paths

In [None]:
## make the project's helper modules importable (append only once)
import sys
_helpers_dir = '/home/jefan/neurosketch/python'
if _helpers_dir not in sys.path:
    sys.path.append(_helpers_dir)

## root paths, all relative to the notebook's working directory
curr_dir = os.getcwd()
proj_dir = '../..'
data_dir = '../../data/'
results_dir = '../../csv/'

## helper module; reload(h) re-executes it after the import
import analysis_helpers as h
reload(h)

## ROI names for the recognition features (also kept as a notebook-level name)
roi_list_recog = np.array(['V1', 'V2', 'LOC', 'fusiform', 'parahippo',
                           'IT', 'ento', 'PRC', 'hipp', 'mOFC'])

## ROI names for the drawing features
_roi_list_draw = np.array(['V1Draw', 'V2Draw', 'LOCDraw',
                           'InsulaDraw', 'postCentralDraw', 'preCentralDraw',
                           'ParietalDraw', 'FrontalDraw', 'smgDraw'])

## configure the helper module: data root, feature directories, ROI lists
h.data_dir = data_dir
h.path_to_recog = os.path.join(data_dir, 'features/recog')
h.path_to_draw = os.path.join(data_dir, 'features/drawing')
h.roi_list_draw = _roi_list_draw
h.roi_list_recog = roi_list_recog

### relate individual differences in overall target selectivity (log odds of target vs. foil) to prepost differentiation

In [None]:
## file selectors: both are substituted into the contrast csv filename
version, tag = '4way', 'log'

## paths of the two input tables, relative to the project root
_contrast_csv = os.path.join(proj_dir,'csv/difference_{}probs_{}.csv'.format(tag,version))
_prepost_csv = os.path.join(proj_dir,'csv/neural_changes_by_surfroi_and_subject.csv')

## d: drawing contrast table; prepost: neural change measures by ROI and subject
d = pd.read_csv(_contrast_csv)
prepost = pd.read_csv(_prepost_csv)

In [None]:
## make dataframe to relate drawing contrast to recognition differentiation
roi_list = ['V1', 'V2', 'LOC', 'IT', 'fusiform', 'parahippo', 'PRC', 'ento','hipp', 'mOFC']

## output directory for the per-ROI scatter plots; created once, before the loop
plot_dir = os.path.join(proj_dir,'plots/roi/drawrecog')
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)

## plotting context does not change between ROIs, so set it once
sns.set_context('poster')

for this_roi in roi_list:
    ## x: target vs. foil contrast during drawing for this ROI
    ## y: trained minus control differentiation for this ROI
    ## NOTE(review): assumes the rows of `d` for this ROI and the rows of `prepost`
    ## are in the same subject order -- confirm against how the csvs were written
    draw = d[d['roi']==this_roi]['target-foil'].values
    recog = (prepost['UnanchoredTrainedDiff_{}'.format(this_roi)].values
             - prepost['UnanchoredControlDiff_{}'.format(this_roi)].values)

    z = pd.DataFrame({'draw': draw, 'recog': recog}, columns=['draw','recog'])

    r,p = stats.pearsonr(draw,recog)
    ## decide significance on the unrounded p-value; rounding is for display only
    ## (testing the rounded value would drop the '*' for e.g. p=0.049996)
    accent = '*' if p<0.05 else ''
    r_disp, p_disp = np.round(r,5), np.round(p,5)

    ## plot
    fig, ax = plt.subplots(figsize=(6,6))
    sns.regplot(x="draw",
                y="recog",
                data=z,
                ax=ax)
    ax.set_title('ROI: {}  r={}  p={}'.format(this_roi,r_disp,p_disp))
    ax.set_xlabel('drawing: target vs. foil contrast')
    ax.set_ylabel('recog: post-pre differentiation')

    ## parenthesized single-argument print is valid under both python 2 and 3
    print('ROI: {}  r={}  p={} {}'.format(this_roi,r_disp,p_disp,accent))

    fig.tight_layout()
    fig.savefig(os.path.join(plot_dir,'draw_recog_scatter_{}.pdf'.format(this_roi)))
    ## close each figure so they do not accumulate in memory across ROIs
    plt.close(fig)

### Some observations:
* ROIs where we see a positive relationship are: *V1*, *V2*. That is, individuals with GREATER target selectivity during drawing in these regions also show GREATER prepost differentiation.
* ROIs where we see a negative relationship are: *fusiform*, *mOFC* (maybe PRC?). That is, individuals with LESS target selectivity during drawing in these regions show GREATER prepost differentiation.

### When does the relationship between target selectivity and differentiation emerge in these ROIs?


In [None]:
## file selectors: both are substituted into the timeseries csv filename
version, tag = '4way', 'logged'

## read the timeseries table from the results directory, then run it through
## the helper module's cleanup_df
_timeseries_csv = 'logistic_timeseries_drawing_neural_{}_{}.csv'.format(version,tag)
ALLDM = pd.read_csv(os.path.join(results_dir,_timeseries_csv))
ALLDM = h.cleanup_df(ALLDM)

In [None]:
## display the first rows of ALLDM as a quick sanity check of the loaded table
ALLDM.head()

In [None]:
def bootstrapCI(x,estimator,nIter, *args):
    '''
    Bootstrap an estimator by resampling x with replacement.

    input:
        x: array-like of observations; resampled along its first axis
        estimator: callable invoked as estimator(resample, *args), returning a number
        nIter: number of bootstrap iterations (must be at least 1); iteration i
               draws its resample from np.random.RandomState(i), so repeated
               calls give identical results
        *args: extra positional arguments forwarded to estimator
    output:
        U: mean of the bootstrap estimates
        lb, ub: 2.5th and 97.5th percentiles of the bootstrap estimates
        p: twice the smaller of the fractions of estimates below zero and above zero
    raises:
        ValueError if nIter yields no iterations
    '''
    x = np.asarray(x)  ## lets plain lists be indexed by an array of positions
    nObs = len(x)

    ## the loop used to be wrapped in tqdm, which this notebook never imports
    ## (NameError on a fresh kernel); iterate over the seeds directly instead
    u = []
    for i in np.arange(nIter):
        inds = np.random.RandomState(i).choice(nObs,nObs)
        u.append(estimator(x[inds], *args))

    if len(u)==0:
        raise ValueError('nIter must be at least 1')

    ## explicit float so the fractions do not rely on `from __future__ import division`
    nBoot = float(len(u))
    p1 = len([i for i in u if i<0])/nBoot * 2
    p2 = len([i for i in u if i>0])/nBoot * 2
    p = np.min([p1,p2])
    U = np.mean(u)
    lb = np.percentile(u,2.5)
    ub = np.percentile(u,97.5)
    return U,lb,ub,p

def corrbootstrapCI(x,y,roi, nIter):
    '''
    Bootstrap the correlation between x and y by resampling paired positions.

    input:
        x, y: arrays supporting integer-array indexing; y is indexed with the
              same resampled positions as x
        roi: ROI label; for 'Frontal' the correlation is taken from pandas
             DataFrame.corr, otherwise from stats.pearsonr
        nIter: number of bootstrap iterations; iteration i draws its resample
               from np.random.RandomState(i)
    output:
        (U, lb, ub, p): mean of the bootstrapped correlations, their 2.5th and
        97.5th percentiles, and twice the smaller of the fractions of
        correlations below zero and above zero
    '''
    def _pandas_r(a, b):
        ## correlation read off the 2x2 pandas correlation matrix
        return pd.DataFrame([a, b]).transpose().corr()[0][1]

    def _scipy_r(a, b):
        ## correlation coefficient only; pearsonr's p-value is discarded
        return stats.pearsonr(a, b)[0]

    corr_fn = _pandas_r if roi in ['Frontal'] else _scipy_r
    nObs = len(x)

    boots = []
    for seed in np.arange(nIter):
        picks = np.random.RandomState(seed).choice(nObs,nObs)
        boots.append(corr_fn(x[picks], y[picks]))

    nBelow = sum(1 for val in boots if val<0)
    nAbove = sum(1 for val in boots if val>0)
    p = np.min([nBelow/len(boots) * 2, nAbove/len(boots) * 2])

    return np.mean(boots), np.percentile(boots,2.5), np.percentile(boots,97.5), p

In [None]:
## analysis settings
version = '4way'
logged = True
if logged:
    tag = 'logged'
else:
    tag = 'raw'

roi_list = ['V1']
ivs = ['trial_num'] # ['run_num','trial_num','time_point']
takeDiffDifference = True ## compare trained object differentiation vs. control object differentiation?

## unique values of the subj column in ALLDM
subs = np.unique(ALLDM['subj'].values)

In [None]:
for anchored in [True, False]:
    specs = ('anchored' if anchored else 'unanchored', 'logged' if logged else 'raw')
    for this_iv in ivs:
        for this_roi in roi_list:
            # 1. Generate a subject-by-trial_num matrix where each cell is either `t`, `f`, or `t-f` from the 
            # output of `analysis_helpers.get_prob_timecourse` for the associated trial and subject (and roi) pairing. 
            scores = []
            for sub in subs:
                inds = (ALLDM['roi']==this_roi) & (ALLDM['subj']==sub)
                t,f,c = h.get_prob_timecourse(this_iv,ALLDM[inds],version=version)

                if len(scores)==0:
                    scores = t-f
                else:
                    scores = np.vstack((scores,t-f))

            # 2. Generate a subject_num length vector consisting of each subject's pre-post change measure 
            # in the same order as they are in the matrix.
            if anchored:
                recog = prepost['AnchoredTrainedDiff_{}'.format(this_roi)].values
                recog -= prepost['AnchoredControlDiff_{}'.format(this_roi)].values if takeDiffDifference else 0
            else:
                recog = prepost['UnanchoredTrainedDiff_{}'.format(this_roi)].values
                recog -= prepost['UnanchoredControlDiff_{}'.format(this_roi)].values if takeDiffDifference else 0 

            # 3. The vector defined by taking the `stats.pearsonr()` between each column of the subject-by-trial_num matrix
            # and the prepost change vector is the time course we're looking to understand for this ROI.
            if this_roi == 'Frontal':
                ## SEE HERE: lets decide what to do with classifier output == 0 in rare cases, ignore missing? 
                ## or add 1e-6 or smallest number in dataset so we can avoid having this catch here
                corcourse = [pd.DataFrame([scores[:,i], recog]).transpose().corr()[0][1] for i in range(np.shape(scores)[1])]
            else:
                corcourse = [stats.pearsonr(scores[:,i],recog)[0] for i in range(np.shape(scores)[1])]

            bootstrap = [corrbootstrapCI(scores[:,i],recog, this_roi, 1000) for i in range(np.shape(scores)[1])]
            lesserror, pluserror = [b[1] for b in bootstrap], [b[2] for b in bootstrap]                    