In [105]:
from __future__ import division

import numpy as np
import os
from glob import glob

from PIL import Image
from copy import deepcopy

from sklearn import linear_model, datasets, neighbors
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import svm

%matplotlib inline
from scipy.misc import imread, imresize
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
sns.set_context('poster')
colors = sns.color_palette("cubehelix", 5)

import pandas as pd

import scipy.stats as stats
from scipy.stats import norm, linregress
import sklearn
from importlib import reload

### define project paths

In [109]:
## make the project's helper modules importable
## NOTE(review): the paths below are absolute and machine-specific; adjust them when running elsewhere
import sys
helpers_path = '/Users/jeffwammes/Working/neurosketch/python'
if helpers_path not in sys.path:
    sys.path.append(helpers_path)

## root paths
curr_dir = os.getcwd()
data_dir = '/Volumes/ntb/projects/sketchloop02/data/'
proj_dir = '../..'
results_dir = '../../csv/'
nb_name = '3_relate_drawing_training_and_recog_prepost'

## module definitions
import analysis_helpers as h
reload(h)  # re-import so edits to the helper module are picked up without a kernel restart

## point the helper module at this project's feature directories
h.data_dir = data_dir
h.path_to_recog = '{}features/recog'.format(data_dir)
h.path_to_draw = '{}features/drawing'.format(data_dir)

## ROI names for the drawing phase all carry a 'Draw' suffix
h.roi_list_draw = np.array([name + 'Draw' for name in
                            ['V1', 'V2', 'LOC', 'Insula', 'postCentral',
                             'preCentral', 'Parietal', 'Frontal', 'smg']])

## ROI names for the recognition phase; also kept as a notebook-level variable
roi_list_recog = np.array(['V1', 'V2', 'LOC', 'fusiform','parahippo','IT','ento','PRC','hipp','mOFC'])
h.roi_list_recog = roi_list_recog

### relate individual differences in overall target selectivity (log odds of target vs. foil) to prepost differentiation

This plots the raw correlations between classifier evidence for target - foil and prepost differentiation between trained objects.

In [99]:
# select which classifier output to load: the 4-way classifier with log-transformed probabilities
version, tag = '4way', 'log'

# both tables live in the project's csv directory
csv_dir = os.path.join(proj_dir, 'csv')
d = pd.read_csv(os.path.join(csv_dir, 'difference_{}probs_{}.csv'.format(tag, version)))
prepost = pd.read_csv(os.path.join(csv_dir, 'neural_changes_by_surfroi_and_subject.csv'))

In [100]:
## make dataframe to relate drawing contrast to recognition differentiation
roi_list = ['V1', 'V2', 'LOC', 'IT', 'fusiform', 'parahippo', 'PRC', 'ento','hipp', 'mOFC']

## the output directory is the same for every ROI, so create it once up front
out_dir = os.path.join(proj_dir,'plots/{}/drawrecog'.format(nb_name))
os.makedirs(out_dir, exist_ok=True)
sns.set_context('poster')

for this_roi in roi_list:
    ## drawing-phase target-vs-foil contrast for this ROI (one value per row of `d` for the ROI)
    draw = d[d['roi']==this_roi]['target-foil'].values
    ## trained-object change minus control-object change for this ROI
    recog = (prepost['UnanchoredTrainedDiff_{}'.format(this_roi)].values
             - prepost['UnanchoredControlDiff_{}'.format(this_roi)].values)

    ## NOTE(review): the two vectors are paired by position; this assumes both CSVs
    ## list subjects in the same order -- confirm against how the CSVs are written.
    if len(draw) != len(recog):
        raise ValueError('ROI {}: {} drawing values vs. {} prepost values'.format(
            this_roi, len(draw), len(recog)))

    z = pd.DataFrame({'draw': draw, 'recog': recog})

    ## plot
    fig, ax = plt.subplots(figsize=(6,6))
    sns.regplot(x="draw",
                y="recog",
                data=z,
                ax=ax)
    r,p = stats.pearsonr(draw,recog)
    r_shown, p_shown = np.round(r,5), np.round(p,5)
    ax.set_title('ROI: {}  r={}  p={}'.format(this_roi,r_shown,p_shown))

    ## flag significance on the unrounded p-value so borderline values
    ## (e.g. 0.049996, which rounds to 0.05) are not misclassified
    accent = '*' if p < 0.05 else ''
    print('ROI: {}  r={}  p={} {}'.format(this_roi,r_shown,p_shown,accent))

    ax.set_xlabel('drawing: target vs. foil contrast')
    ax.set_ylabel('recog: post-pre differentiation')
    fig.tight_layout()
    fig.savefig(os.path.join(out_dir,'draw_recog_scatter_{}.pdf'.format(this_roi)))
    plt.close(fig)  # avoid accumulating open figures across ROIs
    

ROI: V1  r=0.37475  p=0.03779 *
ROI: V2  r=0.46675  p=0.00812 *
ROI: LOC  r=-0.01186  p=0.94953 
ROI: IT  r=-0.10254  p=0.58306 
ROI: fusiform  r=-0.39503  p=0.02785 *
ROI: parahippo  r=0.12824  p=0.49175 
ROI: PRC  r=-0.32176  p=0.07754 
ROI: ento  r=0.08294  p=0.65734 
ROI: hipp  r=0.06561  p=0.72584 
ROI: mOFC  r=-0.37663  p=0.03676 *


### Some observations:
* ROIs where we see a positive relationship: *V1*, *V2*. That is, individuals with GREATER target selectivity during drawing in these regions also show GREATER prepost differentiation.
* ROIs where we see a negative relationship: *fusiform*, *mOFC* (maybe PRC?). That is, individuals with LESS target selectivity during drawing in these regions also show GREATER prepost differentiation.

### When does the relationship between target selectivity and differentiation emerge in these ROIs?


In [101]:
# Choose the classifier variant and load its drawing-phase timeseries

version = '4way'
tag = 'logged'
timeseries_csv = os.path.join(results_dir,'logistic_timeseries_drawing_neural_{}_{}.csv'.format(version,tag))
# read the raw table and pass it straight through the helper module's cleanup routine
ALLDM = h.cleanup_df(pd.read_csv(timeseries_csv))

In [102]:
def bootstrapCI(x,estimator,nIter, *args):
    """Bootstrap an estimator over the first axis of `x`.

    On iteration `i`, `len(x)` indices are drawn with replacement using
    `np.random.RandomState(i)` (so results are reproducible across calls),
    and `estimator(resample, *args)` is evaluated on the resampled data.

    Parameters
    ----------
    x : array-like
        Observations to resample; converted with `np.asarray` so plain
        lists are accepted as well as arrays.
    estimator : callable
        Called as `estimator(resample, *args)`; expected to return a scalar.
    nIter : int
        Number of bootstrap iterations; must be at least 1.
    *args
        Extra positional arguments forwarded to `estimator`.

    Returns
    -------
    (U, lb, ub, p)
        Mean of the bootstrap estimates, their 2.5th and 97.5th
        percentiles, and a two-sided p-value: twice the smaller of the
        fractions of estimates below zero and above zero.

    Raises
    ------
    ValueError
        If `nIter` is less than 1.
    """
    if nIter < 1:
        raise ValueError('nIter must be a positive integer, got {}'.format(nIter))

    x = np.asarray(x)  # fancy indexing below needs an array, not a list
    n_obs = len(x)

    u = []
    for i in np.arange(nIter):
        # the iteration number doubles as the seed, making every call deterministic
        inds = np.random.RandomState(i).choice(n_obs, n_obs)
        u.append(estimator(x[inds], *args))

    n_boot = len(u)
    p1 = len([est for est in u if est<0])/n_boot * 2
    p2 = len([est for est in u if est>0])/n_boot * 2
    p = np.min([p1,p2])
    U = np.mean(u)
    lb = np.percentile(u,2.5)
    ub = np.percentile(u,97.5)
    return U,lb,ub,p

 

In [103]:
## classifier variant and whether its probabilities were log-transformed
version, logged = '4way', True
if logged:
    tag = 'logged'
else:
    tag = 'raw'

## ROIs and subjects to iterate over
roi_list = roi_list_recog
subs = np.unique(ALLDM['subj'].values)

## independent variable(s) along which timecourses are computed
ivs = ['trial_num'] # ['run_num','trial_num','time_point']

## compare trained object differentiation vs. control object differentiation?
takeDiffDifference = True

In [110]:
## every figure below goes to the same directory, so create it once
out_dir = os.path.join(proj_dir,'plots/{}/drawrecog'.format(nb_name))
os.makedirs(out_dir, exist_ok=True)

for anchored in [False]:
    specs = ('anchored' if anchored else 'unanchored', 'logged' if logged else 'raw')
    ## column-name prefix selecting the anchored or unanchored prepost measure
    prefix = 'Anchored' if anchored else 'Unanchored'
    for this_iv in ivs:
        for this_roi in roi_list:
            # 1. Generate a subject-by-trial_num matrix where each cell is `t-f` from the
            # output of `analysis_helpers.get_prob_timecourse` for the associated trial and subject (and roi) pairing.
            # Rows are collected in a list and stacked once, so the result is 2-D even with a single subject.
            rows = []
            for sub in subs:
                inds = (ALLDM['roi']==this_roi) & (ALLDM['subj']==sub)
                t,f,c = h.get_prob_timecourse(this_iv,ALLDM[inds],version=version)
                rows.append(t-f)
            scores = np.vstack(rows)
            n_points = scores.shape[1]

            # 2. Generate a subject_num length vector consisting of each subject's pre-post change measure
            # in the same order as they are in the matrix.
            # The subtraction builds a NEW array: an in-place `-=` on `.values` would write through
            # to `prepost` itself (corrupting it on re-runs) or fail on a read-only array.
            trained = prepost['{}TrainedDiff_{}'.format(prefix, this_roi)].values
            if takeDiffDifference:
                recog = trained - prepost['{}ControlDiff_{}'.format(prefix, this_roi)].values
            else:
                recog = trained.copy()

            # 3. The vector defined by taking the `stats.pearsonr()` between each column of the subject-by-trial_num matrix
            # and the prepost change vector is the time course we're looking to understand for this ROI.
            if this_roi == 'Frontal':
                ## SEE HERE: lets decide what to do with classifier output == 0 in rare cases, ignore missing?
                ## or add 1e-6 or smallest number in dataset so we can avoid having this catch here
                trial_corrs = [pd.DataFrame([scores[:,i], recog]).transpose().corr()[0][1] for i in range(n_points)]
            else:
                trial_corrs = [stats.pearsonr(scores[:,i],recog)[0] for i in range(n_points)]

            ## bootstrapped interval around each point's correlation (elements 1 and 2 of the helper's result)
            bootstrap = [h.corrbootstrapCI(scores[:,i], recog, 1000) for i in range(n_points)]
            lower_bound, upper_bound = [b[1] for b in bootstrap], [b[2] for b in bootstrap]

            fig, ax = plt.subplots(figsize=(8,4))
            ax.plot(trial_corrs, 'ro', label='data')
            ax.axhline(y=0.0,linestyle='dashed')
            ax.set_ylim((-1,1))
            ## NOTE(review): the scores correlated here are t-f, yet the label says 't-c' -- confirm intended wording
            ax.set_ylabel('Correlation (t-c / prepost)')
            ax.set_xlabel(this_iv)
            ax.set_title('ROI: {}'.format(this_roi))
            ax.fill_between(np.arange(len(trial_corrs)), lower_bound, upper_bound, alpha=.2)

            fig.tight_layout()
            fig.savefig(os.path.join(out_dir,'trial_corrs_{}_{}_{}.pdf'.format(this_roi, *specs)))
            plt.close(fig)  # avoid accumulating open figures across ROIs
            

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  **kwargs)


### Create summary plot of either mean correlation across subjects, or mean slope.

In [70]:
## reload both tables so this cell starts from the files on disk
version = '4way'
timeseries_tag = 'logged' if logged else 'raw'
ALLDM = pd.read_csv(results_dir+'logistic_timeseries_drawing_neural_{}_{}.csv'.format(version,timeseries_tag))
prepost = pd.read_csv(str(results_dir)+'neural_changes_by_surfroi_and_subject.csv')
# sketch of a possible function signature for this analysis:
# clf_dataframe = ALLDM, rec_dataframe = prepost, notebook = nb_name, iv = 'trial_num'
# plotting = 'mean', roi_list, logged=[True, False], anchored=[True, False], clfmeasure=['t','f',t-f','txf']

# Choose what measure to plot ['mean' or 'slope'].
# NOTE(review): 'corr' is neither listed option; the plotting loop below only tests for
# 'mean', so any other value (including 'corr') takes the slope branch.
plotting = 'corr'

# subtract control-object change from trained-object change?
takeDiffDifference = True
# independent variable along which classifier timecourses are computed
this_iv = 'trial_num'
roi_list = np.array(['V1','V2','LOC','IT','fusiform','parahippo', 'PRC', 'ento','hipp','mOFC'])


def compute_clf_measure(target, foil, measure):
    """Combine target and foil classifier outputs according to `measure`.

    `'t'` returns `target`; `'t-f'` returns `target - foil`; `'txf'` returns
    `target + foil` when the notebook-level `logged` flag is truthy and
    `target * foil` otherwise. Any other value of `measure` returns `foil`.
    """
    if measure == 't':
        return target
    if measure == 't-f':
        return target - foil
    if measure == 'txf':
        # a product of probabilities is a sum in log space
        if logged:
            return target + foil
        return target * foil
    # fallback for 'f' and for any unrecognised measure
    return foil
    
def scoreVSdiff(subdata, this_roi):
    """Correlate per-entry mean classifier score with the per-entry difference score.

    Each element of `subdata` is indexed with the keys `'clf'` (averaged with
    `np.mean`) and `'diff'`. Returns the Pearson correlation between the two
    resulting vectors. For the ROI named `'Frontal'` the correlation is taken
    from `DataFrame.corr()` instead of `stats.pearsonr`.
    """
    mean_clf = []
    diffs = []
    for entry in subdata:
        mean_clf.append(np.mean(entry['clf']))
        diffs.append(entry['diff'])

    if this_roi != 'Frontal':
        return stats.pearsonr(mean_clf, diffs)[0]

    # two-column frame (0 = mean clf score, 1 = diff); read their off-diagonal correlation
    paired = pd.DataFrame([mean_clf, diffs]).T
    return paired.corr()[0][1]
    
def slope_scoreVSdiff(subdata, this_roi, num_ivs=20):
    """Slope, across timepoints, of the correlation between classifier score and difference score.

    For each timepoint index `i` in `range(num_ivs)`, the `i`-th classifier
    score of every entry is correlated with the entries' `'diff'` values; the
    resulting correlations are then regressed on the timepoint index with
    `linregress` and the fitted slope is returned.

    Parameters
    ----------
    subdata : sequence
        Entries indexed with the keys `'clf'` (a sequence with at least
        `num_ivs` elements) and `'diff'` (a scalar).
    this_roi : str
        ROI name; for `'Frontal'` the correlation is taken from
        `DataFrame.corr()` instead of `stats.pearsonr`.
    num_ivs : int, optional
        Number of timepoints to use. Defaults to 20, the value that was
        previously hard-coded.

    Returns
    -------
    float
        Slope of the correlation coefficients over timepoint index.
    """
    diffscores = [sub['diff'] for sub in subdata]
    clfscores = [sub['clf'] for sub in subdata]

    coefficients = []
    for i in range(num_ivs):
        # classifier score of every entry at timepoint i
        scores_at_i = [c[i] for c in clfscores]
        if this_roi == 'Frontal':
            coef = pd.DataFrame([scores_at_i, diffscores]).transpose().corr()[0][1]
        else:
            coef = stats.pearsonr(scores_at_i, diffscores)[0]
        coefficients.append(coef)

    return linregress(np.arange(num_ivs),coefficients)[0]

## every figure below goes to the same directory, so create it once
out_dir = os.path.join(proj_dir,'plots/{}/drawrecog'.format(nb_name))
os.makedirs(out_dir, exist_ok=True)

## NOTE: `logged` is deliberately the loop variable here -- `compute_clf_measure` reads it as a global
for logged in [True]:
    for anchored in [False]:
        for clfmeasure in ['t-f']:
            print('logged: {}; anchored: {}; clf: {}'.format(logged, anchored, clfmeasure))
            specs = ('anchored' if anchored else 'unanchored', 'logged' if logged else 'raw')
            ## column-name prefix selecting the anchored or unanchored prepost measure
            prefix = 'Anchored' if anchored else 'Unanchored'
            subs = np.unique(ALLDM.subj.values)

            ## label set of each subject; independent of ROI, so computed once
            objs = [str(np.unique(ALLDM[(ALLDM['subj']==sub)].label.values)) for sub in subs]

            ## one column per ROI; each cell holds a subject's clf timecourse, prepost diff and labels
            columns = []
            for this_roi in roi_list:
                _scores = np.array([h.get_prob_timecourse(this_iv,ALLDM[(ALLDM['roi']==this_roi) & (ALLDM['subj']==sub)]) for sub in subs])
                ## index 0 / 1 along the second axis are passed as target / foil
                scores = [compute_clf_measure(_scores[i,0,:], _scores[i,1,:], clfmeasure) for i in range(_scores.shape[0])]

                ## The subtraction builds a NEW array: an in-place `-=` on `.values` would write
                ## through to `prepost` itself or fail on a read-only array.
                trained = prepost['{}TrainedDiff_{}'.format(prefix, this_roi)].values
                if takeDiffDifference:
                    recog = trained - prepost['{}ControlDiff_{}'.format(prefix, this_roi)].values
                else:
                    recog = trained.copy()

                columns.append([{'clf':scores[i], 'diff':recog[i], 'objs':objs[i]} for i in range(len(scores))])
            bardf = pd.DataFrame(columns).transpose()
            bardf.columns = roi_list
            bar_values = np.array(bardf)  # subjects x ROIs array of the per-subject dicts

            ## pick the estimator and labels; anything other than 'mean' plots the slope
            if plotting == 'mean':
                estimator = scoreVSdiff
                title = 'Subjectwise Relationship B/t Mean({}) & PrePost Diff'.format(clfmeasure)
                ylab = 'Average Correlation'
                outfig = 'mean_{}_{}_{}.png'.format(clfmeasure, *specs)
            else:
                estimator = slope_scoreVSdiff
                title = 'Slope over Timepoints of Subjectwise Relationship B/t {} & Prepost Diff'.format(clfmeasure)
                ylab = 'Average Slope'
                outfig = 'slope_{}_{}_{}.png'.format(clfmeasure, *specs)

            # derive from bardf the df we want to plot and the error bars we want
            meandf = pd.DataFrame([estimator(bar_values[:,i], roi_list[i]) for i in range(len(roi_list))]).transpose()
            meandf.columns = roi_list
            ## elements 1:3 of bootstrapCI's result are the lower and upper percentile bounds
            error = [bootstrapCI(bar_values[:,i], estimator, 1000, roi_list[i])[1:3] for i in range(len(roi_list))]

            fig, ax = plt.subplots(figsize=(17,6))
            sns.barplot(data=meandf,palette='husl',ci=None,ax=ax)
            ax.set_xlabel('ROIs')
            ax.set_title(title)
            ax.set_ylabel(ylab)
            for i in range(len(roi_list)):
                ax.vlines(i, error[i][0], error[i][1])
            fig.tight_layout()
            fig.savefig(os.path.join(out_dir, outfig))
            plt.close(fig)

logged: True; anchored: False; clf: t-f
