## Setup

In [0]:
import warnings
warnings.filterwarnings("ignore")

from __future__ import division

import numpy as np
import os
from glob import glob

from PIL import Image
from copy import deepcopy

from sklearn import linear_model, datasets, neighbors
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import svm

%matplotlib inline
from scipy.misc import imread, imresize
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
sns.set_context('poster')
colors = sns.color_palette("cubehelix", 5)

import pandas as pd

import scipy.stats as stats
from scipy.stats import norm
import sklearn

from importlib import reload

##################################

import nibabel as nib
import brainiak
import json
import _pickle as cPickle
import analysis_helpers as helpers
from nilearn import image
from numpy import shape
from brainiak.searchlight.searchlight import Searchlight
from brainiak.searchlight.searchlight import Diamond
from sklearn.metrics.pairwise import pairwise_distances as pd
from timeit import default_timer as timer

ModuleNotFoundError: No module named 'brainiak'

### define paths to data

In [0]:
## root paths
# NOTE(review): proj_dir and data_dir are absolute, machine-specific Windows
# paths; they must be edited before this notebook can run anywhere else.
curr_dir = os.getcwd()
proj_dir = 'C:\\Users\\jgunn\\Google Drive\\neurosketch'
data_dir = 'D:\\data'
results_dir = os.path.join(proj_dir, 'csv')
nb_name = 'clfsearchlight'

## add helpers to python path
# Append <proj_dir>/python to sys.path (only once) so the import below can
# resolve modules stored there -- presumably where analysis_helpers lives.
import sys
if os.path.join(proj_dir, 'python') not in sys.path:
    sys.path.append(os.path.join(proj_dir, 'python'))

## module definitions
import analysis_helpers as h
# Re-import so edits made to the helper module since it was first loaded take effect.
reload(h)
# Override attributes on the helper module with this machine's locations and
# ROI lists -- presumably read by the module's own functions; verify there.
h.data_dir = data_dir
h.path_to_recog = os.path.join(data_dir, 'features/recog')
h.path_to_draw = os.path.join(data_dir, 'features/drawing')
h.roi_list_draw = np.array(['V1Draw', 'V2Draw', 'LOCDraw', 'InsulaDraw', 'postCentralDraw',
                            'preCentralDraw', 'ParietalDraw', 'FrontalDraw', 'smgDraw'])
h.roi_list_recog = np.array(['V1', 'V2', 'LOC', 'fusiform','parahippo','IT','ento','PRC','hipp','mOFC'])
# Local alias for the recognition ROI list set just above.
roi_list_recog = h.roi_list_recog

### get file list

In [0]:
## get raw file list for recognition runs
path_to_recog = h.path_to_recog

# List the directory once and reuse the listing for both filters.
_recog_listing = os.listdir(path_to_recog)

# Metadata files: extension 'csv' and a third '_'-separated field that does not
# end in 'Draw'. `and` short-circuits (the original `&` evaluated both sides),
# and the length check skips stray files whose names lack that field
# (e.g. 'desktop.ini') instead of raising IndexError.
RECOG_METAS = sorted(
    f for f in _recog_listing
    if f.split('.')[-1] == 'csv'
    and len(f.split('_')) > 2
    and f.split('_')[2][-4:] != 'Draw'
)

# Feature files: extension 'npy' and a second '_'-separated field that does not
# end in 'Draw'; names without that field are skipped for the same reason.
RECOG_FEATS = sorted(
    f for f in _recog_listing
    if f.split('.')[-1] == 'npy'
    and len(f.split('_')) > 1
    and f.split('_')[1][-4:] != 'Draw'
)

# Subject ID is the first '_'-separated field of each feature file name.
RECOG_SUBS = np.array([i.split('_')[0] for i in RECOG_FEATS])

recog_sub_list = np.unique(RECOG_SUBS)

def preprocess_recog(RECOG_METAS, RECOG_FEATS):
    """Keep only file names whose stem has exactly four '_'-separated fields.

    The stem is the part of the name before the first '.'.

    Parameters
    ----------
    RECOG_METAS, RECOG_FEATS : iterable of str
        Candidate metadata and feature file names.

    Returns
    -------
    tuple of (list of str, list of str)
        The filtered metadata names and the filtered feature names, each in
        its original order.
    """
    def has_four_fields(filename):
        stem = filename.split('.')[0]
        # four fields <=> exactly three separators
        return stem.count('_') == 3

    kept_metas = list(filter(has_four_fields, RECOG_METAS))
    kept_feats = list(filter(has_four_fields, RECOG_FEATS))
    return kept_metas, kept_feats

# Drop recognition files whose stem does not have exactly four '_'-separated fields.
RECOG_METAS, RECOG_FEATS = preprocess_recog(RECOG_METAS, RECOG_FEATS)

In [0]:
## get raw file list for drawing runs
path_to_draw = h.path_to_draw

# List the directory once and reuse the listing for both filters.
_draw_listing = os.listdir(path_to_draw)

# Metadata files: extension 'csv' and a third '_'-separated field ending in
# 'Draw'. `and` short-circuits (the original `&` evaluated both sides), and the
# length check skips stray files whose names lack that field
# (e.g. 'desktop.ini') instead of raising IndexError.
DRAW_METAS = sorted(
    f for f in _draw_listing
    if f.split('.')[-1] == 'csv'
    and len(f.split('_')) > 2
    and f.split('_')[2][-4:] == 'Draw'
)

# Feature files: extension 'npy' and a second '_'-separated field ending in 'Draw'.
DRAW_FEATS = sorted(
    f for f in _draw_listing
    if f.split('.')[-1] == 'npy'
    and len(f.split('_')) > 1
    and f.split('_')[1][-4:] == 'Draw'
)

# Subject ID is the first '_'-separated field of each feature file name.
DRAW_SUBS = np.array([i.split('_')[0] for i in DRAW_FEATS])
draw_sub_list = np.unique(DRAW_SUBS)

In [0]:
## get subject ID's that have complete datasets from all phases of experiment
# np.intersect1d returns the sorted, unique IDs present in both the
# recognition and the drawing subject lists.
sub_list = np.intersect1d(recog_sub_list,draw_sub_list)
#print('Number of subs: {}'.format(len(sub_list)))

In [0]:
## filter file list so only contains the sessions that have full datasets
def extract_good_sessions(DRAW_METAS,DRAW_FEATS,RECOG_METAS,RECOG_FEATS):
    """Restrict each file list to files whose subject ID is in ``sub_list``.

    The subject ID is read from the file name: the second '_'-separated field
    for metadata files and the first field for feature files. ``sub_list`` is
    the notebook-level collection of subject IDs.

    Returns
    -------
    tuple of four lists
        Filtered (DRAW_METAS, DRAW_FEATS, RECOG_METAS, RECOG_FEATS), each in
        its original order.
    """
    def keep_known_subjects(filenames, subject_field):
        kept = []
        for filename in filenames:
            if filename.split('_')[subject_field] in sub_list:
                kept.append(filename)
        return kept

    return (keep_known_subjects(DRAW_METAS, 1),
            keep_known_subjects(DRAW_FEATS, 0),
            keep_known_subjects(RECOG_METAS, 1),
            keep_known_subjects(RECOG_FEATS, 0))

# Keep only the files that belong to subjects present in sub_list.
(DRAW_METAS,
 DRAW_FEATS,
 RECOG_METAS,
 RECOG_FEATS) = extract_good_sessions(DRAW_METAS, DRAW_FEATS, RECOG_METAS, RECOG_FEATS)

# Per-file subject tag (first name field plus a '_neurosketch' suffix) and
# per-file ROI (second name field), aligned with the feature-file lists.
RECOG_SUBS = np.array([fname.partition('_')[0] + '_neurosketch' for fname in RECOG_FEATS])
RECOG_ROIS = np.array([fname.split('_')[1] for fname in RECOG_FEATS])

DRAW_SUBS = np.array([fname.partition('_')[0] + '_neurosketch' for fname in DRAW_FEATS])
DRAW_ROIS = np.array([fname.split('_')[1] for fname in DRAW_FEATS])

In [0]:
# Display the subject IDs retained for analysis.
sub_list

array(['0110171', '0110172', '0111171', '0112171', '0112172', '0112173',
       '0113171', '0115174', '0117171', '0118171', '0118172', '0119171',
       '0119172', '0119173', '0119174', '0120171', '0120172', '0120173',
       '0123171', '0123173', '0124171', '0125171', '0125172', '1121161',
       '1130161', '1202161', '1203161', '1206161', '1206162', '1206163',
       '1207162'], dtype='<U7')

## Analysis / Data Collection
How searchlight works with brainiak:

1. Instantiate a searchlight object, specifying its parameters (e.g., searchlight shape, radius, and the maximum edge length, in voxels, of the 3D blocks into which the data are divided).
2. Distribute the data to be searched to the searchlight object, which divides it among MPI ranks (the parallel worker processes of an MPI job).
3. Broadcast data, i.e., define other variables to be available for each execution of the searchlight function.
4. Run the searchlight, this time articulating as a parameter the function to be applied at each searchlight location.

We want to run the searchlight again, but this time the goal is to apply a version of `helpers.make_drawing_predictions` within each searchlight mask.

In [0]:
# some global parameters
# presumably the number of COPE images per run and the number of runs -- TODO confirm
num_copes = 4
num_runs = 2
# All-ones arrays handed to Searchlight.distribute in searchlight_over_each:
# null_mask as the mask, genericarray as the data.
# NOTE(review): (94, 94, 72) is presumably the functional volume grid -- confirm.
null_mask = np.ones((94, 94, 72))
genericarray = np.ones((94, 94, 72))
# Directory into which searchlight_over_each writes one NIfTI file per subject.
SAVE_PATH = 'searchlight_output_nii'

In [0]:
# function to operate over every searchlight region
# collects similarities between (btw) and within (wit) object categories
def compare_btw_wit_obj_similarity_across_runs(subject_data, mask, sl_rad, bcast_var):
    """Between-object minus within-object pattern distance across two runs.

    Written with the searchlight callback signature. ``subject_data[0:4]`` are
    taken as the four patterns of the first run and ``subject_data[4:8]`` as
    the four patterns of the second run, in the same order. Each pattern is
    flattened to a vector, and the correlation distance (1 - Pearson r) is
    computed between every first-run and every second-run pattern.

    Changes from the previous version:
    * ``subject_data`` is no longer modified; the old code replaced each list
      entry with a reshaped array.
    * The distance is computed with ``np.corrcoef`` instead of the notebook
      alias ``pd``, which the imports bind first to pandas and then to
      ``pairwise_distances``.
    * The cross-run block is computed once instead of being sliced twice.

    Parameters
    ----------
    subject_data : sequence of array-like
        At least eight arrays. Only the first three axes are kept, so any
        further axes must have length 1.
    mask, sl_rad, bcast_var
        Unused; present so the function matches the callback signature.

    Returns
    -------
    float
        mean(between-object distance) - mean(within-object distance).

    Raises
    ------
    IndexError
        If fewer than eight arrays are supplied.
    ValueError
        If an array cannot be reshaped to its first three axes.
    """
    n_per_run = 4
    if len(subject_data) < 2 * n_per_run:
        raise IndexError('subject_data must contain at least %d arrays' % (2 * n_per_run))

    def flatten(volume):
        # Drop trailing singleton axes on a local view; the caller's list is untouched.
        dims = np.shape(volume)
        return np.reshape(volume, (dims[0], dims[1], dims[2])).ravel()

    # one row per pattern, one matrix per run
    mat1 = np.vstack([flatten(v) for v in subject_data[:n_per_run]])
    mat2 = np.vstack([flatten(v) for v in subject_data[n_per_run:2 * n_per_run]])

    # Correlation distance between every run-1 pattern (rows) and every
    # run-2 pattern (columns): the off-diagonal block of the full matrix.
    corr = np.corrcoef(mat1, mat2)
    offblock = 1.0 - corr[:n_per_run, n_per_run:]

    # same object across runs sits on the diagonal; different objects elsewhere
    wit_obj = offblock.diagonal()
    n = offblock.shape[0]
    btw_obj = np.hstack((offblock[np.triu_indices(n, k=1)],
                         offblock[np.tril_indices(n, k=-1)]))

    return np.mean(btw_obj) - np.mean(wit_obj)

def make_drawing_predictions(subject_data, mask, sl_rad, bcast_var):
    """Fit a classifier on recognition features and score the drawing features.

    An L2-penalised logistic regression (C=1) is fit on the recognition
    feature matrix and labels, then used to produce class probabilities for
    the drawing feature matrix. The probabilities are written into the drawing
    metadata frame as new columns and that same frame is returned.

    NOTE(review): ``subject_data``, ``mask`` and ``sl_rad`` are never read, so
    the result does not depend on the searchlight location. Everything comes
    from ``bcast_var``.

    Parameters
    ----------
    subject_data, mask, sl_rad
        Unused; present so the function matches the searchlight callback
        signature.
    bcast_var : tuple
        ``(RM, RF, DM, DF, trained_objs, control_objs, this_sub)``:
        recognition metadata and features, drawing metadata and features, the
        trained and control object names, and the subject identifier. The
        metadata objects must expose ``.label.values``.

    Returns
    -------
    DM
        The drawing metadata passed in, modified in place with the columns
        ``t1_prob, t2_prob, c1_prob, c2_prob, bed_prob, bench_prob,
        chair_prob, table_prob, subj``.
    """
    RM, RF, DM, DF, trained_objs, control_objs, this_sub = bcast_var

    # fixed switches: report raw (not log) probabilities, normalize features
    logged = False
    normalize_on = 1

    # features, optionally normalized with the externally defined `normalize`
    X_train, X_test = (normalize(RF), normalize(DF)) if normalize_on else (RF, DF)

    # single train/test split: train on recognition, test on drawing
    y_train = RM.label.values
    y_test = DM.label.values

    clf = linear_model.LogisticRegression(penalty='l2', C=1)
    clf.fit(X_train, y_train)

    ## Column order that puts the trained objects first and the controls last.
    ## The rank of each name within the sorted names is the index of its
    ## probability column, presuming the classifier's classes are those same
    ## names in sorted order.
    ## e.g., [chair table bench bed] ==> argsort [3 2 0 1] ==> ranks [2 3 1 0]
    object_order = np.hstack((trained_objs, control_objs))
    ordering = np.argsort(np.argsort(object_order))

    probs = clf.predict_proba(X_test)
    if logged:
        probs = np.log(probs)
    out = probs[:, ordering]

    ## add prediction probabilities to the metadata, role-ordered columns first
    for col, name in enumerate(('t1_prob', 't2_prob', 'c1_prob', 'c2_prob')):
        DM[name] = out[:, col]
    ## then the classifier's own column order, under fixed object names
    for col, name in enumerate(('bed_prob', 'bench_prob', 'chair_prob', 'table_prob')):
        DM[name] = probs[:, col]

    DM['subj'] = np.repeat(this_sub, DM.shape[0])
    return DM

In [0]:
# function that sets up and organizes searchlight over a set of subjects
def searchlight_over_each(subjects, sl_rad):
    """Run the drawing-prediction searchlight for each subject and save the result.

    For every subject this loads the recognition and drawing data, sets up a
    ``Searchlight`` with a ``Diamond`` shape and the given radius, distributes
    the notebook-level ``genericarray`` / ``null_mask``, broadcasts the
    subject's data, runs ``make_drawing_predictions`` at each location, and
    writes the output to ``SAVE_PATH/<subject>searchlight.nii.gz``.

    Fixes relative to the previous version:
    * Data are loaded for the loop variable ``s``. The old code passed
      ``this_sub``, which is not bound anywhere in this function, while
      broadcasting ``s`` and naming the output file after ``s``.
    * The output directory is created with ``exist_ok=True`` instead of a
      separate existence check.

    NOTE(review): ``this_roi``, ``affine``, ``load_recog_data`` and
    ``load_draw_data`` are not defined in this function and must exist at
    notebook level -- confirm where they come from.

    Parameters
    ----------
    subjects : iterable of str
        Subject identifiers; each is passed to the data loaders and used as
        the prefix of the output file name.
    sl_rad : int
        Searchlight radius passed to ``Searchlight``.

    Returns
    -------
    None
    """
    for s in subjects:
        print(s)

        # set up an instance of the searchlight class
        sl = Searchlight(sl_rad=sl_rad, shape=Diamond)

        # load this subject's recognition and drawing data
        RM, RF = load_recog_data(s, this_roi, '12')
        DM, DF = load_draw_data(s, this_roi)
        assert RF.shape[1]==DF.shape[1] ## that number of voxels is identical

        # trained objects are those with drawing labels; the rest are controls
        trained_objs = np.unique(DM.label.values)
        control_objs = [i for i in ['bed','bench','chair','table'] if i not in trained_objs]

        # distribute and broadcast needed data to searchlight
        sl.distribute(genericarray, null_mask)
        sl.broadcast((RM, RF, DM, DF, trained_objs, control_objs, s))

        # run searchlight
        subject_outputs = np.array(sl.run_searchlight(make_drawing_predictions))

        img = nib.Nifti1Image(subject_outputs.astype(np.float32), affine)

        # store output for this subject
        os.makedirs(SAVE_PATH, exist_ok=True)
        nib.save(img, os.path.join(SAVE_PATH, s + 'searchlight.nii.gz'))

In [0]:
# run searchlight over all subjects and print time to compute results
# timer is timeit.default_timer, so the printed difference is in seconds.
start = timer()
# second argument is the searchlight radius (sl_rad)
searchlight_over_each(sub_list, 1)
end = timer()
print('time to run searchlight:', end - start)

0110171_neurosketch
0110172_neurosketch
0111171_neurosketch
0112171_neurosketch
0112172_neurosketch
0112173_neurosketch
0113171_neurosketch
0115172_neurosketch
0115174_neurosketch
0117171_neurosketch
0118171_neurosketch
0118172_neurosketch
0119171_neurosketch
0119172_neurosketch
0119173_neurosketch
0119174_neurosketch
0120171_neurosketch
0120172_neurosketch
0120173_neurosketch
0123171_neurosketch
0123173_neurosketch
0124171_neurosketch
0125171_neurosketch


'hello'