# setup

In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import scipy
import numpy as np
import pandas as pd
import nibabel as nib
from nilearn import image
from sklearn import linear_model
from brainiak.searchlight.searchlight import Searchlight
from brainiak.searchlight.searchlight import Cube

phases = ['34']

  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
nb_name = ''

data_dir = '/jukebox/ntb/projects/sketchloop02/data'
SAVE_PATH = '/jukebox/ntb/projects/sketchloop02/data/searchlight_output'
curr_dir = '/jukebox/ntb/projects/sketchloop02/prototype/link'
roi_dir = os.path.join(data_dir, 'copes/roi')
cope_dir = os.path.join(data_dir, 'copes/recog/objectGLM')
sub_dirs = sorted(os.listdir(roi_dir))
sub_dirs = [each[:7] for each in sub_dirs]

## root paths
os.path.abspath(os.path.join(curr_dir,'..','..'))
proj_dir = os.path.abspath(os.path.join(curr_dir,'..','..')) ## use relative paths
feat_dir = os.path.abspath(os.path.join(proj_dir,'data/features')) ## use relative paths 'D:\\data'

# null mask
null_mask = np.ones((88, 128, 128))
normalize_on = True
logged = True
iv = 'Unnamed: 0'


In [3]:
def load_draw_metadata(subject):
    metadata = pd.read_csv(
        '/jukebox/ntb/projects/sketchloop02/data/feature_matrices_and_metadata/metadata_' +
        subject + '_drawing.csv')
    return metadata

# z-score normalization to de-mean & standardize variances within-voxel
def normalize(X):
    X = X - X.mean(0)
    X = X / np.maximum(X.std(0), 1e-5)
    return X

# define and run searchlight
How searchlight works with brainiak:

1. Initiate a searchlight object, articulating certain parameters (e.g., searchlight shape, radius, max edge length, in voxels, of the 3D block(?)).
2. Distribute data to be searched to the searchlight object, sorting between MPI ranks (idk what MPI ranks are tbh).
3. Broadcast data, i.e., define other variables to be available for each execution of the searchlight function.
4. Run the searchlight, this time articulating as a parameter the function to be applied at each searchlight location.

We want to perform searchlight again, but this time the objective is to perform a version of helpers.make_drawing_predictions on each searchlight mask.

In [4]:
def prepostclf(subject_data, mask, sl_rad, bcast_var):
    t1, t2 = trained_objs
    subject_data = subject_data[0].transpose((3, 0, 1, 2))
    
    # format the train/test split
    x_A = subject_data[:80,:].reshape(80, -1)
    x_B = subject_data[80:, :].reshape(80, -1)
    y_A = RM.label.values[:80]
    y_B = RM.label.values[80:]
    
    # normalize if we want
    if normalize_on:
        x_A = normalize(x_A)
        x_B = normalize(x_B)
        
    # dual classifiers
    clfA = linear_model.LogisticRegression(
        penalty='l2',C=1).fit(x_A, y_A)
    clfB = linear_model.LogisticRegression(
        penalty='l2',C=1).fit(x_B, y_B)
    
    ## add prediction probabilities to metadata matrix
    ## must sort so that trained are first, and control is last
    ## also save out new columns in the same order
    _ordering = np.argsort(np.hstack((trained_objs,control_objs))) ## e.g., [chair table bench bed] ==> [3 2 0 1]
    ordering = np.argsort(_ordering) ## get indices that sort from alphabetical to (trained_objs, control_objs)
    probsA = (np.log(clfA.predict_proba(x_B)) if logged else clfA.predict_proba(x_B))
    probsB = (np.log(clfB.predict_proba(x_A)) if logged else clfB.predict_proba(x_A))
    probs = np.concatenate((probsA, probsB), axis=0)
    
    out = probs[:,ordering]
    RM['t1_prob'] = out[:,0]
    RM['t2_prob'] = out[:,1]
    RM['c1_prob'] = out[:,2]
    RM['c2_prob'] = out[:,3]
    RM['bed_prob'] = probs[:,0]
    RM['bench_prob'] = probs[:,1]
    RM['chair_prob'] = probs[:,2]
    RM['table_prob'] = probs[:,3]
    
    target = np.vstack((RM[RM.label==t1].groupby(iv)['t1_prob'].mean().values,
                       RM[RM.label==t2].groupby(iv)['t2_prob'].mean().values)).mean(0)
    foil = np.vstack((RM[RM.label==t1].groupby(iv)['t2_prob'].mean().values,
                       RM[RM.label==t2].groupby(iv)['t1_prob'].mean().values)).mean(0)
    
    return np.mean(target-foil)

In [5]:
# function that sets up and organizes searchlight over a set of subjects
def searchlight_over_each(subjects, sl_rad, phases):
    global RM, DM, trained_objs, control_objs, subject
    
    for subject in subjects:
        print(subject)
        
        # set up searchlight object
        sl = Searchlight(sl_rad=sl_rad, shape=Cube)
        
        # arrange data to be distributed to searchlight
        # list of 4D not 3D arrays
        subject_data = []
        for phase in phases:
            this_file = '{}/recog/{}_{}_featurematrix.npy'.format(
                feat_dir, subject, phase)
            subject_data.append(np.load(this_file).transpose((2, 3, 1, 0)))
        subject_data = np.stack(subject_data, axis=0)
        
        # load metadata
        this_file = "{}/recog/metadata_{}_{}.csv".format(feat_dir, subject, phase)
        RM, DM = pd.read_csv(this_file), load_draw_metadata(subject)
        trained_objs = np.unique(DM.label.values)
        control_objs = [i for i in ['bed','bench','chair','table'] if i not in trained_objs]
        
        # distribute and broadcast needed data to searchlight
        sl.distribute(subject_data, null_mask)
        sl.broadcast(None)
        
        # run searchlight
        subject_outputs = np.array(sl.run_searchlight(prepostclf))
        print(np.shape(subject_outputs.astype(np.float32)))
        img = nib.Nifti1Image(subject_outputs.astype(np.float32), affine)
        
        # store output for this subject
        if not os.path.exists(SAVE_PATH):
            os.makedirs(SAVE_PATH)
        
        localizer_tmap_filename = os.path.join(SAVE_PATH, subject + 'prepostclf_{}_searchlight.nii.gz'.format(phase))
        nib.save(img, localizer_tmap_filename)

In [None]:
# run searchlight over all subjects and print time to compute results
for phases in [['34'], ['56'], ['34', '56']]:
    print(phases)
    searchlight_over_each(sub_dirs, 3, phases)

['34']
0110171
