# setup

In [None]:
import warnings
warnings.filterwarnings('ignore')

import itertools
import numpy as np
import pandas as pd
import nibabel as nib
from tqdm import tqdm
from sklearn import linear_model

In [None]:
# --- filesystem locations ---
data_dir = '/jukebox/ntb/projects/sketchloop02/data'
SAVE_PATH = '/jukebox/ntb/projects/sketchloop02/data/searchlight_output/'

# --- analysis switches ---
# voxel offset taken on each side of the centre when slicing a searchlight window
sl_rad = 5
# metadata column the drawing-run probabilities are grouped by
iv = 'time_point'
# z-score the train and test patterns before fitting the classifier
normalize_on = True
# work with log-probabilities instead of raw classifier probabilities
logged = True

# --- output image geometry ---
# 4x4 affine handed to every Nifti1Image that gets saved
affine = np.array([
    [-1.996683955192566, -0.026332620531320572,
     -0.11206881701946259, 91.78023529052734],
    [-0.026291240006685257, 1.9998265504837036,
     -0.0014756681630387902, -125.46440124511719],
    [-0.11207851767539978, 7.630718279472148e-09,
     1.9968571662902832, -120.91204833984375],
    [0.0, 0.0, 0.0, 1.0],
])


# helper functions
These helpers were rewritten to reduce searchlight runtime.

In [2]:
def load_recog_data(subject):
    """Load a subject's recognition-run feature matrix from disk.

    Parameters
    ----------
    subject : str
        Subject identifier; it is concatenated into the file name
        ``<subject>_12_featurematrix.npy``.

    Returns
    -------
    ndarray
        Whatever array ``np.load`` reads from that file.
    """
    feature_dir = '/jukebox/ntb/projects/sketchloop02/data/feature_matrices_and_metadata/'
    return np.load(feature_dir + subject + '_12_featurematrix.npy')

def load_recog_metadata(subject):
    """Read a subject's recognition-run metadata table.

    Parameters
    ----------
    subject : str
        Subject identifier; it is concatenated into the file name
        ``metadata_<subject>_V1_12.csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV.
    """
    csv_prefix = '/jukebox/ntb/projects/sketchloop02/data/feature_matrices_and_metadata/metadata_'
    return pd.read_csv(csv_prefix + subject + '_V1_12.csv')

def load_draw_metadata(subject):
    """Read a subject's drawing-run metadata table.

    Parameters
    ----------
    subject : str
        Subject identifier; it is concatenated into the file name
        ``metadata_<subject>_drawing.csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV.
    """
    csv_prefix = '/jukebox/ntb/projects/sketchloop02/data/feature_matrices_and_metadata/metadata_'
    return pd.read_csv(csv_prefix + subject + '_drawing.csv')

def load_draw_data(subject):
    """Load a subject's drawing-run feature matrix from disk.

    Parameters
    ----------
    subject : str
        Subject identifier; it is concatenated into the file name
        ``<subject>_featurematrix.npy``.

    Returns
    -------
    ndarray
        Whatever array ``np.load`` reads from that file.
    """
    feature_dir = '/jukebox/ntb/projects/sketchloop02/data/feature_matrices_and_metadata/'
    return np.load(feature_dir + subject + '_featurematrix.npy')

def makemask(coordinates):
    """Build a boolean volume mask with the listed voxels switched on.

    Parameters
    ----------
    coordinates : array-like of int
        Array whose fourth axis holds an ``(a, b, c)`` voxel index, i.e.
        shape ``(n0, n1, n2, 3)``. Every index triple along the first three
        axes is marked in the mask.

    Returns
    -------
    ndarray of bool, shape (88, 128, 128)
        ``True`` at every ``(a, b, c)`` listed in ``coordinates`` and
        ``False`` everywhere else.

    Raises
    ------
    ValueError
        If ``coordinates`` has fewer than four axes or its fourth axis does
        not have length 3.
    IndexError
        If an index falls outside the volume or is not an integer.
    """
    mask = np.ma.make_mask_none((88, 128, 128))
    coords = np.asarray(coordinates)
    if coords.ndim < 4 or coords.shape[3] != 3:
        raise ValueError(
            'coordinates must have shape (n0, n1, n2, 3); got %r' % (coords.shape,))
    if coords.size == 0:
        # Nothing to mark; also avoids indexing with an empty float array.
        return mask

    # One fancy-indexed assignment replaces the per-voxel Python loop.
    mask[coords[:, :, :, 0], coords[:, :, :, 1], coords[:, :, :, 2]] = True
    return mask

def maskfeatures(mask, features):
    """Apply one boolean mask to every volume in ``features``.

    Parameters
    ----------
    mask : ndarray of bool
        Mask used to index each element of ``features``.
    features : sequence of ndarray
        Iterated along its first axis; each element is indexed with ``mask``.

    Returns
    -------
    ndarray
        The masked values of each element, stacked with ``np.array`` in the
        original order.
    """
    # The original preallocated a list and filled it by index, then carried a
    # second, unreachable return holding this equivalent comprehension.
    return np.array([f[mask] for f in features])

# z-score normalization to de-mean & standardize variances within-voxel
def normalize(X):
    """Z-score ``X`` along axis 0.

    Parameters
    ----------
    X : ndarray
        Data whose axis-0 mean is removed and whose axis-0 standard deviation
        is scaled to one.

    Returns
    -------
    ndarray
        The centred data divided by its axis-0 standard deviation, with the
        divisor floored at ``1e-5`` so constant columns do not divide by zero.
    """
    centered = X - X.mean(0)
    spread = np.maximum(centered.std(0), 1e-5)
    return centered / spread

def nan_if(arr, value):
    """Return a copy of ``arr`` with every entry equal to ``value`` set to NaN.

    Parameters
    ----------
    arr : ndarray
        Input values.
    value : scalar
        Entries comparing equal to this are replaced.

    Returns
    -------
    ndarray
        ``np.nan`` where ``arr == value``; the original entry elsewhere.
    """
    hits = arr == value
    return np.where(hits, np.nan, arr)

## Analysis / Data Collection
How searchlight works with brainiak:

1. Instantiate a searchlight object, specifying its parameters (e.g., searchlight shape, radius, and the maximum edge length, in voxels, of the 3D block).
2. Distribute the data to be searched to the searchlight object, which splits it across MPI ranks (the individual parallel processes of an MPI job, each identified by an integer rank).
3. Broadcast data, i.e., define other variables to be available for each execution of the searchlight function.
4. Run the searchlight, this time passing as a parameter the function to be applied at each searchlight location.

We want to perform searchlight again, but this time the objective is to perform a version of helpers.make_drawing_predictions on each searchlight mask.

In [3]:
def make_drawing_predictions(subject_data):
    """Score one searchlight neighbourhood by target-vs-foil classifier evidence.

    An L2-penalised logistic regression is fit on the recognition-run samples
    and applied to the drawing-run samples. The return value is the mean
    difference between the (log-)probability given to the object being drawn
    and the (log-)probability given to the other trained object.

    The function reads these names from module scope rather than taking them
    as arguments: ``RM`` and ``DM`` (recognition / drawing metadata frames
    with a ``label`` column), ``trained_objs``, ``control_objs``,
    ``normalize_on``, ``logged`` and ``iv``. They must be defined at module
    level before the function is called.

    Side effect: the eight ``*_prob`` columns of ``DM`` are overwritten in
    place on every call.

    Parameters
    ----------
    subject_data : ndarray, 4-D
        Neighbourhood data with the sample axis last. The first ``len(DM)``
        samples are used as the test set and the remaining samples as the
        training set (one per row of ``RM``).

    Returns
    -------
    float
        Mean over the ``iv`` groups of (target evidence - foil evidence).
    """
    t1, t2 = trained_objs

    # Bring the sample axis to the front and flatten each sample's voxels.
    samples = subject_data.transpose((3, 0, 1, 2))

    # Split sizes follow the metadata instead of hard-coded 920 / 160; a
    # mismatch with the data still fails in fit() or in the DM assignments.
    n_test = len(DM)
    n_train = samples.shape[0] - n_test
    X_test = samples[:n_test].reshape((n_test, -1))
    X_train = samples[n_test:].reshape((n_train, -1))
    y_train = RM.label.values

    # Train and test sets are z-scored independently of each other.
    if normalize_on:
        X_train = normalize(X_train)
        X_test = normalize(X_test)

    # single train/test split
    clf = linear_model.LogisticRegression(penalty='l2', C=1).fit(X_train, y_train)

    probs = clf.predict_proba(X_test)
    if logged:
        probs = np.log(probs)

    # predict_proba columns follow clf.classes_ (sorted labels). The double
    # argsort gives, for each entry of (trained_objs, control_objs), its
    # position in sorted order, so `out` is ordered trained first, control last.
    _ordering = np.argsort(np.hstack((trained_objs, control_objs)))  ## e.g., [chair table bench bed] ==> [3 2 0 1]
    ordering = np.argsort(_ordering)
    out = probs[:, ordering]

    DM['t1_prob'] = out[:, 0]
    DM['t2_prob'] = out[:, 1]
    DM['c1_prob'] = out[:, 2]
    DM['c2_prob'] = out[:, 3]
    # NOTE(review): these names assume the sorted classes are exactly
    # bed, bench, chair, table -- confirm against the recognition labels.
    DM['bed_prob'] = probs[:, 0]
    DM['bench_prob'] = probs[:, 1]
    DM['chair_prob'] = probs[:, 2]
    DM['table_prob'] = probs[:, 3]

    drawn_t1 = DM[DM.label == t1]
    drawn_t2 = DM[DM.label == t2]

    # target: evidence for the object actually being drawn, per iv group
    target = np.vstack((drawn_t1.groupby(iv)['t1_prob'].mean().values,
                        drawn_t2.groupby(iv)['t2_prob'].mean().values)).mean(0)
    # foil: evidence for the other trained object on those same trials
    foil = np.vstack((drawn_t1.groupby(iv)['t2_prob'].mean().values,
                      drawn_t2.groupby(iv)['t1_prob'].mean().values)).mean(0)

    return np.mean(target - foil)

In [4]:
# function that sets up and organizes searchlight over a set of subjects
def searchlight_over_each(subjects):
    """Run the cube searchlight for each subject and save one NIfTI map each.

    For every voxel of the 88 x 128 x 128 volume, ``make_drawing_predictions``
    is called on the block of data around that voxel and its return value is
    stored at the voxel's position. The map is written to
    ``SAVE_PATH + <subject> + 'clf_searchlight.nii.gz'``: once at the start of
    every x-slab as a checkpoint, and once more when the subject is finished.

    Side effect: ``RM``, ``DM``, ``trained_objs`` and ``control_objs`` are
    rebound at module level for each subject.

    Parameters
    ----------
    subjects : iterable of str
        Subject identifiers; each is used to locate the input files and to
        name the output image.
    """
    # make_drawing_predictions looks these four names up in module scope, so
    # they must be bound as globals; as plain locals of this function they
    # were invisible to it and the first call raised NameError.
    global RM, DM, trained_objs, control_objs

    vol_shape = (88, 128, 128)
    n_x, n_y, n_z = vol_shape

    for s in subjects:
        out_path = SAVE_PATH + s + 'clf_searchlight.nii.gz'

        # output volume, filled in voxel by voxel
        result = np.zeros(vol_shape)

        ### load subject data in
        RM, DM = load_recog_metadata(s), load_draw_metadata(s)
        # Drawing samples first, recognition samples after, then move the
        # sample axis to the end so the first three axes can be sliced.
        CF = np.concatenate(
            (load_draw_data(s), load_recog_data(s).transpose((1, 0, 2, 3))),
            axis=0,
        ).transpose((1, 2, 3, 0))

        ### identify trained objects (those present in the drawing metadata)
        ### and control objects (the remaining ones)
        trained_objs = np.unique(DM.label.values)
        control_objs = [i for i in ['bed', 'bench', 'chair', 'table'] if i not in trained_objs]

        voxels = itertools.product(range(n_x), range(n_y), range(n_z))
        for x, y, z in tqdm(voxels, total=n_x * n_y * n_z):
            if y == 0 and z == 0:
                # Checkpoint once per x-slab. Testing only `y == 0` rewrote
                # the same image for each of the 128 z values.
                nib.save(nib.Nifti1Image(result.astype(np.float32), affine), out_path)

            # Window clamped to the volume bounds.
            # NOTE(review): each axis spans [c - sl_rad, c + sl_rad), so the
            # voxel at c + sl_rad is excluded -- confirm this is intended.
            window = CF[max(x - sl_rad, 0):min(x + sl_rad, n_x),
                        max(y - sl_rad, 0):min(y + sl_rad, n_y),
                        max(z - sl_rad, 0):min(z + sl_rad, n_z),
                        :]
            result[x, y, z] = make_drawing_predictions(window)

        # store output for this subject
        nib.save(nib.Nifti1Image(result.astype(np.float32), affine), out_path)

In [None]:
# run searchlight over all subjects and print time to compute results
searchlight_over_each(sub_list)

438115it [19:16:37,  1.63it/s]