In [1]:
import nibabel as nib
import os
import numpy as np
import os.path as op
import scipy
import pandas as pd
import pickle
import scipy.stats
from ipyparallel import Client
from statsmodels.formula.api import ols
import statsmodels.api as sm 
import statsmodels.formula.api as smf
from sklearn import manifold
from sklearn.decomposition import PCA
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

#classification
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing, metrics, feature_selection
from sklearn.model_selection import cross_val_score, LeaveOneGroupOut
from sklearn.pipeline import make_pipeline

%matplotlib inline

In [2]:
#ipyparallel client; the engine view used for the per-subject map is built from it below
rc = Client()

In [3]:
#project root, subject list, and working directory
home_dir = '/data/home/iballard/fd/'
subj_file = op.join(home_dir, 'subjects.txt')
subs = list(np.loadtxt(subj_file, dtype='string'))  #one subject id per entry
os.chdir(home_dir)

In [4]:
def compute_inv_shrunk_covariance(x,mask):
    """Return the inverse of a shrinkage-regularised covariance matrix.

    The sample covariance of the columns of ``x`` is shrunk towards its own
    diagonal, with the shrinkage weight estimated from the data; see
    http://www.diedrichsenlab.org/pubs/Walther_Neuroimage_2016.pdf

    Parameters
    ----------
    x : 2-D ndarray, shape (t, n)
        t measurements (rows) by n voxels (columns).
    mask : any
        Unused; kept so existing callers keep working.

    Returns
    -------
    ndarray, shape (n, n)
        Inverse of the shrunk covariance. If the shrunk covariance is
        singular, the inverse of its diagonal (univariate) part is returned.

    Raises
    ------
    numpy.linalg.LinAlgError
        If inversion fails for a reason other than a singular matrix, or if
        the diagonal fallback is itself singular.
    """
    t,n = x.shape #t measurements by n voxels

    #demean each voxel
    x = x - x.mean(0)

    #compute sample covariance
    sample = (1.0/t) * np.dot(np.transpose(x),x)

    #compute prior (shrinkage target): keep the variances, drop the covariances
    prior = np.diag(np.diag(sample))

    #compute shrinkage
    d = 1.0/n * np.linalg.norm(sample - prior,ord = 'fro')**2
    y = np.square(x)
    r2 = 1.0/n/t**2 * np.sum(np.dot(np.transpose(y),y)) - \
        1.0/n/t*np.sum(np.square(sample))

    #compute the estimator
    if d == 0:
        #sample already equals the prior, so the weight is irrelevant;
        #skip the division to avoid a divide-by-zero
        shrinkage = 1
    else:
        shrinkage = max(0,min(1,r2/d))
    sigma = shrinkage*prior + (1-shrinkage)*sample

    #compute the inverse
    try:
        inv_sigma = np.linalg.inv(sigma)
    except np.linalg.LinAlgError as err:
        #str(err) works on Python 2 and 3 (err.message is Python 2 only)
        if 'Singular matrix' in str(err):
            inv_sigma = np.linalg.inv(prior) #univariate
        else:
            raise

    return inv_sigma

In [5]:
#reject b from a: remove the component of a that lies along b
def vector_reject(a,b):
    """Return the part of ``a`` that is orthogonal to ``b``.

    The projection of ``a`` onto ``b`` is computed and subtracted from ``a``.
    """
    scale = np.dot(a,b)/np.dot(b,b)
    along_b = scale * b
    return a - along_b

In [6]:
#make sure MTL masks are exclusive
def trim_mask(sub,mask,m):
    """Remove from ``mask`` every voxel that also belongs to another overlap mask.

    Parameters
    ----------
    sub : str
        Subject identifier (directory name under ``home_dir/data``).
    mask : boolean ndarray
        The mask named ``m`` for this subject.
    m : str
        Name of the mask being trimmed; all other entries of the global
        ``overlap_masks`` are treated as exclusion masks.

    Returns
    -------
    boolean ndarray
        ``mask`` with the voxels of every other overlap mask switched off.

    Notes
    -----
    Earlier versions only excluded the first of the other masks (the union
    was taken of that mask with itself), so cached outputs computed with
    that behaviour may need to be regenerated.
    """
    #every overlap mask except the one being trimmed
    exclusions = [name for name in overlap_masks if name != m]

    #union of the voxels covered by any of the other masks
    bad = np.zeros(np.shape(mask),dtype = bool)
    for name in exclusions:
        other = op.join(home_dir,'data', sub,  'masks', name +'.nii.gz')
        other = nib.load(other).get_data().astype(bool)
        bad = np.logical_or(bad,other)

    good = np.invert(bad)
    return np.logical_and(mask,good)

In [7]:
def compute_inverse_sigma(sub,exp,smooth,masks):
    """Compute and cache the square root of the inverse noise covariance.

    For each run (1 and 2) that has a ``res4d_xfm.nii.gz`` residual image and
    for each mask, the residuals inside the mask are passed to
    ``compute_inv_shrunk_covariance``; the matrix square root of the result is
    written to ``home_dir/covariance/<exp>_<sub>_<smooth>_<run>_<mask>.txt``.
    Existing output files are left untouched.

    Parameters
    ----------
    sub : str
        Subject identifier.
    exp : str
        Experiment name (analysis sub-directory and file-name component).
    smooth : str
        Smoothing directory / file-name component.
    masks : list of str
        Mask names (``<name>.nii.gz`` under the subject's ``masks`` directory).
    """
    #plain `import scipy` does not guarantee that the linalg submodule is loaded
    import scipy.linalg

    sub_path = op.join(home_dir,'analysis', exp, sub, 'reg','epi', smooth)
    for run in map(str,range(1,3)):
        res_f = op.join(sub_path, 'run_'  + run,'res4d_xfm.nii.gz')
        if not op.exists(res_f):
            continue

        res = None #residual volume, loaded only if some output is missing
        for m in masks:
            out_f = op.join(home_dir,'covariance','_'.join([exp,sub,smooth,run,m]) + '.txt')
            if op.exists(out_f): #don't recompute
                continue

            if res is None:
                res = nib.load(res_f).get_data().astype(float)

            mask = op.join(home_dir,'data', sub,  'masks', m + '.nii.gz')
            mask = nib.load(mask).get_data().astype(bool)
            if m in overlap_masks:
                mask = trim_mask(sub,mask,m)

            #voxels x time -> time x voxels
            x = np.transpose(res[mask])

            inv_sigma = compute_inv_shrunk_covariance(x,m)
            inv_sigma = scipy.linalg.fractional_matrix_power(inv_sigma,.5) #take square root

            np.savetxt(out_f,inv_sigma)


In [8]:
def delete_inverse_sigma(sub,exp,smooth,masks):
    """Delete the cached covariance files written by ``compute_inverse_sigma``.

    Removes ``home_dir/covariance/<exp>_<sub>_<smooth>_<run>_<mask>.txt`` for
    runs 1 and 2 and every mask in ``masks``; missing files are skipped.
    """
    for run in map(str,range(1,3)):
        for m in masks:
            #file name must match the one used when the file was written
            #(it includes `smooth`)
            out_f = op.join(home_dir,'covariance','_'.join([exp,sub,smooth,run,m]) + '.txt')
            if op.exists(out_f):
                os.remove(out_f)

In [9]:
def get_condition(i):
    """Map a 1-based index to its condition name and trial number.

    Index 1 is the single 'popout' entry; indices 2-51 are five consecutive
    blocks of ten trials: body, character, face, place, object.

    Parameters
    ----------
    i : int
        Index in the range 1..51.

    Returns
    -------
    (cond, trial) : (str, int)
        Condition name and the trial number within that condition (1..10).

    Raises
    ------
    ValueError
        If ``i`` is outside 1..51.
    """
    if i == 1:
        return 'popout', 1

    block_order = ['body','character','face','place','object']
    trials_per_block = 10
    last = 1 + trials_per_block * len(block_order)
    if i < 1 or i > last:
        raise ValueError('index must be between 1 and %d, got %r' % (last, i))

    #offset from the first non-popout index, split into block and position
    block, position = divmod(i - 2, trials_per_block)
    return block_order[int(block)], position + 1

In [10]:
def extract_betas(sub,exp,smooth,masks):
    """Collect single-trial beta values for every mask into one long DataFrame.

    For each mask, the values of the images ``cope2`` .. ``cope51`` (runs 1
    and 2) inside the mask are stored one row per voxel and cached as
    ``home_dir/betas/loc-betas_<sub>_<smooth>_<mask>.csv``; if that file
    already exists it is read back instead of being recomputed.

    Parameters
    ----------
    sub : str
        Subject identifier.
    exp : str
        Unused; paths are always built from the fixed id 'loc-betas'.
    smooth : str
        Smoothing directory / file-name component.
    masks : list of str
        Mask names.

    Returns
    -------
    pandas.DataFrame
        Indexed by (sub, run, mask, condition, trial) with columns
        'value', 'voxel' and 'row' (``row`` is the cope number minus one).
    """
    nbetas = 50
    exp_id = 'loc-betas'
    columns = ['sub','mask','run','condition','value','trial','voxel','row']

    all_betas = []
    for m in masks:
        out_f = op.join(home_dir,'betas', '_'.join([exp_id,sub,smooth,m]) + '.csv')

        if op.exists(out_f): #load from disk
            betas = pd.read_csv(out_f)
        else: #extract from the stat images
            sub_path = op.join(home_dir,'analysis', exp_id, sub, 'reg','epi', smooth )

            mask = op.join(home_dir,'data', sub,  'masks', m + '.nii.gz')
            mask = nib.load(mask).get_data().astype(bool)
            if m in overlap_masks:
                mask = trim_mask(sub,mask,m)

            chunks = [] #one small frame per (run, cope image)
            for run in map(str,range(1,3)):
                run_dir = op.join(sub_path, 'run_'  + run)

                if not op.exists(run_dir):
                    print(run_dir) #report the missing run and move on
                    continue

                for i in range(2,nbetas + 2):
                    f = run_dir + '/cope' + str(i) + '_xfm.nii.gz'
                    cond, trial = get_condition(i)

                    #load stat image and keep only the voxels inside the mask
                    stat = nib.load(f).get_data().astype(float)[mask]

                    #scalars are broadcast to the length of `stat`
                    chunks.append(pd.DataFrame({'sub':sub,
                                                'mask':m,
                                                'run':int(run),
                                                'condition':cond,
                                                'value':stat,
                                                'trial':trial,
                                                'voxel':np.arange(len(stat)),
                                                'row':i - 1},
                                               columns = columns))

            if chunks:
                betas = pd.concat(chunks,ignore_index = True)
            else:
                #no run directory was found: keep the (empty) table shape
                betas = pd.DataFrame(columns = columns)
            betas.to_csv(out_f,index = False)

        all_betas.append(betas)

    all_betas = pd.concat(all_betas)
    all_betas = all_betas.set_index(['sub', 'run','mask','condition','trial'])
    return all_betas

In [19]:
def prewhiten_betas(old_betas,sub,exp,smooth,masks):
    """Multiply each trial's voxel pattern by the cached whitening matrix.

    For every mask, run (1 and 2), condition in the global ``conds`` and trial
    1..10, the 'value' vector is replaced by ``inv_sigma . value`` where
    ``inv_sigma`` is read from
    ``home_dir/covariance/<exp>_<sub>_<smooth>_<run>_<mask>.txt``. Results are
    cached in ``home_dir/betas/whitened/loc-betas_<sub>_<smooth>_<mask>.csv``
    and read back from there when the file already exists. Runs without a
    covariance file are reported and left unwhitened.

    Parameters
    ----------
    old_betas : pandas.DataFrame
        Frame indexed by (sub, run, mask, condition, trial) with a 'value'
        column, as returned by ``extract_betas``. It is not modified.
    sub, exp, smooth : str
        Subject id, experiment name and smoothing component of the file names.
    masks : list of str
        Mask names.

    Returns
    -------
    pandas.DataFrame
        Indexed by (sub, run, mask, condition, trial).
    """
    all_betas = []

    exp_id = 'loc-betas'

    for m in masks:
        out_f = op.join(home_dir,'betas', 'whitened','_'.join([exp_id,sub,smooth,m]) + '.csv')

        if op.exists(out_f):
            all_betas.append(pd.read_csv(out_f))
            continue

        #explicit copy: values are assigned below and must not leak into old_betas
        betas = old_betas.xs(m, level='mask', axis=0).copy()

        for run in map(str,range(1,3)):
            #load covariance
            cov_f = op.join(home_dir,'covariance','_'.join([exp,sub,smooth,run,m]) + '.txt')
            if not op.exists(cov_f):
                print('no covariance file %s' % cov_f)
                continue
            inv_sigma = np.loadtxt(cov_f)

            for cond in conds:
                for trial in range(1,11):
                    key = (sub,int(run),cond,trial)
                    vals = betas.loc[key,'value'].values
                    betas.loc[key,'value'] = np.dot(inv_sigma,vals)

        #flatten the index back into columns and restore the mask column
        out_betas = pd.DataFrame(betas.to_records())
        out_betas['mask'] = m
        out_betas.to_csv(out_f,index = False)
        all_betas.append(out_betas)

    all_betas = pd.concat(all_betas)
    all_betas = all_betas.set_index(['sub', 'run','mask','condition','trial'])
    return all_betas

In [28]:
def run_sub(sub):
    """Run covariance estimation, beta extraction and prewhitening for one subject.

    Reads ``exp``, ``smooth`` and ``masks`` from the enclosing namespace and
    returns nothing; all results are written to disk by the called functions.
    """
    compute_inverse_sigma(sub,exp,smooth,masks)
    raw_betas = extract_betas(sub,exp,smooth,masks)
    prewhiten_betas(raw_betas,sub,exp,smooth,masks)
    #optional: delete_inverse_sigma(sub,exp,smooth,masks) to save disk space

In [11]:
#masks that are trimmed against each other by trim_mask
overlap_masks = ['peri_sim','para_sim','hipp']
#masks to process
masks = ['hipp','peri_sim','para_sim']
#experiment name used in analysis paths and output file names
exp = 'loc-betas'
#smoothing component of analysis paths and output file names
smooth = 'smoothed'
#conditions whose betas are whitened in prewhiten_betas
conds = ['body','character','face','object','place']
nconds = len(conds)

In [27]:
#view on the engines in slots 0-15; calls on it block until they finish
dview = rc[0:16]
dview.block = True

#ship the settings and helper functions that run_sub needs to every engine
dview.push({
    'home_dir': home_dir,
    'masks': masks,
    'exp': exp,
    'conds': conds,
    'smooth': smooth,
    'get_condition': get_condition,
    'overlap_masks': overlap_masks,
    'prewhiten_betas': prewhiten_betas,
    'compute_inv_shrunk_covariance': compute_inv_shrunk_covariance,
    'compute_inverse_sigma': compute_inverse_sigma,
    'extract_betas': extract_betas,
    'delete_inverse_sigma': delete_inverse_sigma,
    'trim_mask': trim_mask,
})

#aliased imports are executed as source on the engines
dview.execute("import numpy as np")
dview.execute("import os.path as op")
dview.execute("import nibabel as nib")
dview.execute("import pandas as pd")

#plain imports, performed locally and on the engines
with dview.sync_imports():
    import os
    import numpy
    import scipy
    import scipy.stats
    import pickle

#run the whole pipeline, one subject per task
dview.map_sync(run_sub,subs)

# Now classify the visual categories

In [12]:
def load_features(exp,sub,smooth,m):
    """Read the whitened betas of one subject/mask and drop unused categories.

    Loads ``home_dir/betas/whitened/<exp>_<sub>_<smooth>_<m>.csv`` and returns
    the rows whose 'condition' is neither 'character' nor 'object'.
    """
    fname = '_'.join([exp,sub,smooth,m]) + '.csv'
    betas = pd.read_csv(op.join(home_dir,'betas','whitened', fname))

    #categories we don't care about
    unwanted = ['character','object']
    keep = ~betas['condition'].isin(unwanted)

    return betas[keep]
    

In [13]:
#integer class label for each condition name (looked up in prepare_data)
cond_map = {'body':0, 'place':1, 'object':3, 'character':4, 'face':2}

In [15]:
def prepare_data(betas):
    """Turn a long betas table into classifier inputs.

    For runs 1 and 2 separately, the table is pivoted to a (row x voxel)
    matrix of 'value', standardised with ``preprocessing.scale``, and the
    condition of each row is mapped to its integer label through ``cond_map``.

    Returns
    -------
    X : ndarray
        Scaled feature matrices of both runs stacked row-wise.
    y : ndarray
        Integer class label per row of ``X``.
    runs : ndarray
        Run number per row of ``X``.
    """
    X_parts, y_parts, run_parts = [], [], []

    for run_id in range(1,3):
        subset = betas[betas['run'] == run_id]

        #one row per beta image, one column per voxel
        features = subset.pivot(index = 'row',columns = 'voxel',values = 'value').values
        #read each row's condition from the voxel-0 column
        names = subset.pivot(index = 'row',columns = 'voxel',values = 'condition')[0].values
        labels = [cond_map[name] for name in names]

        #scale within the run
        features = preprocessing.scale(features)

        run_parts.append([run_id]*len(labels))
        y_parts.append(labels)
        X_parts.append(features)

    return np.concatenate(X_parts), np.concatenate(y_parts), np.concatenate(run_parts)

In [17]:
def classify(X,y,runs):
    """Cross-validated multinomial logistic regression, leaving one run out.

    The pipeline is: univariate feature selection (F-test, at most 1000
    features) -> standardisation -> logistic regression.

    Parameters
    ----------
    X : 2-D array (samples x features)
    y : 1-D array of class labels
    runs : 1-D array of group labels used for the leave-one-group-out split

    Returns
    -------
    (mean_accuracy, clf)
        Mean accuracy across folds and the (unfitted) pipeline.
    """
    logistic = LogisticRegression(C=1,
                                  multi_class = 'multinomial',
                                  solver = 'lbfgs')

    #k may not exceed the number of features, so clamp it for small masks
    n_keep = min(1000, np.shape(X)[1])
    transform = feature_selection.SelectKBest(feature_selection.f_classif,
                                              k = n_keep)

    clf = make_pipeline(transform, preprocessing.StandardScaler(), logistic  )

    #get cross-validated score
    cv = LeaveOneGroupOut().split(X, y, runs)
    scores = cross_val_score(clf, X,y,cv = cv,
                             groups=runs, scoring='accuracy')

    return np.mean(scores),clf


In [18]:
#cross-validated accuracy per mask and subject, plus one saved classifier each
classifier_results = {'mask':[],'sub':[],'accuracy':[]}
for m in masks:
    for sub in subs:
        betas = load_features(exp,sub,smooth,m)
        X,y,runs = prepare_data(betas)
        score, clf = classify(X,y,runs)

        #record the cross-validated accuracy
        classifier_results['mask'].append(m)
        classifier_results['sub'].append(sub)
        classifier_results['accuracy'].append(score)

        #fit model to both runs
        clf = clf.fit(X,y)

        #save to disk; the with-block closes the file even if pickling fails
        out_f = op.join(home_dir,'classifiers', '_'.join([sub,exp,smooth,m]) + '_logistic.pkl')
        with open(out_f, 'wb') as output:
            pickle.dump(clf, output)



In [20]:
#display the subject ids loaded from subjects.txt
subs

['fd_104',
 'fd_105',
 'fd_107',
 'fd_108',
 'fd_109',
 'fd_110',
 'fd_112',
 'fd_113',
 'fd_114',
 'fd_115',
 'fd_117',
 'fd_118',
 'fd_119',
 'fd_122',
 'fd_123',
 'fd_124',
 'fd_126',
 'fd_127',
 'fd_128',
 'fd_129',
 'fd_130',
 'fd_132',
 'fd_133',
 'fd_135',
 'fd_136',
 'fd_137',
 'fd_138',
 'fd_140',
 'fd_141',
 'fd_144',
 'fd_147',
 'fd_148']