In [29]:
from moss.mosaic import Mosaic
import nibabel as nib
import multiprocessing
#os and i/o
import os
import numpy as np
import os.path as op
import seaborn as sns
import matplotlib
import scipy
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn import preprocessing
from sklearn.cross_decomposition import PLSRegression
%matplotlib inline

In [42]:
#preliminary housekeeping
# NOTE(review): absolute, user-specific path; everything below is resolved
# relative to it, so change it here when running on another machine.
home_dir = '/data/home/iballard/fd/'
subj_file = op.join(home_dir, 'subjects.txt')

# One subject ID per line.
#  - dtype=str is used instead of the 'string' alias, which not every NumPy
#    build accepts.
#  - ndmin=1 keeps a one-line file from collapsing to a 0-d array, which
#    list() cannot iterate.
subs = list(np.loadtxt(subj_file, dtype=str, ndmin=1))

# Work from the project root for the rest of the notebook.
os.chdir(home_dir)

In [43]:
# Analysis configuration shared by the extraction and loading cells.
# These three values are joined with '_' to form the betas CSV file name.

# experiment / model folder under <home_dir>/analysis
exp = "sim_4mm-betas"

# ROI masks whose betas are extracted by the slow cell below.
# NOTE(review): the loading cell also reads a 'hipp' CSV, which this list does
# not produce -- presumably it was written by an earlier run; confirm.
masks = ["yeo17_ifs"]

# smoothing sub-folder to read the registered copes from
smooth = "unsmoothed"

In [44]:
def get_condition(i):
    """Map a 1-based cope index to its condition label and trial number.

    The 40 indices are laid out as four consecutive blocks of ten:
    1-10 -> 'b_plus', 11-20 -> 'c_plus', 21-30 -> 'c_minus',
    31-40 -> 'b_minus'.

    Parameters
    ----------
    i : int
        Index in the range 1..40 (inclusive).

    Returns
    -------
    (cond, trial) : (str, int)
        Condition label and the 1-based position of ``i`` inside its block.

    Raises
    ------
    ValueError
        If ``i`` is outside 1..40. Previously an index above 40 fell through
        every branch and raised UnboundLocalError, and an index below 1 was
        silently labelled 'b_plus' with a non-positive trial number.
    """
    block_size = 10
    conditions = ('b_plus', 'c_plus', 'c_minus', 'b_minus')
    last_index = block_size * len(conditions)

    if not 1 <= i <= last_index:
        raise ValueError(
            'cope index must be between 1 and %d, got %r' % (last_index, i))

    for block, cond in enumerate(conditions):
        # Same cut-offs as before: i < 11, i < 21, i < 31, i < 41.
        if i < (block + 1) * block_size + 1:
            return cond, i - block * block_size

In [45]:
#Saves betas to csv so we don't have to constantly rerun this slow code block
# For every mask: walk subjects x runs (1-3) x copes (1-40), pull the masked
# voxel values out of each registered cope image, and write one long-format
# CSV (one row per voxel per cope) to <home_dir>/betas/<exp>_<smooth>_<mask>.csv.
# NOTE(review): get_data() is deprecated in recent nibabel releases in favour
# of get_fdata(); kept here to match the installed version -- confirm.
for m in masks:
    betas = {'sub':[],'mask':[],'run':[],'condition':[],'trial':[],'value':[],'voxel':[],'row':[]}
    out_f = op.join(home_dir,'betas', '_'.join([exp,smooth,m]) + '.csv')

    # Make sure the output folder exists *before* the slow extraction, so a
    # missing directory cannot throw the whole run away at to_csv time.
    out_dir = op.dirname(out_f)
    if not op.isdir(out_dir):
        os.makedirs(out_dir)

    for sub in subs:
        sub_path = op.join(home_dir,'analysis', exp, sub, 'reg','epi', smooth )

        # Subject-specific ROI mask, as a boolean volume.
        mask_f = op.join(home_dir,'data', sub,  'masks', m + '.nii.gz')
        mask = nib.load(mask_f).get_data().astype(bool)

        for run in map(str,range(1,4)):
            run_dir = op.join(sub_path, 'run_'  + run)

            # Not every subject has all three runs; skip the missing ones.
            if not os.path.exists(run_dir):
                continue

            for i in range(1,41):
                f = op.join(run_dir, 'cope' + str(i) + '_xfm.nii.gz')
                cond, trial = get_condition(i)

                #load stat image and keep only the voxels inside the mask
                stat = nib.load(f).get_data().astype(float)
                stat = stat[mask]
                n_vox = len(stat)

                # Append one row per voxel. Extending each column once per
                # image yields exactly the same lists as appending inside a
                # per-voxel loop, without the per-voxel Python overhead.
                betas['voxel'].extend(range(n_vox))
                betas['value'].extend(stat)
                betas['sub'].extend([sub] * n_vox)
                betas['mask'].extend([m] * n_vox)
                betas['run'].extend([run] * n_vox)
                betas['condition'].extend([cond] * n_vox)
                betas['trial'].extend([trial] * n_vox)
                betas['row'].extend([i] * n_vox)

    betas = pd.DataFrame(betas)
    betas.to_csv(out_f)
    # Free the (large) table before moving on to the next mask.
    del betas

In [56]:
# The extraction cell already deletes `betas` after writing each CSV, so a bare
# `del betas` raises NameError when the notebook is run top to bottom.
# Only delete the name if it is actually bound.
if 'betas' in globals():
    del betas

In [57]:
#set ROIs of interest and load from hard disk
# x_roi supplies the predictor voxels and y_roi the target voxels for the
# PLS cell below.
x_roi = 'hipp'
y_roi = 'yeo17_ifs'
rois = [x_roi,y_roi]

# Read each distinct ROI's CSV exactly once (order preserved). If x_roi and
# y_roi are set to the same mask, loading it twice would duplicate every row
# and the later pivot on (row, voxel) would fail -- which is why a separate
# single-file loading path used to be kept around as commented-out code.
roi_frames = []
for m in sorted(set(rois), key=rois.index):
    f = op.join(home_dir,'betas', '_'.join([exp,smooth,m]) + '.csv')
    roi_frames.append(pd.read_csv(f))

# Long-format table indexed by (sub, mask, run) for .loc lookups downstream.
betas = pd.concat(roi_frames)
betas = betas.set_index(['sub', 'mask','run'])


In [None]:
# Leave-one-run-out PLS regression from x_roi voxels to y_roi voxels.
# For every subject, each run is held out once; the model is fit on the
# remaining run(s) and scored (R^2 via PLSRegression.score) on both the
# training data and the held-out run. Per-subject means go into scores_df.
#
# NOTE(review): n_components=100 exceeds the number of training rows when only
# one or two runs (40 rows each) are used for training -- confirm this is
# intended.
# NOTE(review): train and test matrices are each standardised with their own
# statistics rather than the training statistics -- confirm this is intended.

def _roi_matrix(sub, roi, run):
    """Return the (cope row x voxel) beta matrix for one subject/ROI/run."""
    frame = betas.loc[(sub, roi, run)]
    return frame.pivot(index='row', columns='voxel', values='value').values


score_records = {'sub':[],'score_test':[],'score_train':[],'y_roi':[]}

for sub in subs:
    # Runs actually present for this subject (read back from the CSV index).
    runs = sorted(set(betas.loc[(sub,x_roi)].index))

    # Folds are derived from the runs that exist instead of hard-coded labels.
    # Previously a subject without exactly 2 or 3 runs reused the previous
    # subject's folds (or hit a NameError), and a two-run subject whose runs
    # were not 1 and 2 got a fold with no training data.
    if len(runs) < 2:
        print('Skipping %s: need at least 2 runs for cross-validation, found %d'
              % (sub, len(runs)))
        continue

    # Pivot each run once per subject rather than once per fold.
    X_by_run = dict((run, _roi_matrix(sub, x_roi, run)) for run in runs)
    Y_by_run = dict((run, _roi_matrix(sub, y_roi, run)) for run in runs)

    scores_train = []
    scores_test = []

    # Hold out the last run first: for runs 1-3 this reproduces the original
    # fold order (train on [1,2], then [1,3], then [2,3]).
    for test_run in reversed(runs):
        train_set = [run for run in runs if run != test_run]

        # Stack training runs along the trial axis; concatenate also copes
        # with runs that differ in their number of rows.
        X_train = np.concatenate([X_by_run[run] for run in train_set], axis=0)
        Y_train = np.concatenate([Y_by_run[run] for run in train_set], axis=0)
        X_test = X_by_run[test_run]
        Y_test = Y_by_run[test_run]

        #standardize (scale() returns new arrays, the cached matrices are untouched)
        X_train = preprocessing.scale(X_train)
        Y_train = preprocessing.scale(Y_train)
        X_test = preprocessing.scale(X_test)
        Y_test = preprocessing.scale(Y_test)

        pls = PLSRegression(n_components=100,max_iter = 2000)
        pls.fit(X_train,Y_train)

        scores_train.append(pls.score(X_train,Y_train))
        scores_test.append(pls.score(X_test,Y_test))

    score_records['sub'].append(sub)
    score_records['y_roi'].append(y_roi)
    score_records['score_test'].append(np.mean(scores_test))
    score_records['score_train'].append(np.mean(scores_train))

scores_df = pd.DataFrame(score_records)



In [54]:
# Sanity check on the training matrix left over from the most recent PLS fit:
# rows are the cope rows stacked across the training runs, columns are the
# x_roi voxels. Depends on the PLS cell having been run first.
X_train.shape

(80, 2253)