In [2]:
import pandas as pd
import numpy as np
import os
import h5py
from sklearn import linear_model
import matplotlib.pyplot as plt

import deepdish as dd
import string
try:
    os.chdir('/data/MoL_clean/scripts')
except:
    pass
import util
# util has some variables in them
# import GLM_helper as gh
import random 

import scipy.stats as stats
import glob

import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.linear_model import LinearRegression
from sentence_transformers import SentenceTransformer
from scipy.stats import ttest_1samp

from scipy import stats, linalg
import warnings
from joblib import Parallel, delayed
import multiprocessing
warnings.filterwarnings('ignore')

# Map scan-style subject labels ('sub-XX') to the 'subjNNN' identifiers that are
# used below to build behavioral-file and recall-sheet paths.
sub2subj = {"sub-01":"subj001", "sub-02":"subj002","sub-03":"subj003","sub-04":"subj005",
              "sub-05":"subj006", "sub-06":"subj007", "sub-07":"subj008", "sub-08":"subj009", 
           "sub-09":"subj010","sub-10":"subj011","sub-11":"subj013", "sub-12":"subj014", 
            "sub-13":"subj017", "sub-14":"subj018", "sub-15":"subj019", "sub-16":"subj020",
           "sub-17":"subj021", "sub-18":"subj022", "sub-19":"subj023", "sub-20":"subj024",'sub-21':'subj025',
           'sub-22':'subj026','sub-23':'subj027','sub-24':'subj029','sub-25':'subj031'}
# Map session labels to the labels used in behavioral folder names and
# (lower-cased) Excel sheet names.
ses2w = {"ses-01":"W2", "ses-02":"W4D1", "ses-03":"W4D2"}

# All relative paths in this notebook ('../outputs', '../sheets', 'ROIs/...') are
# resolved against this working directory.
os.chdir('/data/MoL_clean/scripts')
# Length of the per-hemisphere maps produced by SLtoVox
# (presumably the number of surface vertices per hemisphere -- TODO confirm).
nv = 40962

subjects = ['sub-%.2d'%s for s in range(1,26)]
# NOTE(review): this list only contains 'ses-01' and 'ses-02'; the analysis code
# below iterates over its own explicit three-session lists instead.
sessions = ['ses-%.2d'%s for s in range(1,3)]
runs = ['run-%.2d'%s for s in range(1,3)]
# presumably the repetition time in seconds -- TODO confirm
TR = 1.5
# presumably the number of TRs per run for each task -- TODO confirm
nTRs = {'Item':302, 'Loci':302, 'Encode':355}

# presumably the TR counts for the W4D2 session ('ses-03') -- TODO confirm
nTRs_w4d2 = {'Item': 156, 'Loci': 156, 'Encode': 182}
# Searchlight definitions: one entry per searchlight, taken from the values of the
# dict stored in each .h5 file. SLtoVox uses each entry as an index into a
# length-nv array.
SL_lh = list(dd.io.load('SLlist_verydense.lh.h5').values())
SL_rh = list(dd.io.load('SLlist_verydense.rh.h5').values())
# ROI definitions, loaded the same way as the searchlight lists.
ag = list(dd.io.load('ROIs/Ang_verts.h5').values())
pmc = list(dd.io.load('ROIs/PMC_verts.h5').values())
mPFC = list(dd.io.load('ROIs/mPFC_verts.h5').values())
ROIs = {'ag':ag, 'pmc':pmc, 'mpfc':mPFC}
# Keyed by 'L'/'R' because SLtoVox iterates over exactly those two keys.
SLlist = {'L':SL_lh, "R": SL_rh}
# Number of left-hemisphere searchlights (passed to util.SL_array_to_maps below).
nSL_L = len(SLlist['L'])

In [3]:
def remove_xs(string_list):
    """Return a new list with every element of ``string_list`` that is not the literal 'x'."""
    kept = []
    for entry in string_list:
        if entry != 'x':
            kept.append(entry)
    return kept


def add_numbers_to_duplicates(string_list):
    """
    Lower-case every label and disambiguate repeated labels with a running number.

    The first occurrence of a label is kept as is; the second becomes
    "<label> 2", the third "<label> 3", and so on. The placeholder 'x' is never
    numbered. Repeats are detected on the original (case-sensitive) label,
    and lower-casing is applied to the output only.

    Parameters
    ----------
    string_list : iterable of str

    Returns
    -------
    list of str
        Same length and order as the input.
    """
    seen = {}
    labelled = []

    for entry in string_list:
        is_repeat = entry in seen and entry != 'x'
        if not is_repeat:
            # first sighting (or the 'x' placeholder): keep the bare label
            seen[entry] = 1
            labelled.append(entry.lower())
            continue
        seen[entry] += 1
        labelled.append(f"{entry} {seen[entry]}".lower())

    return labelled


def partial_corr(C, desired_i= None, desired_j=None):
    """
    Returns the sample linear partial correlation coefficients between pairs of variables in C, controlling 
    for the remaining variables in C.

    Each variable of a pair is regressed (least squares, no intercept term is
    added) on all the other columns of C, and the Pearson correlation of the two
    residual vectors is reported.

    Parameters
    ----------
    C : array-like, shape (n, p)
        Array with the different variables. Each column of C is taken as a variable
    desired_i, desired_j : int, optional
        If both are given, only the partial correlation between columns
        desired_i and desired_j is computed.

    Returns
    -------
    P : ndarray of shape (p, p), or scalar
        When desired_i / desired_j are not both given: P[i, j] contains the
        partial correlation of C[:, i] and C[:, j] controlling for the remaining
        variables in C (diagonal is 1).
        When both are given: the single partial correlation coefficient.
    """
    C = np.asarray(C)
    p = C.shape[1]

    def _pair_corr(i, j):
        # Mask selecting every column except the two being correlated.
        # Built-in bool is used because the np.bool alias was removed in NumPy 1.24.
        idx = np.ones(p, dtype=bool)
        idx[i] = False
        idx[j] = False
        controls = C[:, idx]

        beta_i = linalg.lstsq(controls, C[:, i])[0]
        beta_j = linalg.lstsq(controls, C[:, j])[0]

        res_i = C[:, i] - controls.dot(beta_i)
        res_j = C[:, j] - controls.dot(beta_j)

        return stats.pearsonr(res_i, res_j)[0]

    if desired_i is not None and desired_j is not None:
        return _pair_corr(desired_i, desired_j)

    # Built-in float replaces the np.float alias removed in NumPy 1.24.
    P_corr = np.zeros((p, p), dtype=float)
    for i in range(p):
        P_corr[i, i] = 1
        for j in range(i+1, p):
            corr = _pair_corr(i, j)
            P_corr[i, j] = corr
            P_corr[j, i] = corr

    return P_corr

def SLtoVox(D, SLlist, nv, zeronan=True):
    """
    Project per-searchlight values onto a length-nv map by averaging, at each
    index, the values of every searchlight that contains it.

    Parameters
    ----------
    D : dict with keys 'L' and 'R'
        Each value is an array whose first axis indexes searchlights
        (N x arbitrary trailing dims).
    SLlist : dict with keys 'L' and 'R'
        Each value is a length-N sequence; entry i holds the indices covered by
        searchlight i.
    nv : int
        Length of the first axis of each output map.
    zeronan : bool
        If True, NaNs in the result (e.g. indices covered by no searchlight)
        are replaced by 0.

    Returns
    -------
    dict with keys 'L' and 'R', each an array of shape (nv,) + trailing dims.
    """
    Dvox = {}
    for hem in ('L', 'R'):
        values = D[hem]
        trailing = values.shape[1:]

        total = np.zeros((nv,) + trailing)
        # singleton trailing axes so the counts broadcast against `total`
        hits = np.zeros((nv,) + (1,) * len(trailing))

        for sl_idx in range(len(SLlist[hem])):
            members = SLlist[hem][sl_idx]
            total[members] += values[sl_idx]
            hits[members] += 1

        # uncovered indices divide by NaN instead of 0
        hits[hits == 0] = np.nan
        averaged = total / hits

        if zeronan:
            averaged[np.isnan(averaged)] = 0

        Dvox[hem] = averaged

    return Dvox

In [None]:
# Variant of get_beta_dicts that keeps the two locus runs and the two item runs separate (no averaging).
def get_beta_dicts_li(sub,ses,hippo=False):
    """
    Load the locus and item beta maps of one subject/session, one entry per run.

    Parameters
    ----------
    sub : str
        Subject label, a key of ``sub2subj`` (e.g. 'sub-01').
    ses : str
        Session label, a key of ``ses2w`` (e.g. 'ses-01').
    hippo : bool
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    loci_beta_dict, item_beta_dict : dict
        ``dict[region][label]`` is a list with one beta column per run in which
        the label occurs. Regions are 'lh', 'rh', 'anterior_hipp',
        'posterior_hipp'.
    """
    regions = ['lh','rh','anterior_hipp','posterior_hipp']
    run_labels = ('run-01', 'run-02')
    beta_file = '../outputs/betas/%s/%s_%s_%s_%s_beta.txt'

    # --- item words: one list per run, read from the two behavioral csv files
    task = 'Item'
    item_filenames = sorted(glob.glob(f'../behavioral/{sub2subj[sub]}/{ses2w[ses]}/*{task.lower()}*.csv'))
    item_words_lists = []
    for csv_name in (item_filenames[0], item_filenames[1]):
        item_words_lists.append([w for w in pd.read_csv(csv_name)['Word'] if w is not np.nan])
    # item keys come from the first run's word list only
    item_beta_dict = {region: {w:[] for w in item_words_lists[0]} for region in regions}

    # --- spoken loci: one list per run; keys are the union of both runs
    task = 'Loci'
    loci_lists = []
    for sheet_fmt in ('%sloci1', '%sloci2'):
        spoken = pd.read_excel('../sheets/%s_recallperformance.xlsx'%sub2subj[sub], sheet_name=sheet_fmt%ses2w[ses].lower())['spoken_loci']
        loci_lists.append(add_numbers_to_duplicates(list(spoken)))
    loci_beta_dict = {region: {l:[] for l in list(set(loci_lists[0]+loci_lists[1]))} for region in regions}

    # --- locus betas: column i of a run's beta file belongs to the i-th spoken locus of that run
    for region in regions:
        for run, run_loci in zip(run_labels, loci_lists):
            loci_fmri = np.loadtxt(beta_file%(task.lower(),sub,ses,run,region))
            for col, locus in enumerate(run_loci):
                loci_beta_dict[region][locus].append(loci_fmri[:,col])

    # --- item betas: column i of a run's beta file belongs to the i-th word of that run
    task = 'Item'
    for region in regions:
        for run, run_words in zip(run_labels, item_words_lists):
            item_fmri = np.loadtxt(beta_file%(task.lower(),sub,ses,run,region))
            for col, word in enumerate(run_words):
                item_beta_dict[region][word].append(item_fmri[:,col])

    return loci_beta_dict, item_beta_dict



In [None]:
def get_beta_dicts(sub,ses,hippo=False):
    """
    Load run-averaged locus and item beta maps plus encoding and retrieval beta
    maps for a given subject and session.

    Parameters
    ----------
    sub : str
        Subject label, a key of ``sub2subj`` (e.g. 'sub-01').
    ses : str
        Session label, a key of ``ses2w`` (e.g. 'ses-01').
    hippo : bool
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    loci_beta_dict, item_beta_dict, encode_beta_dict, retrieve_beta_dict : dict
        Each is ``dict[region][key] -> 1-D beta array`` for the regions 'lh',
        'rh', 'anterior_hipp', 'posterior_hipp'.
        * loci: key is the (lower-cased, de-duplicated) spoken locus; the two
          runs are averaged when the locus has exactly two entries, otherwise
          the first entry is used.
        * item: key is the word; averaged over its per-run entries.
        * encode: key is '<loci>-<encode>' lower-cased.
        * retrieve: key is '<spoken_loci>-<first word of retrieval>' lower-cased.
    """
    regions = ['lh','rh','anterior_hipp','posterior_hipp']

    # Per-run locus/item betas come from the non-averaging loader so that the
    # file and sheet parsing is defined in a single place.
    loci_runs, item_runs = get_beta_dicts_li(sub, ses)

    loci_beta_dict = {}
    item_beta_dict = {}
    for hem in regions:
        # average locus rep if spoken in both runs
        loci_beta_dict[hem] = {
            l: (np.mean(reps, axis=0) if len(reps) == 2 else reps[0])
            for l, reps in loci_runs[hem].items()
        }
        item_beta_dict[hem] = {w: np.mean(reps, axis=0) for w, reps in item_runs[hem].items()}

    # The recall sheet does not depend on the region, so read and parse it once
    # instead of once per region.
    recall_sheet = pd.read_excel('../sheets/%s_recallperformance.xlsx'%sub2subj[sub], sheet_name=ses2w[ses].lower())
    recall_sheet['retrieval'] = recall_sheet['retrieval'].apply(lambda x: x.split(' ')[0])

    # Only the first 40 rows (20 for 'ses-03') are used as encoding pairs.
    n_encode = 20 if ses == 'ses-03' else 40
    encode_pairs = [recall_sheet['loci'][r]+'-'+recall_sheet['encode'][r] for r in range(len(recall_sheet))][:n_encode]
    # NOTE(review): p[0] is the first *character* of the 'loci-encode' string, so
    # this drops every pair whose locus starts with 'x', not only the 'x'
    # placeholder -- confirm this is intended.
    valid_encode_pairs = [p for p in encode_pairs if p[0]!='x']

    # Retrieval betas are stored only for rows with a spoken locus other than
    # 'x'; the i-th such row corresponds to beta column i.
    retrieve_keys = [
        (recall_sheet['spoken_loci'][r]+'-'+recall_sheet['retrieval'][r]).lower()
        for r in range(len(recall_sheet))
        if recall_sheet['spoken_loci'][r]!='x'
    ]

    retrieve_beta_dict = {'lh':{},'rh':{},'anterior_hipp':{},'posterior_hipp':{}}
    encode_beta_dict = {'lh':{},'rh':{},'anterior_hipp':{},'posterior_hipp':{}}
    run = 'run-01'
    for hem in regions:
        recall_fmri = np.loadtxt('../outputs/betas/%s/%s_%s_%s_%s_beta.txt'%('retrieve',sub,ses,run,hem))
        encode_fmri = np.loadtxt('../outputs/betas/%s/%s_%s_%s_%s_beta.txt'%('encode',sub,ses,run,hem))
        for r, pair in enumerate(valid_encode_pairs):
            encode_beta_dict[hem][pair.lower()] = encode_fmri[:,r]
        for ret_idx, key in enumerate(retrieve_keys):
            retrieve_beta_dict[hem][key] = recall_fmri[:,ret_idx]
    return loci_beta_dict, item_beta_dict, encode_beta_dict, retrieve_beta_dict



def get_residuals(C, dv):
    """
    Returns the residuals of the dv(th) column of matrix C, accounting for all variances explained by other columns

    The column is regressed on the remaining columns by least squares; no
    intercept term is added.

    Parameters
    ----------
    C : array-like, shape (n, p)
        Array with the different variables. Each column of C is taken as a variable
    dv : int
        Index of the column to residualize.
    
    Returns
    -------
    res : array-like, shape (n,)
        Residual of dv
    """
    C = np.asarray(C)
    p = C.shape[1]
    # Built-in bool is used because the np.bool alias was removed in NumPy 1.24.
    idx = np.ones(p, dtype=bool)
    idx[dv] = False
    beta = linalg.lstsq(C[:, idx], C[:, dv])[0]

    res = C[:, dv] - C[:, idx].dot(beta)
            
    return res


def get_residual(target, var1, var2):
    """
    Residual of ``target`` after an ordinary least-squares fit on ``var1`` and ``var2``.

    The two predictors are stacked as columns, a ``LinearRegression`` model is
    fitted to ``target``, and the fitted values are subtracted from ``target``.
    """
    design = np.column_stack((var1, var2))
    fitted = LinearRegression().fit(design, target).predict(design)
    return target - fitted

def calculate_weights(x, y, z, A):
    """
    Calculate the weights of variables x, y, z in predicting each timepoint of matrix A.

    Parameters:
    x, y, z: Arrays of shape (nv,) representing the variables in each vertices in the ROI.
    A: Matrix of shape (nv, nTR) where each column is a timepoint, or a single
       pattern of shape (nv,).

    Returns:
    weights: A matrix of shape (nTR, 3) containing the weights for each variable
             for each timepoint; for a 1-D A, an array of shape (3,).
    """

    # Combine x, y, z into a single matrix
    predictors = np.column_stack((x, y, z))
    A = np.asarray(A)

    # A single pattern: one regression, one coefficient vector.
    # (Dispatching on ndim replaces a bare ``except`` that doubled as control
    # flow and could mask unrelated errors raised during fitting.)
    if A.ndim < 2:
        model = LinearRegression()
        model.fit(predictors, A)
        return model.coef_

    num_TRs = A.shape[1]
    # Initialize an array to store weights
    weights = np.zeros((num_TRs, 3))

    # Fit one independent regression per timepoint (column of A)
    for i in range(num_TRs):
        model = LinearRegression()
        model.fit(predictors, A[:, i])
        weights[i, :] = model.coef_

    return weights

In [None]:
def get_diag_diff_perm(corrmat, nPerm=1000):
    '''
    Return (diagonal mean - off-diagonal mean) of each correlation matrix,
    followed by the same statistic for row-shuffled versions of the matrix.

    Parameters
    ----------
    corrmat : array-like, shape (n_roi, n, n)
        Stack of correlation matrices.
        n_roi: number of rois
        n: the dimension of the correlation matrix
    nPerm: int, number of permutations

    Returns
    -------
    diag_diff : ndarray, shape (n_roi, nPerm+1)
        Column 0 is the statistic for the unshuffled matrix; columns 1..nPerm
        are the statistics after successive row permutations (each permutation
        is applied on top of the previous one).

    Notes
    -----
    * The first row and first column of every matrix are excluded before the
      statistic is computed. NOTE(review): the reason is not visible here --
      confirm this is intended.
    * Every ROI uses the same permutation sequence (a generator seeded with 0),
      matching the sequence obtained from ``np.random.seed(0)``. A local
      generator is used so the caller's global NumPy random state is untouched.
    '''
    n_roi = corrmat.shape[0]
    diag_diff = np.zeros((n_roi, nPerm+1))
    for i in range(n_roi):
        corr_i = corrmat[i, :, :]
        n = corr_i.shape[1]
        # rows/cols 1..n-1 (index 0 dropped); copy so the input is never modified
        corr_perm = corr_i[1:n, 1:n].copy()

        # the diagonal mask depends only on the matrix size: build it once per ROI
        diag_mask = np.eye(corr_perm.shape[0], dtype=bool)
        off_mask = ~diag_mask

        # same stream as np.random.seed(0) + np.random.permutation, without
        # reseeding the global generator
        rng = np.random.RandomState(0)
        for p in range(nPerm+1):
            diag_diff[i, p] = corr_perm[diag_mask].mean() - corr_perm[off_mask].mean()
            # fancy indexing already yields a new array
            corr_perm = corr_perm[rng.permutation(corr_perm.shape[0]), :]
    return diag_diff



def generate_shuffled_indexes(indexes_to_shuffle, num_permutation):
    """
    Build ``num_permutation`` independently shuffled copies of ``indexes_to_shuffle``.

    Each copy is taken with a full slice and shuffled in place with
    ``random.shuffle`` (the Python ``random`` module's global state).
    Returns the copies as a list, in the order they were generated.
    """

    def _one_shuffle():
        candidate = indexes_to_shuffle[:]
        random.shuffle(candidate)
        return candidate

    return [_one_shuffle() for _ in range(num_permutation)]

Encoding-recall partial correlation (version 1): compute an SL map of the partial correlation between encoding and recall

In [None]:
# Generates correlation matrices between tasks (L-L, I-I, I-E, L-E, I-R, L-R
# with same item/loci/combination) for each subject in each session.
# Also gets the weights of L, I, E (residual, removing I and L) when predicting R.
# For the weights, generates a null distribution by permutation, and gets the z-score and the
# mean of the null distribution (weight_store_by_sl_shuffled)

def get_sub_corr_SL(sub):
    """
    Run the searchlight regression and cross-task correlation analysis for one subject.

    For each of the three sessions and each cortical hemisphere ('lh', 'rh'),
    and for every entry of the searchlight file 'SLlist_verydense.<hem>.h5':

    * for every valid locus-word pair, regress the retrieval pattern on the
      locus pattern, the item pattern and the encoding residual (encoding
      pattern with locus and item regressed out), via ``calculate_weights``;
    * repeat the regression with the retrieval pattern of 100 shuffled pairs
      and z-score the true weights against those shuffled weights;
    * compute cross-correlation matrices between the pattern sets
      (locus/encode, locus/retrieve, item/encode, item/retrieve, and
      run 1 / run 2 for locus and for item).

    Results are written under '../outputs/regression_weights_predict_retrieval/'
    (text files, one row per searchlight, columns = locus, item, encode) and
    '../outputs/corrmats/<pair>/' (.npy, one matrix per searchlight).

    Parameters
    ----------
    sub : str
        Subject label such as 'sub-01'.

    Returns
    -------
    None. All results are saved to disk.

    Notes
    -----
    The shuffles come from ``generate_shuffled_indexes`` (Python ``random``
    module); no seed is set in this function, so the null weights and z-scores
    are not reproducible unless a seed is set elsewhere.
    """
    # local list; shadows the module-level `sessions`
    sessions = ['ses-01','ses-02','ses-03']
    for ses in sessions:
        weight_store_by_sl = {}
        weight_store_by_sl_shuffled = {}
        weight_store_by_sl_z = {}
        # only initialised below; the code that would fill and save it is commented out
        e_r_semipartial = {}
        l_e_corr = {}
        l_r_corr = {}
        i_e_corr = {}
        i_r_corr = {}
        l_l_corr = {}
        i_i_corr = {}
        
        # get beta dictionaries for loci and item, without averaging the two runs
        loci_beta_dict_noaverage, item_beta_dict_noaverage = get_beta_dicts_li(sub,ses)

        # get beta dictionaries for loci, item, encode, and retrieve, averaging two loci and item runs
        loci_beta_dict, item_beta_dict, encode_beta_dict, retrieve_beta_dict = get_beta_dicts(sub,ses)

        for hem in ['lh','rh']:
            # load the SL list for the current hemisphere
            # (a dict; it is iterated by key below and sl[l] is used as an index into the beta arrays)
            sl = dd.io.load('SLlist_verydense.'+hem+'.h5')
            # initialize the dictionaries to store the weights and correlation matrices
            # columns: 0 = locus, 1 = item, 2 = encoding residual
            weight_store_by_sl[hem] = np.zeros((len(sl),3))
            weight_store_by_sl_shuffled[hem] = np.zeros((len(sl),3))
            weight_store_by_sl_z[hem] = np.zeros((len(sl),3))
            e_r_semipartial[hem] = []
            l_e_corr[hem] = []
            l_r_corr[hem] = []
            i_e_corr[hem] = []
            i_r_corr[hem] = []
            l_l_corr[hem] = []
            i_i_corr[hem] = []

            
            for sl_i, l in enumerate(sl):
                # initialize lists to store the weights and correlation matrices for the current SL
                loci_weights = []
                item_weights = []
                encode_weights = []
                # never filled or read below
                retrieve_weights = []
                retrieve_all= []
                encode_all = []
                # filled below but only referenced by commented-out code
                encode_residual_all = []
                locus_all = []
                item_all = []

                loci_weights_z = []
                item_weights_z = []
                encode_weights_z = []
                
                loci_weights_shuffled = []
                item_weights_shuffled = []
                encode_weights_shuffled = []

                locus_weights1 = []
                locus_weights2 = []
                item_weights1 = []
                item_weights2 = []

                valid_pairs = []

                # get valid pairs of locus and item: retrieval keys ('locus-word') whose locus has a
                # locus beta, whose word is not 'x', and which also have an encoding beta.
                # The result does not depend on the searchlight, but it is rebuilt for each one.
                for i, key in enumerate(retrieve_beta_dict[hem]):
                    locus,word = key.split('-')
                    if locus in loci_beta_dict[hem] and word != 'x' and locus+'-'+word in encode_beta_dict[hem]:
                        valid_pairs.append(key)
                # generate 100 shuffled copies of the valid pairs (a fresh set for every searchlight)
                shuffled_keys = generate_shuffled_indexes(valid_pairs, 100)
                for i, key in enumerate(valid_pairs):
                    locus,word = key.split('-')
                    # same condition that built valid_pairs, so it holds for every key here
                    if locus in loci_beta_dict[hem] and word != 'x' and locus+'-'+word in encode_beta_dict[hem]:
                        # restrict each beta map to the entries of the current searchlight
                        locus_rep = loci_beta_dict[hem][locus][sl[l]]
                        item_rep = item_beta_dict[hem][word][sl[l]]
                        encode_rep = encode_beta_dict[hem][locus+'-'+word][sl[l]]
                        retrieve_rep = retrieve_beta_dict[hem][key][sl[l]]

                        # per-run (non-averaged) betas for the same locus / word
                        locus_rep_noaverage = loci_beta_dict_noaverage[hem][locus]
                        item_rep_noaverage = item_beta_dict_noaverage[hem][word]

                        # encoding pattern with the locus and item patterns regressed out
                        encode_rep_residual = get_residual(encode_rep, locus_rep, item_rep)
                        encode_residual_all.append(encode_rep_residual)

                        retrieve_all.append(retrieve_rep)
                        encode_all.append(encode_rep)
                        locus_all.append(locus_rep)
                        item_all.append(item_rep)

                        # for locus-locus and item-item correlations, store the patterns of the two runs
                        # (despite the "weights" in the names, these lists hold beta patterns)
                        item_weights1.append(item_rep_noaverage[0][sl[l]])
                        item_weights2.append(item_rep_noaverage[1][sl[l]])
                        # a locus contributes only when it has an entry from both runs
                        if len(locus_rep_noaverage) == 2:
                            locus_weights1.append(locus_rep_noaverage[0][sl[l]])
                            locus_weights2.append(locus_rep_noaverage[1][sl[l]])
                        
                        # run regression to get the weights of locus, item, encoding residual, for the SL
                        l_weights,i_weights,e_weights = calculate_weights(locus_rep, item_rep, encode_rep_residual, retrieve_rep)
                        loci_weights.append(l_weights)
                        item_weights.append(i_weights)  
                        encode_weights.append(e_weights)

                        loci_weights_shuffled_item = []
                        item_weights_shuffled_item = []
                        encode_weights_shuffled_item = []
                        # for each of the 100 permutations, predict the retrieval pattern of the key that
                        # sits at position i of the p-th shuffled list, using the same three predictors
                        for p in range(100):
                            retrieve_rep_shuffled = retrieve_beta_dict[hem][shuffled_keys[p][i]][sl[l]]
                            l_weights_shuffled,i_weights_shuffled,e_weights_shuffled = calculate_weights(locus_rep, item_rep, encode_rep_residual, retrieve_rep_shuffled)
                            loci_weights_shuffled_item.append(l_weights_shuffled)
                            item_weights_shuffled_item.append(i_weights_shuffled)
                            encode_weights_shuffled_item.append(e_weights_shuffled)
                        # compute the z-scores for the weights, comparing the weight when predicting the right vs. 
                        # wrong retrieval representation from the same locus-item-encoding_residual:
                        # z = (true weight - mean of shuffled weights) / std of shuffled weights
                        loci_weights_z.append((l_weights-np.mean(loci_weights_shuffled_item))/np.std(loci_weights_shuffled_item)) 
                        item_weights_z.append((i_weights-np.mean(item_weights_shuffled_item))/np.std(item_weights_shuffled_item))
                        encode_weights_z.append((e_weights-np.mean(encode_weights_shuffled_item))/np.std(encode_weights_shuffled_item))

                        loci_weights_shuffled.append(np.mean(loci_weights_shuffled_item))
                        item_weights_shuffled.append(np.mean(item_weights_shuffled_item))
                        encode_weights_shuffled.append(np.mean(encode_weights_shuffled_item))

                # store the weights, the z-scores, and the shuffled weights for the current SL,
                # averaged over pairs. np.nan_to_num replaces NaN by 0 (and +/-inf by large finite
                # values) before the mean is taken.
                weight_store_by_sl[hem][sl_i,0] = np.mean(np.nan_to_num(loci_weights))
                weight_store_by_sl[hem][sl_i,1] = np.mean(np.nan_to_num(item_weights))
                weight_store_by_sl[hem][sl_i,2] = np.mean(np.nan_to_num(encode_weights))
                
                weight_store_by_sl_z[hem][sl_i,0] = np.mean(np.nan_to_num(loci_weights_z))
                weight_store_by_sl_z[hem][sl_i,1] = np.mean(np.nan_to_num(item_weights_z))
                weight_store_by_sl_z[hem][sl_i,2] = np.mean(np.nan_to_num(encode_weights_z))

                weight_store_by_sl_shuffled[hem][sl_i,0] = np.mean(np.nan_to_num(loci_weights_shuffled))
                weight_store_by_sl_shuffled[hem][sl_i,1] = np.mean(np.nan_to_num(item_weights_shuffled))
                weight_store_by_sl_shuffled[hem][sl_i,2] = np.mean(np.nan_to_num(encode_weights_shuffled))

                # compute the correlation matrices for the current SL.
                # np.corrcoef(a, b) correlates the stacked rows of a and b; the slice keeps the
                # cross block whose rows are the patterns of b and whose columns are the patterns of a.


                le_corrmat = np.corrcoef(locus_all, encode_all)[len(locus_all):, :len(encode_all)]
                lr_corrmat = np.corrcoef(locus_all, retrieve_all)[len(locus_all):, :len(retrieve_all)]
                ie_corrmat = np.corrcoef(item_all, encode_all)[len(item_all):, :len(encode_all)]
                ir_corrmat = np.corrcoef(item_all, retrieve_all)[len(item_all):, :len(retrieve_all)]

                # compute the correlation matrices for locus-locus and item-item (run 1 vs. run 2)
                ll_corrmat = np.corrcoef(locus_weights1,locus_weights2)[len(locus_weights1):, :len(locus_weights2)]
                ii_corrmat = np.corrcoef(item_weights1,item_weights2)[len(item_weights1):, :len(item_weights2)]
                # er_corrmat = np.corrcoef(encode_all, retrieve_all)[len(encode_all):, :len(retrieve_all)]
                # er_corrmat_semipartial = np.corrcoef(encode_residual_all, retrieve_all)[len(encode_all):, :len(retrieve_all)]
                # e_r_semipartial[hem].append(er_corrmat_semipartial)
                
                l_e_corr[hem].append(le_corrmat)
                l_r_corr[hem].append(lr_corrmat)
                i_e_corr[hem].append(ie_corrmat)
                i_r_corr[hem].append(ir_corrmat)
                l_l_corr[hem].append(ll_corrmat)
                i_i_corr[hem].append(ii_corrmat)
                
        # save the weights and correlation matrices for the current subject and session
        # (the file names say "par_corr", but the arrays saved here hold regression weights)
        np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_{ses}_enc_ret_par_corr_shuffled_lh.txt',weight_store_by_sl_shuffled['lh'])
        np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_{ses}_enc_ret_par_corr_shuffled_rh.txt',weight_store_by_sl_shuffled['rh'])   

        np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_{ses}_enc_ret_par_corr_z_lh.txt',weight_store_by_sl_z['lh'])
        np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_{ses}_enc_ret_par_corr_z_rh.txt',weight_store_by_sl_z['rh']) 

        np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_{ses}_enc_ret_par_corr_lh.txt',weight_store_by_sl['lh'])
        np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_{ses}_enc_ret_par_corr_rh.txt',weight_store_by_sl['rh'])
        
        np.save(f'../outputs/corrmats/item_encoding/{sub}_{ses}_SL_lh.npy',np.array(i_e_corr['lh']))
        np.save(f'../outputs/corrmats/item_encoding/{sub}_{ses}_SL_rh.npy',np.array(i_e_corr['rh']))
        np.save(f'../outputs/corrmats/item_retrieval/{sub}_{ses}_SL_lh.npy',np.array(i_r_corr['lh']))
        np.save(f'../outputs/corrmats/item_retrieval/{sub}_{ses}_SL_rh.npy',np.array(i_r_corr['rh']))
        np.save(f'../outputs/corrmats/locus_encoding/{sub}_{ses}_SL_lh.npy',np.array(l_e_corr['lh']))
        np.save(f'../outputs/corrmats/locus_encoding/{sub}_{ses}_SL_rh.npy',np.array(l_e_corr['rh']))
        np.save(f'../outputs/corrmats/locus_retrieval/{sub}_{ses}_SL_lh.npy',np.array(l_r_corr['lh']))
        np.save(f'../outputs/corrmats/locus_retrieval/{sub}_{ses}_SL_rh.npy',np.array(l_r_corr['rh']))
        np.save(f'../outputs/corrmats/locus_locus/{sub}_{ses}_SL_lh.npy',np.array(l_l_corr['lh']))
        np.save(f'../outputs/corrmats/locus_locus/{sub}_{ses}_SL_rh.npy',np.array(l_l_corr['rh']))
        np.save(f'../outputs/corrmats/item_item/{sub}_{ses}_SL_lh.npy',np.array(i_i_corr['lh']))
        np.save(f'../outputs/corrmats/item_item/{sub}_{ses}_SL_rh.npy',np.array(i_i_corr['rh']))
        # np.save(f'../outputs/corrmats/encoding_retrieval_residuals/{sub}_{ses}_semipartial_SL_lh.npy',np.array(e_r_semipartial['lh']))
        # np.save(f'../outputs/corrmats/encoding_retrieval_residuals/{sub}_{ses}_semipartial_SL_rh.npy',np.array(e_r_semipartial['rh']))


In [16]:
# Run the per-subject searchlight analysis for all subjects, with up to 13 joblib workers.
# get_sub_corr_SL has no return statement and writes its results to disk, so `x`
# is just a list of None values, one per subject.
x = Parallel(n_jobs=13)(delayed(get_sub_corr_SL)(sub) for sub in subjects)

In [None]:
# Parse the z-scored regression weights: average them across the three sessions
# for each subject, project them from searchlights to per-vertex maps, and save.
for sub in subjects:
    weight_store_l = []
    weight_store_r = []

    for ses in ['ses-01','ses-02','ses-03']:
        weight_store_l.append(np.loadtxt(f'../outputs/regression_weights_predict_retrieval/{sub}_{ses}_enc_ret_par_corr_z_lh.txt'))
        weight_store_r.append(np.loadtxt(f'../outputs/regression_weights_predict_retrieval/{sub}_{ses}_enc_ret_par_corr_z_rh.txt'))

    # mean over sessions -> (n_searchlights, 3)
    sub_weight_l = np.mean(np.array(weight_store_l),axis=0)
    sub_weight_r = np.mean(np.array(weight_store_r),axis=0)
    # extract mean locus (col 0), item (col 1) and encoding (col 2) weights for the left and right hemisphere
    L, I, E = {},{},{}
    L['L'] = sub_weight_l[:,0]
    L['R'] = sub_weight_r[:,0]
    I['L'] = sub_weight_l[:,1]
    I['R'] = sub_weight_r[:,1]
    E['L'] = sub_weight_l[:,2]
    E['R'] = sub_weight_r[:,2]
    # Convert the weights from searchlights to vertex maps. SLtoVox computes both
    # hemispheres in a single call, so it is called once per map and the result
    # is split, instead of being called once per hemisphere.
    locus_vox = SLtoVox(L, SLlist,nv)
    item_vox = SLtoVox(I, SLlist,nv)
    encode_vox = SLtoVox(E, SLlist,nv)
    locus_vox_l, locus_vox_r = locus_vox['L'], locus_vox['R']
    item_vox_l, item_vox_r = item_vox['L'], item_vox['R']
    encode_vox_l, encode_vox_r = encode_vox['L'], encode_vox['R']
    
    np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_vox_locus_weights_z_lh.txt',locus_vox_l)
    np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_vox_locus_weights_z_rh.txt',locus_vox_r)
    np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_vox_item_weights_z_lh.txt',item_vox_l)
    np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_vox_item_weights_z_rh.txt',item_vox_r)
    np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_vox_encode_weights_z_lh.txt',encode_vox_l)
    np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_vox_encode_weights_z_rh.txt',encode_vox_r)

In [None]:
# Parse the raw regression weights: average them across the three sessions
# for each subject, project them from searchlights to per-vertex maps, and save.
for sub in subjects:
    weight_store_l = []
    weight_store_r = []

    for ses in ['ses-01','ses-02','ses-03']:
        weight_store_l.append(np.loadtxt(f'../outputs/regression_weights_predict_retrieval/{sub}_{ses}_enc_ret_par_corr_lh.txt'))
        weight_store_r.append(np.loadtxt(f'../outputs/regression_weights_predict_retrieval/{sub}_{ses}_enc_ret_par_corr_rh.txt'))

    # mean over sessions -> (n_searchlights, 3)
    sub_weight_l = np.mean(np.array(weight_store_l),axis=0)
    sub_weight_r = np.mean(np.array(weight_store_r),axis=0)
    # columns: 0 = locus, 1 = item, 2 = encoding
    L, I, E = {},{},{}
    L['L'] = sub_weight_l[:,0]
    L['R'] = sub_weight_r[:,0]
    I['L'] = sub_weight_l[:,1]
    I['R'] = sub_weight_r[:,1]
    E['L'] = sub_weight_l[:,2]
    E['R'] = sub_weight_r[:,2]
    # SLtoVox computes both hemispheres in a single call, so it is called once
    # per map and the result is split, instead of being called once per hemisphere.
    locus_vox = SLtoVox(L, SLlist,nv)
    item_vox = SLtoVox(I, SLlist,nv)
    encode_vox = SLtoVox(E, SLlist,nv)
    locus_vox_l, locus_vox_r = locus_vox['L'], locus_vox['R']
    item_vox_l, item_vox_r = item_vox['L'], item_vox['R']
    encode_vox_l, encode_vox_r = encode_vox['L'], encode_vox['R']
    np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_vox_locus_weights_lh.txt',locus_vox_l)
    np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_vox_locus_weights_rh.txt',locus_vox_r)
    np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_vox_item_weights_lh.txt',item_vox_l)
    np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_vox_item_weights_rh.txt',item_vox_r)
    np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_vox_encode_weights_lh.txt',encode_vox_l)
    np.savetxt(f'../outputs/regression_weights_predict_retrieval/{sub}_vox_encode_weights_rh.txt',encode_vox_r)

In [None]:
# For each subject in each session
# compute the difference between diagonal and off diagonal of the correlation matrices between
# locus-locus, item-item, item-encoding, locus-encoding 
# Get a z-score of difference between diag - off-diag for real data, and for permutated data
# and convert into vertex maps
def process_corrmats(sub, nPerm=1000):
    """Write diagonal-vs-off-diagonal brain maps for one subject.

    For every session and every comparison (item_encoding, locus_encoding,
    locus_locus, item_item) the left- and right-hemisphere searchlight
    correlation matrices are loaded from ``../outputs/corrmats/<comparison>/``
    and two outputs are written under ``../outputs/brain_maps/``:

    * ``<sub>_<ses>_<comparison>_SL_z`` maps, produced by passing the result of
      ``get_diag_diff_perm`` (true diagonal - off-diagonal value plus a null
      distribution from shuffled matrices) to ``util.SL_array_to_maps``, which
      z-scores the real data against the shuffled data and saves vertex maps.
    * ``<sub>_<ses>_<comparison>_diag_mean_{lh,rh}.txt``, the mean of each
      searchlight's matrix diagonal projected to vertices with ``SLtoVox``.

    Parameters
    ----------
    sub : str
        Subject identifier as used in the file names (e.g. ``'sub-01'``).
    nPerm : int, optional
        Number of permutations forwarded to ``get_diag_diff_perm``
        (default 1000, the value previously hard-coded).

    Returns
    -------
    None
        All results are written to disk.
    """
    # Processed in this order; every comparison goes through the same steps.
    comparisons = ('item_encoding', 'locus_encoding', 'locus_locus', 'item_item')

    # Hard-coded on purpose: the module-level `sessions` list only spans
    # ses-01 and ses-02, whereas this analysis also covers ses-03.
    for ses in ['ses-01','ses-02','ses-03']:
        for comparison in comparisons:
            corrmats_l = np.load(f'../outputs/corrmats/{comparison}/{sub}_{ses}_SL_lh.npy')
            corrmats_r = np.load(f'../outputs/corrmats/{comparison}/{sub}_{ses}_SL_rh.npy')
            map_prefix = f'../outputs/brain_maps/{sub}_{ses}_{comparison}'

            # Left-hemisphere searchlights first, then right; nSL_L tells
            # SL_array_to_maps how many leading entries belong to the left side.
            corrmats_concate = np.concatenate([corrmats_l, corrmats_r])
            perm_diag_diff_maps = get_diag_diff_perm(corrmat=corrmats_concate, nPerm=nPerm)
            util.SL_array_to_maps(perm_diag_diff_maps, f'{map_prefix}_SL_z',nSL_L, SLlist, nv)

            # Mean of the diagonal of each searchlight's correlation matrix.
            diag_mean_l = np.mean(np.array([np.diag(matrix) for matrix in corrmats_l]), axis=1)
            diag_mean_r = np.mean(np.array([np.diag(matrix) for matrix in corrmats_r]), axis=1)

            diag_mean_vox = SLtoVox({'L':diag_mean_l,'R':diag_mean_r},SLlist,nv)
            np.savetxt(f'{map_prefix}_diag_mean_lh.txt',diag_mean_vox['L'])
            np.savetxt(f'{map_prefix}_diag_mean_rh.txt',diag_mean_vox['R'])

        

In [13]:
# Run process_corrmats for every subject, spread over a pool of 13 worker
# processes; the pool is torn down when the with-block exits.
with multiprocessing.Pool(processes=13) as pool:
    pool.map(func=process_corrmats, iterable=subjects)

[0.13867891 0.4388947  0.07444446 ... 0.02855295 0.0310866  0.04240775]
[0.38603704 0.20613178 0.16024064 ... 0.01647424 0.0196219  0.02074948]
[0.49249731 0.00525682 0.02540761 ... 0.09148278 0.10338712 0.12434796]
[0.11756232 0.38541199 0.36388231 ... 0.42574992 0.39438516 0.37035936]
[0.01962757 0.36110768 0.10398444 ... 0.34128727 0.34652508 0.37368431]
[0.2125592  0.21100839 0.45887494 ... 0.09095453 0.07756802 0.06533166]
[0.25612088 0.09578015 0.23639844 ... 0.10351693 0.10464535 0.10448302]
[0.17266921 0.2207868  0.32567664 ... 0.4719365  0.4812872  0.48813009]
[0.18232421 0.38324154 0.33095486 ... 0.14871993 0.1402674  0.12479885]
[0.46036107 0.46882978 0.2620461  ... 0.37691398 0.37653086 0.35225653]
[0.14287156 0.38599657 0.25934312 ... 0.078234   0.06493888 0.04175953]
[0.24575394 0.03969549 0.09048875 ... 0.11016197 0.1175785  0.11583619]
[0.47783587 0.01076545 0.00385277 ... 0.27366749 0.32901234 0.38702611]
[0.31388524 0.35967374 0.39688313 ... 0.09029291 0.09713435 0.09