In [1]:
#imports
%matplotlib inline

import os
import sys
sys.path.append('/Users/jeffreyknotts/Documents/Wheatley_Lab/')
import numpy as np
import pandas as pd
import time
from datetime import datetime
import math
import scipy.io as sio
import statsmodels.api as sm
import matplotlib.pyplot as mplot
from numpy.random import random_sample
from math import pi
from math import sqrt
from joblib import Parallel, delayed

In [2]:
#input parameters

#subjects to analyze: the values are used as 0-based row positions in the pair/run lookup table,
#and the reported subject number is the value + 1 (e.g. range(1, 9) -> subjects 2 through 9)
dbicSubs = list(range(1, 3))
cbsSubs = list(range(1, 3))

#rolling window and lag settings
winTRs = 41    #window size [TRs]
stepSize = 41  #step size [TRs]
maxT = 6       #max TR lag to include in linear models
totTRs = 615   #kind of hacky but predefining the total number of TRs that will be in each timeseries

#voxels to analyze (whole brain = 1:69880 -- should add step above this that gets ROI voxel coords)
voxelCoords = list(range(1000))

#permutation settings
fitPermuts = 0       #0 = don't use permutation test for pair fits
groupPermuts = 1000
method = 1           #0=old scrambling method, 1=new method of scrambling all voxels at once

#execution settings
local = 0        #for JD debugging locally -- will eventually remove
numJobs = 8      #number of parallel processes for joblib to use
voxParallel = 0  #use joblib on voxelwise for loop

In [3]:
#load the lookup table that maps each pair to its DBIC/CBS subject IDs
#and to the run numbers of the independent and joint conditions
pairsAndRuns = pd.read_csv(
    r'/afs/.dbic.dartmouth.edu/usr/wheatley/jd/hyperscanning_pair_and_run_lookup.csv'
)
print(pairsAndRuns)

   sub       DBIC        CBS  ind_run  joint_run
0    1  sid000014  hid000001        2          1
1    2  sid000007  hid000002        2          1
2    3  sid000009  hid000003        2          1
3    4  sid000560  hid000004        1          2
4    5  sid000535  hid000005        1          2
5    6  sid000102  hid000006        2          1
6    7  sid000416  hid000007        2          1
7    8  sid000499  hid000008        1          2
8    9  sid000142  hid000009        1          2


In [4]:
#remember when the whole run started
startTime = time.time()

#pair bookkeeping: a pair is "real" when the same number appears in both subject lists,
#every other DBIC x CBS combination is a pseudo pair
realPairN = len(set(dbicSubs).intersection(cbsSubs))
pseudoPairN = len(dbicSubs) * len(cbsSubs) - realPairN
totalPairN = realPairN + pseudoPairN

#rolling window bookkeeping derived from the inputs above
numSteps = int(math.ceil((totTRs - winTRs + 1) / stepSize)) #how many windows fit in the time series
lastTr = winTRs + (numSteps - 1) * stepSize #last TR covered by the final window
TRsLeftOut = totTRs - lastTr #TRs after the final window that are never analyzed
if TRsLeftOut > 0: #warn when the window/step combination drops trailing TRs
    print('The last ' + str(TRsLeftOut) + ' TRs will be left out due to the window and step size!')

#timing log: one row per pair, one column per header entry (contents are uninitialized for now)
timeLog_header = ['dbic','cbs','duration']
timeLog = np.empty((totalPairN, len(timeLog_header)))

In [5]:
def naiveColumnCorr(a, b):
    """
    Reference ("baseline") implementation of column-wise Pearson correlation.

    Correlates column i of "a" with column i of "b" using numpy.corrcoef, one column
    at a time in a plain Python loop. Slow, but useful for checking faster versions.

    Inputs:
    a, b:   2D numpy arrays; the number of columns is taken from "a".
    Output:
    1D numpy array with one correlation coefficient per column of "a".
    """
    n_cols = a.shape[1]
    coeffs = np.zeros(n_cols)
    for col in range(n_cols):
        # corrcoef returns the 2 x 2 correlation matrix, the off-diagonal entry is r
        r_matrix = np.corrcoef(a[:, col], b[:, col])
        coeffs[col] = r_matrix[0, 1]

    return coeffs


def fastColumnCorr(a, b):
    """
    Vectorized column-wise Pearson correlation of two matrices.

    Column i of "a" is correlated with column i of "b" without any Python loop:
    both matrices are centered per column, the column-pair dot products are computed
    with numpy.einsum, and the result is normalized by the per-column sums of squares.
    About ~ 10 times faster than the naive approach in 'naiveColumnCorr'.

    Inputs:
    a, b:   2D numpy arrays with the same shape, both sized samples X vars.
    Output:
    1D numpy array with one correlation coefficient per column.

    NOTES:
    Could be further optimized using numpy.einsum_path for contraction order before first use,
    then simply calling einsum with that order subsequently. However, it only seems to give a
    few percents at best.
    contr_order = np.einsum_path("ij,ij->j", aa, bb, optimize='optimal')
    cov = np.einsum("ij,ij->j", aa, bb, optimize=contr_order[1])
    """
    # center every column (subtract the column mean) in both matrices
    a_centered = a - np.sum(a, axis=0) / a.shape[0]
    b_centered = b - np.sum(b, axis=0) / b.shape[0]

    # dot product of each pair of corresponding columns (unnormalized covariance)
    cross_products = np.einsum("ij,ij->j", a_centered, b_centered)

    # per-column sums of squares, needed for the normalization
    ss_a = np.sum(np.square(a_centered), axis=0)
    ss_b = np.sum(np.square(b_centered), axis=0)

    return cross_products / np.sqrt(ss_a * ss_b)

def phase_scrambling(data_matrix, fft_axis=0):
    """
    FOR REAL DATA ONLY, NOT COMPLEX!
    Phase-scramble all time series of a matrix with one shared random phase vector.

    Every time series is transformed with a real FFT, the same random phase offsets are added
    to the FFT components of all series, and the result is transformed back, as described in
    Prichard and Theiler (1994, Generating surrogate data for time series with several
    simultaneously measured variables. Physical review letters, 73(7), 951).
    The scrambled data keeps the power spectrum of the original but is linearly independent
    of it (zero expected correlation). Because the phase offsets are shared, the covariance
    structure across time series is the same in the scrambled data as in the original.

    Random phases are drawn from numpy's global random state (np.random.random_sample).

    Inputs:
    data_matrix:        2D numpy array of reals. With the default fft_axis=0 each column is
                        one time series (samples X vars); pass fft_axis=1 if each row is
                        one time series (vars X samples).
    fft_axis:           Axis along which FFT / iFFT is calculated, 0 or 1. Defaults to 0.
    Outputs:
    data_scrambled:     2D numpy array of reals, contains the phase-scrambled data.
                        Same size and dimensions as input "data_matrix".
                        Returns 0 if input checks failed.
    TODO:
    - look into implementation with FFTW, which is supposedly faster with repetitive usage (our use case)
    """

    # input checks
    is_real_2d_array = (isinstance(data_matrix, np.ndarray)
                        and len(data_matrix.shape) == 2
                        and not np.iscomplex(data_matrix).any())
    if not is_real_2d_array:
        print('Input arg "data_matrix" should be a 2D numpy array of reals!')
        return 0
    if fft_axis not in [0, 1]:
        print('Input arg "fft_axis" should be 0 or 1!')
        return 0

    # work internally with one time series per column
    needs_transpose = fft_axis == 1
    series = data_matrix.T if needs_transpose else data_matrix
    n_samples = series.shape[0]

    # forward FFT for real input, then split into magnitude and phase
    spectrum = np.fft.rfft(series, axis=0)
    magnitudes = np.abs(spectrum)
    phases = np.angle(spectrum)

    # one random phase offset (0 - 2pi) per FFT component that is not real by definition:
    # the first component is always real, and so is the last one when the sample count is even
    n_components = spectrum.shape[0]
    if n_samples % 2 == 0:
        n_random = n_components - 2
    else:
        n_random = n_components - 1
    phase_offsets = np.zeros(n_components)
    phase_offsets[1:1 + n_random] = np.random.random_sample(n_random) * 2 * pi

    # add the same offsets to every time series (broadcast across columns)
    shifted_phases = phases + phase_offsets[:, np.newaxis]

    # back from polar to cartesian: original magnitudes, randomized phases
    scrambled_spectrum = magnitudes * np.exp(1j * shifted_phases)

    # inverse FFT, forcing the original number of samples
    data_scrambled = np.fft.irfft(scrambled_spectrum, n=n_samples, axis=0)

    # restore the caller's orientation
    if needs_transpose:
        data_scrambled = data_scrambled.T

    return data_scrambled


def phase_scrambling_tests(data_matrix, data_scrambled, fft_axis=0, epsilon=1e-10):
    """
    Tests for the phase_scrambling function:
    (1) compare original FFT amplitudes to scrambled data FFT amplitudes
    (2) compare original covariance matrix to scrambled data covariance matrix
    (3) check if correlations between original and corresponding scrambled time series are around 0
    Tests 1 and 2 pass when the largest ABSOLUTE difference does not exceed "epsilon",
    so deviations in either direction are caught.
    In the third test, we expect the correlation coefficients to show a normal
    distribution around 0, with a "small" std. To keep things simple, we do not fit
    a normal distribution or try a formal statistical test, but plot the histogram
    of the values and decide the test on the basis of the mean and median values
    (we check if they are "close" to zero, meaning absolute value < 0.05).
    IMPORTANT: For the second check we calculate the covariance matrices, so for really large data
    (e.g. tens of thousands of variables) consider the memory requirements of that step
    (~ 1.6 GB for 10^4 variables with float64, considering we need two matrices).
    The function does not have internal checks for that.
    Inputs:
    data_matrix:        2D numpy array of reals. Original data set before phase scrambling.
                        With the default fft_axis=0 each column is one time series
                        (samples X vars); pass fft_axis=1 if each row is one time series.
    data_scrambled:     2D numpy array of reals, phase scrambled version of "data_matrix".
                        Same size and shape as "data_matrix".
    fft_axis:           Axis along which FFT / iFFT is calculated, 0 or 1. Defaults to 0.
    epsilon:            Numeric value, threshold for machine accuracy. Tests 1 and 2 are considered "passed"
                        (that is, output "test_results" values set to True), if numeric inaccuracies
                        are below the threshold "epsilon". Defaults to 1e-10.
    Output:
    test_results:       List of booleans, 3-element long. Each boolean value corresponds
                        to pass (True) / fail (False) on a test.
                        The three values correspond to the (1) FFT amplitude test, (2) covariance matrix test,
                        and (3) correlations test.
                        Returns 0 if input checks failed.
    Side effects:
    Prints a short report for each test and draws a histogram of the correlation
    coefficients on the current matplotlib figure.
    """

    # input checks
    if not isinstance(data_matrix, np.ndarray) or len(data_matrix.shape) != 2 or np.iscomplex(data_matrix).any():
        print('Input arg "data_matrix" should be a 2D numpy array of reals!')
        return 0
    if type(data_matrix) != type(data_scrambled) or data_matrix.shape != data_scrambled.shape or np.iscomplex(data_scrambled).any():
        print('Input arg "data_scrambled" should be a 2D numpy array of reals, with the same shape as "data_matrix"!')
        return 0
    if fft_axis not in [0, 1]:
        print('Input arg "fft_axis" should be 0 or 1!')
        return 0

    # if fft_axis != 0, transpose the data so that each column is one time series
    if fft_axis == 1:
        data_matrix = np.transpose(data_matrix)
        data_scrambled = np.transpose(data_scrambled)

    # init output list
    test_results = [False, False, False]

    # Check FFT component amplitudes / magnitudes. They should be the same, with differences only due to numeric inaccuracies
    # do forward FFT, use version for reals, treat data as if vars were in columns
    data_fft = np.fft.rfft(data_matrix, axis=0)
    data_scrambled_fft = np.fft.rfft(data_scrambled, axis=0)
    # compare magnitudes; take the absolute difference so that deviations in both directions count
    amp_diffs = np.abs(np.abs(data_fft) - np.abs(data_scrambled_fft))
    print('Maximum difference between FFT component magnitudes: {:.3e}'.format(amp_diffs.max()))
    # set relevant output to True if passed the test
    if not (amp_diffs > epsilon).any():
        test_results[0] = True
        print('First test passed, original and scrambled data have matching FFT component amplitudes.')
    else:
        print('First test failed, found substantial difference\n' +
              'between original and scrambled data FFT component magnitudes.')

    # Check the covariance matrices
    data_cov = np.cov(data_matrix, rowvar=False)
    data_scrambled_cov = np.cov(data_scrambled, rowvar=False)
    # maximum absolute difference (a signed maximum would hide large negative deviations)
    maxDiff = np.abs(data_cov - data_scrambled_cov).max()
    print('Maximum difference between covariance matrices: {:.3e}'.format(maxDiff))
    # set relevant output to True if passed the test
    if maxDiff <= epsilon:
        test_results[1] = True
        print('Second test passed, original and scrambled data have matching covariance structures.')
    else:
        print('Second test failed, found substantial difference\n' +
              'between original and scrambled data covariance structures.')

    # Check the correlations across original and scrambled vars
    ccoeffs = fastColumnCorr(data_matrix, data_scrambled)
    # roughly 40 values per bin, but never fewer than one bin (hist rejects bins=0)
    n_bins = max(1, data_matrix.shape[1] // 40)
    mplot.hist(ccoeffs, bins=n_bins)
    # if the mean and median are close to zero (|value| < 0.05) we consider that a success
    if abs(np.mean(ccoeffs)) < 0.05 and abs(np.median(ccoeffs)) < 0.05:
        test_results[2] = True
        print('Third test passed, correlation coefficients group around 0.\n' +
              ' Look at the histogram for further details.')
    else:
        print('Third test failed, correlation coefficients seem to be biased.\n' +
              'Look at the histogram for further details.')

    return test_results

In [6]:
#ISC functions

#OLS - COMPARE WITH sklearn.linear_model.LinearRegression **********************************
def reg_m(y, x):
    """
    Fit an ordinary least squares model with an intercept using statsmodels.

    INPUTS:
    y = dependent variable, one value per observation
    x = regressors laid out as [predictors x observations]; transposed internally so that
        each column of the design matrix is one predictor
    OUTPUTS:
    results = fitted statsmodels OLS results object; results.params[0] is always the
              intercept, followed by one coefficient per predictor
    """
    design = np.array(x).T

    # Always prepend the intercept column. The default has_constant='skip' silently adds
    # nothing when a predictor is already a non-zero constant, which would leave
    # params one element short for callers that drop params[0] as the intercept.
    design = sm.add_constant(design, has_constant='add')

    return sm.OLS(endog=y, exog=design).fit()

#make 2d X-matrix with variably lagged timeseries [TRs x 2*maxT+1]
def make2dVshift(data,maxT):
    """
    make2dVshift -- build the lagged design matrix for a single time series
    INPUTS:
    data = a 1D timeseries vector with one value per TR
    maxT = absolute value of maximum lag to include in the model [TRs]
    OUTPUTS:
    vShift = [TRs x 2*maxT+1] matrix; column (lag + maxT) holds the series shifted by "lag" TRs
             for lag = -maxT..maxT. Negative lags move later samples to earlier rows, positive
             lags move earlier samples to later rows; vacated rows are filled with zeros.
    """
    vShift = np.empty((len(data), 2*maxT + 1)) #preallocate [TRs x shifts]

    for lag in range(-maxT, maxT + 1):
        if lag == 0:
            column = data #unshifted copy
        elif lag < 0:
            #drop the first |lag| samples, zero-pad the end
            column = np.concatenate((data[-lag:], np.zeros(-lag)))
        else:
            #zero-pad the start, drop the last lag samples
            column = np.concatenate((np.zeros(lag), data[:data.shape[0] - lag]))
        vShift[:, lag + maxT] = column

    return vShift

#make 3d X-matrix with variably lagged timeseries [TRs x voxels x 2*maxT+1]
def make3dVshift(data,maxT):
    """
    make3dVshift -- build the lagged design matrices for all voxels at once
    INPUTS:
    data = a [TRs x voxels] timeseries matrix
    maxT = absolute value of maximum lag to include in the model [TRs]
    OUTPUTS:
    vShift = [TRs x voxels x 2*maxT+1] array; slice (lag + maxT) along the last axis holds the
             whole matrix shifted in time by "lag" TRs for lag = -maxT..maxT. Negative lags move
             later rows to earlier positions, positive lags move earlier rows to later
             positions; vacated rows are filled with zeros.
    """
    nTRs = data.shape[0]
    nVox = data.shape[1]

    #preallocate the time-lagged dataset [TRs x voxels x 2*maxT+1]
    vShift = np.empty((nTRs, nVox, 2*maxT + 1))

    for lag in range(-maxT, maxT + 1): #for each lag
        if lag == 0: #no lag: plain copy of the data
            vShift[:, :, maxT] = data
            continue

        padding = np.zeros((abs(lag), nVox)) #zero rows that replace the samples shifted out
        if lag < 0: #negative lag: drop the first |lag| rows, pad the end
            shifted = np.vstack((data[-lag:, :], padding))
        else: #positive lag: pad the start, drop the last lag rows
            shifted = np.vstack((padding, data[:nTRs - lag, :]))
        vShift[:, :, lag + maxT] = shifted

    return vShift

def voxWrapper(vShift3D,listener,VOX):
    """
    Fit the lagged linear model of a single voxel (unit of work for the voxelwise loop / joblib).

    INPUTS
    vShift3D: 3D array indexed as [:, VOX, :]; the slice for one voxel is used as that voxel's
              design matrix (first axis = rows of the model, last axis = regressors)
    listener: 2D array indexed as [:, VOX]; column VOX is the dependent variable
    VOX: voxel index

    OUTPUTS
    b: fitted coefficients without the first (intercept) parameter
    Rsq: R^2 of the fitted model
    F: F-statistic of the fitted model
    pF: p-value of the F-statistic
    """
    nRows = vShift3D.shape[0]
    nRegressors = vShift3D.shape[2]

    #design matrix of the requested voxel in 2D
    X = vShift3D[:, VOX, :].reshape(nRows, nRegressors)

    #solve linear model X*b=y where y is the vector from listener's data
    fit = reg_m(listener[:, VOX].T, X.T)

    #skip params[0], which reg_m reserves for the intercept
    b = fit.params[1:]

    return b, fit.rsquared, fit.fvalue, fit.f_pvalue

def voxWrapperPerm(vShift3D_scr,listener,Rperm,VOX):
    """
    Fit the lagged linear model of a single voxel on scrambled data and report its R^2.

    INPUTS
    vShift3D_scr: 3D array built from scrambled data, indexed as [:, VOX, :]; the slice for
                  one voxel is used as that voxel's design matrix
    listener: 2D array indexed as [:, VOX]; column VOX is the dependent variable
    Rperm: not used inside this function (kept in the signature for existing callers)
    VOX: voxel index

    OUTPUT
    R^2 of the model fitted for this voxel (a single value, not a p-value)
    """
    nRows = vShift3D_scr.shape[0]
    nRegressors = vShift3D_scr.shape[2]

    #design matrix of the requested voxel in 2D
    X = vShift3D_scr[:, VOX, :].reshape(nRows, nRegressors)

    #solve linear model X*b=y where y is the vector from listener's data
    fit = reg_m(listener[:, VOX].T, X.T)

    return fit.rsquared
    
#ISC computation
def couplingFMRI(speaker, listener, maxT, fitPermuts, method):
    """
    Voxelwise speaker -> listener coupling with lagged linear models.

    For every voxel, the listener time series is regressed (OLS with intercept, via reg_m)
    on 2*maxT+1 time-shifted, zero-padded copies of the speaker time series of the same voxel.
    Optionally, a permutation test compares each voxel's R^2 with the R^2 values obtained
    after phase-scrambling the speaker data.

    INPUTS:
    speaker    = [TRs x voxels] numpy array with the speaker's time series
    listener   = [TRs x voxels] numpy array with the listener's time series
    maxT       = absolute value of maximum lag to include in the model [TRs]
    fitPermuts = number of phase-scrambling permutations per voxel; 0 (or less) skips the test
    method     = 0: old method, each voxel's lagged design matrix is scrambled on its own
                 anything else: new method, the whole speaker matrix is scrambled at once
                 per permutation and then lagged

    The new method also reads two module-level settings: "voxParallel" (1 = distribute the
    voxelwise fits with joblib) and "numJobs" (number of joblib workers).

    OUTPUTS:
    b      = [2*maxT+1 x voxels] fitted lag coefficients (intercept excluded)
    Rsq    = [voxels] R^2 of each voxel's model
    F      = [voxels] F-statistic of each voxel's model
    pF     = [voxels] p-value of the F-statistic
    pP_Rsq = [voxels] permutation p-value: fraction of permutations whose R^2 is strictly
             larger than the real R^2; all NaN when no permutation test is run
    """
    #no. of voxels
    voxelN = speaker.shape[1]

    #preallocate results matrices
    b = np.empty([2*maxT+1, voxelN])
    Rsq = np.empty([voxelN])
    F = np.empty([voxelN])
    pF = np.empty([voxelN])
    pP_Rsq = np.full([voxelN], np.nan) #stays NaN unless the permutation test runs

    #R^2 of every permutation and voxel (only needed for the permutation test)
    if fitPermuts > 0:
        Rperm = np.empty([fitPermuts, voxelN])

    print('Modeling listener data using lagged time series from speaker...')

    if method == 0: #original method

        #For each voxel...
        for VOX in range(voxelN):

            #get X matrix of time-shifted speaker timeseries
            vShift = make2dVshift(speaker[:,VOX],maxT)

            #solve linear model vShift*b=y where y is the vector from listener's data
            model = reg_m(listener[:,VOX].T, vShift.T)
            b[:,VOX] = model.params[1:] #params[0] is the intercept
            Rsq[VOX] = model.rsquared
            F[VOX] = model.fvalue
            pF[VOX] = model.f_pvalue

            #occasional feedback: every 10000 voxels, or once at the end for small data sets
            if ((VOX + 1) % 10000 == 0) or (voxelN < 10000 and VOX == voxelN - 1):
                print('\nFinished with voxel no. ' + str(VOX + 1))

            #permutation test (loop body is skipped when fitPermuts <= 0)
            for PERM in range(fitPermuts):

                #scramble the lagged design matrix; phase_scrambling returns an array
                #with the same number of rows, so no padding is needed afterwards
                X = phase_scrambling(vShift)

                #solve linear model X*b=y where y is the vector from listener's data
                model = reg_m(listener[:,VOX].T, X.T)
                Rperm[PERM,VOX] = model.rsquared

    else: #new method

        #make time-lagged X-matrices without scrambling the speaker timeseries
        vShift3D = make3dVshift(speaker,maxT)

        #fit the voxelwise models to the non-scrambled data
        if voxParallel == 1:
            realFits = Parallel(n_jobs=numJobs)(delayed(voxWrapper)(vShift3D,listener,VOX) for VOX in range(voxelN))
        else:
            realFits = [voxWrapper(vShift3D,listener,VOX) for VOX in range(voxelN)]

        #reformat the per-voxel tuples into the result arrays
        for VOX in range(voxelN):
            b[:,VOX] = realFits[VOX][0]
            Rsq[VOX] = realFits[VOX][1]
            F[VOX] = realFits[VOX][2]
            pF[VOX] = realFits[VOX][3]

        #permutation test (loop body is skipped when fitPermuts <= 0)
        for PERM in range(fitPermuts):

            #scramble all speaker time series at once, then make time-lagged X-matrices
            speaker_scr = phase_scrambling(speaker)
            vShift3D_scr = make3dVshift(speaker_scr,maxT)

            #fit the voxelwise models to the scrambled data; voxWrapperPerm does not use
            #its third argument, so None is passed instead of shipping Rperm to the workers
            if voxParallel == 1:
                Rperm[PERM,:] = Parallel(n_jobs=numJobs)(delayed(voxWrapperPerm)(vShift3D_scr,listener,None,VOX) for VOX in range(voxelN))
            else:
                for VOX in range(voxelN):
                    Rperm[PERM,VOX] = voxWrapperPerm(vShift3D_scr,listener,None,VOX)

    #permutation p-value: share of permutations with an R^2 strictly above the real R^2
    if fitPermuts > 0:
        pP_Rsq = np.sum(Rperm > Rsq, axis=0) / float(fitPermuts)

    return b, Rsq, F, pF, pP_Rsq

In [7]:
#make pairMap data frame as a reference for pair information
#(one row per DBIC x CBS combination, condition and speaker direction)

#set column names
pairMap_header = ['dbicNum', #DBIC subject number
                  'dbicID', #DBIC subject ID (e.g., sid000007)
                  'cbsNum', #CBS subject number
                  'cbsID', #CBS subject ID
                  'pairType', #1=real, 0=pseudo
                  'condition', #1=joint, 0=independent
                  'dbicSpeaker', #1=DBIC speaker, CBS listener, 0=CBS speaker, DBIC listener
                  'sFile', #speaker file name
                  'lFile', #listener file name
                  'duration'] #ISC duration [min]

#number of rows to preallocate: 4 files (rows) per pair
numRows = (realPairN + pseudoPairN) * 4

#preallocate pair map data frame
pairMap = pd.DataFrame(columns = pairMap_header, index=range(numRows))

#set order in which to load tasks for DBIC and CBS subs within each pair
#(first two entries are paired with the independent run, last two with the joint run)
dTask = ['listener','speaker','listener','speaker']
cTask = ['speaker','listener','speaker','listener']

#initialize pairMap row counter
ROW = 0

#for each DBIC subject...
for dbicPairN in dbicSubs:

    #get DBIC sub ID and run info
    dbicSub = pairsAndRuns['DBIC'][dbicPairN] #e.g., 'sid000007'
    dbicRunInd = pairsAndRuns['ind_run'][dbicPairN] #current subject's independent run #s
    dbicRunJoint = pairsAndRuns['joint_run'][dbicPairN] #current subject's joint run #s
    dRun = [dbicRunInd, dbicRunInd, dbicRunJoint, dbicRunJoint] #make an array of independent and joint run #s to simplify loading below

    #get dbic file names
    dbicFiles = [[]]*4
    for FILE in range(4):
        if local == 1: #if loading local files...
            dbicFiles[FILE] = '/Users/jeffreyknotts/Documents/MATLAB/Wheatley/isc_test/sub-' + dbicSub + \
            '_ses-pair0' + str(dbicPairN + 1) + '_task-storytelling' + str(dRun[FILE]) + '_run-0' + \
            str(dRun[FILE]) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_' + dTask[FILE] + '.mat'  #[TRs x voxels]
        else: #if loading files from drzeuss...
            dbicFiles[FILE] = '/flash/wheatley/adamb/hyperscanning_DBIC_ses2/sub-' + dbicSub + \
            '_fmriprep/fmriprep/sub-' + dbicSub + '/ses-pair0' + str(dbicPairN + 1) + '/func/' + \
            'sub-' + dbicSub + '_ses-pair0' + str(dbicPairN + 1) + '_task-storytelling' + str(dRun[FILE]) + '_run-0' + \
            str(dRun[FILE]) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_' + dTask[FILE] + '.mat'  #[TRs x voxels]

    #for each CBS subject...
    for cbsPairN in cbsSubs:

        #get CBS sub ID and run info
        cbsSub = pairsAndRuns['CBS'][cbsPairN] #e.g., 'hid000002'
        cbsRunInd = pairsAndRuns['ind_run'][cbsPairN]
        cbsRunJoint = pairsAndRuns['joint_run'][cbsPairN]
        cRun = [cbsRunInd, cbsRunInd, cbsRunJoint, cbsRunJoint] #make an array of independent and joint run #s to simplify loading below

        #get cbs file names (allocated once per CBS subject, filled one entry per FILE below)
        cbsFiles = [[]]*4

        for FILE in range(4):

            if local == 1: #if loading local files...
                cbsFiles[FILE] = '/Users/jeffreyknotts/Documents/MATLAB/Wheatley/isc_test/sub-' + cbsSub + \
                '_ses-pair0' + str(cbsPairN + 1) + '_task-storytelling' + str(cRun[FILE]) + '_run-0' + \
                str(cRun[FILE]) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_' + cTask[FILE] + '.mat'  #[TRs x voxels]
            else: #if loading files from drzeuss...
                cbsFiles[FILE] = '/flash/wheatley/adamb/hyperscanning_CBS/sub-' + cbsSub + \
                '_fmriprep/fmriprep/sub-' + cbsSub + '/ses-pair0' + str(cbsPairN + 1) + '/func/' + \
                'sub-' + cbsSub + '_ses-pair0' + str(cbsPairN + 1) + '_task-storytelling' + str(cRun[FILE]) + '_run-0' + \
                str(cRun[FILE]) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_' + cTask[FILE] + '.mat'  #[TRs x voxels]

            ##############################
            ### index stuff to pairMap ###
            ##############################

            #NOTE: cells are written with pairMap.at[row, column]; chained assignment
            #(pairMap[column][row] = ...) may write to a temporary copy and leave pairMap unchanged

            #subject numbers and IDs (numbers are the 0-based lookup row + 1)
            pairMap.at[ROW, 'dbicNum'] = dbicPairN + 1
            pairMap.at[ROW, 'dbicID'] = dbicSub
            pairMap.at[ROW, 'cbsNum'] = cbsPairN + 1
            pairMap.at[ROW, 'cbsID'] = cbsSub

            #pair type: 1 = real pair (same pair number at both sites), 0 = pseudo pair
            pairMap.at[ROW, 'pairType'] = 1 if dbicPairN == cbsPairN else 0

            #speaker/listener, taken from the task label of the DBIC file for this row
            if dTask[FILE] == 'speaker': #DBIC speaker, CBS listener
                pairMap.at[ROW, 'dbicSpeaker'] = 1
                pairMap.at[ROW, 'sFile'] = dbicFiles[FILE] #speaker file name
                pairMap.at[ROW, 'lFile'] = cbsFiles[FILE] #listener file name
            else: #CBS speaker, DBIC listener
                pairMap.at[ROW, 'dbicSpeaker'] = 0
                pairMap.at[ROW, 'sFile'] = cbsFiles[FILE] #speaker file name
                pairMap.at[ROW, 'lFile'] = dbicFiles[FILE] #listener file name

            #independent/joint condition (files 0-1 use the independent run, 2-3 the joint run)
            pairMap.at[ROW, 'condition'] = 0 if FILE < 2 else 1

            ROW += 1

pairMap

Unnamed: 0,dbicNum,dbicID,cbsNum,cbsID,pairType,condition,dbicSpeaker,sFile,lFile,duration
0,2,sid000007,2,hid000002,1,0,0,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,
1,2,sid000007,2,hid000002,1,0,1,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,
2,2,sid000007,2,hid000002,1,1,0,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,
3,2,sid000007,2,hid000002,1,1,1,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,
4,2,sid000007,3,hid000003,0,0,0,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,
5,2,sid000007,3,hid000003,0,0,1,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,
6,2,sid000007,3,hid000003,0,1,0,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,
7,2,sid000007,3,hid000003,0,1,1,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,
8,3,sid000009,2,hid000002,0,0,0,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,
9,3,sid000009,2,hid000002,0,0,1,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,


In [8]:
###########
### ISC ###
###########

'''
Note: removed print updates around couplingISC because joblib renders them kind of meaningless.
But using the verbose option in joblib provides helpful progress outputs.
'''

def ISCwrapper(sFile, lFile, winArray, voxelCoords, pairMap, ROW):
    '''
    Fit the windowed speaker-listener coupling model for one row of pairMap.

    Relies on the notebook-level names maxT, fitPermuts, method and
    couplingFMRI being defined before this function is called.

    Parameters
    ----------
    sFile : path of the .mat file holding the speaker timeseries
    lFile : path of the .mat file holding the listener timeseries
    winArray : [winTRs, stepSize, numSteps] -- window size [TRs], step size
        [TRs] and number of windows
    voxelCoords : column indices selected from each loaded timeseries
    pairMap : table with a 'dbicSpeaker' column addressable as
        pairMap['dbicSpeaker'][ROW]; its row count is used for the time estimate
    ROW : row of pairMap being processed

    Returns
    -------
    b : array [2*maxT+1, len(voxelCoords), numSteps]
    Rsq, F, pF, pP_Rsq : arrays [numSteps, len(voxelCoords)]
    duration : time taken by this call [min], rounded to 2 decimals

    Raises
    ------
    FileNotFoundError : if sFile or lFile does not exist
    '''

    #get time of start of current ISC
    iscStart = time.time()

    #fail fast with a readable message if either input is missing, before
    #anything is allocated or loaded (previously a missing file ended in an
    #unrelated TypeError / NameError)
    for role, path in (('speaker', sFile), ('listener', lFile)):
        if not os.path.isfile(path):
            raise FileNotFoundError('pairMap row ' + str(ROW) + ': ' + role +
                                    ' file not found: ' + str(path))

    #get time variables
    winTRs = winArray[0] #window size [TRs]
    stepSize = winArray[1] #step size [TRs]
    numSteps = winArray[2] #number of steps to cover the time series given winTRs and stepSize

    #preallocate results matrices
    numVox = len(voxelCoords)
    b = np.empty([2*maxT+1, numVox, numSteps])
    Rsq = np.empty([numSteps, numVox])
    F = np.empty([numSteps, numVox])
    pF = np.empty([numSteps, numVox])
    pP_Rsq = np.empty([numSteps, numVox])

    #which site is speaking decides which variable is read from each file
    if pairMap['dbicSpeaker'][ROW] == 1:
        speakerKey, listenerKey = 'dbicSpeaker', 'cbsListener'
    else:
        speakerKey, listenerKey = 'cbsSpeaker', 'dbicListener'

    #load timeseries; indexing with voxelCoords keeps only the requested
    #columns so the full loaded file can be released right away
    speaker = sio.loadmat(sFile)[speakerKey][:,voxelCoords]
    listener = sio.loadmat(lFile)[listenerKey][:,voxelCoords]

    #Loop over rolling windows...
    for STEP in range(numSteps): #for each window...

        #get TRs to use in current window (zero-based)
        winStart = stepSize * STEP
        TRs = list(range(winStart, winStart + winTRs))

        #run couplingFMRI
        b[:,:,STEP], Rsq[STEP,:], F[STEP,:], pF[STEP,:], pP_Rsq[STEP,:] = couplingFMRI(speaker[TRs,:], listener[TRs,:], maxT, fitPermuts, method)

    #get approximate time to run couplingFMRI for current pair [min]
    duration = round((time.time() - iscStart) / 60,2)

    #estimate time remaining: rows after this one, assuming each takes as
    #long as this one did (rough -- does not account for parallel workers)
    rowsRemaining = max(pairMap.shape[0] - ROW - 1, 0)
    estTimeRemain = rowsRemaining * duration #[min]

    #split into h/min/s from whole seconds so the seconds field never shows 60
    totalSecs = int(round(estTimeRemain * 60))
    hrs, remSecs = divmod(totalSecs, 3600)
    mins, secs = divmod(remSecs, 60)
    if hrs > 0:
        timeString = str(hrs) + ' hr ' + str(mins) + ' min ' + str(secs) + ' s'
    else:
        timeString = str(mins) + ' min ' + str(secs) + ' s'

    #display another update
    print('\n#################################################')
    print('### ISC finished! Estimated time remaining: ' + timeString + ' ###')
    print('#################################################')

    return b, Rsq, F, pF, pP_Rsq, duration

#run ISCwrapper once per pairMap row, spread over numJobs joblib workers;
#modFits[ROW] holds that row's (b, Rsq, F, pF, pP_Rsq, duration) tuple
winArray = [winTRs,stepSize,numSteps]
modFits = Parallel(n_jobs=numJobs, verbose=50)(delayed(ISCwrapper)
                                              (pairMap.loc[ROW, 'sFile'],
                                               pairMap.loc[ROW, 'lFile'],
                                               winArray,
                                               voxelCoords,
                                               pairMap,
                                               ROW)
                                              for ROW in range(pairMap.shape[0]))

#add duration to pairMaps
#(.loc writes into pairMap itself; the chained form pairMap['duration'][ROW] = ...
# may write to a temporary copy and leave pairMap unchanged)
for ROW in range(pairMap.shape[0]):
    pairMap.loc[ROW, 'duration'] = modFits[ROW][5]

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   1 tasks      | elapsed:   26.2s
[Parallel(n_jobs=8)]: Done   2 out of  16 | elapsed:   26.4s remaining:  3.1min
[Parallel(n_jobs=8)]: Done   3 out of  16 | elapsed:   26.5s remaining:  1.9min
[Parallel(n_jobs=8)]: Done   4 out of  16 | elapsed:   26.5s remaining:  1.3min
[Parallel(n_jobs=8)]: Done   5 out of  16 | elapsed:   27.0s remaining:   59.4s
[Parallel(n_jobs=8)]: Done   6 out of  16 | elapsed:   27.0s remaining:   45.0s
[Parallel(n_jobs=8)]: Done   7 out of  16 | elapsed:   27.1s remaining:   34.8s
[Parallel(n_jobs=8)]: Done   8 out of  16 | elapsed:   27.5s remaining:   27.5s
[Parallel(n_jobs=8)]: Done   9 out of  16 | elapsed:   48.0s remaining:   37.3s
[Parallel(n_jobs=8)]: Done  10 out of  16 | elapsed:   48.0s remaining:   28.8s
[Parallel(n_jobs=8)]: Done  11 out of  16 | elapsed:   48.2s remaining:   21.9s
[Parallel(n_jobs=8)]: Done  12 out of  16 | elapsed:   48.3s rem

In [10]:
#show pairMap again now that the ISC cell has written each row's 'duration'
pairMap

Unnamed: 0,dbicNum,dbicID,cbsNum,cbsID,pairType,condition,dbicSpeaker,sFile,lFile,duration
0,2,sid000007,2,hid000002,1,0,0,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,0.38
1,2,sid000007,2,hid000002,1,0,1,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,0.36
2,2,sid000007,2,hid000002,1,1,0,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,0.37
3,2,sid000007,2,hid000002,1,1,1,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,0.36
4,2,sid000007,3,hid000003,0,0,0,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,0.36
5,2,sid000007,3,hid000003,0,0,1,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,0.37
6,2,sid000007,3,hid000003,0,1,0,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,0.37
7,2,sid000007,3,hid000003,0,1,1,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,0.36
8,3,sid000009,2,hid000002,0,0,0,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,0.36
9,3,sid000009,2,hid000002,0,0,1,/flash/wheatley/adamb/hyperscanning_DBIC_ses2/...,/flash/wheatley/adamb/hyperscanning_CBS/sub-hi...,0.36


In [9]:
#wall-clock time elapsed since startTime was recorded
endTime = time.time()
elapsedSecs = endTime - startTime #elapsed time [s]
totalDur = elapsedSecs / 3600 #total duration [hrs]
totalDur

0.014294000731574165