In [None]:
#imports
%matplotlib inline

import os
import sys
sys.path.append('/Users/jeffreyknotts/Documents/Wheatley_Lab/')
import numpy as np
import pandas as pd
import time
from datetime import datetime
import math
import scipy.io as sio
import statsmodels.api as sm
import matplotlib.pyplot as mplot
from numpy.random import random_sample
from math import pi
from math import sqrt
from joblib import Parallel, delayed

In [None]:
#input parameters
#All values below are module-level settings read by the cells further down.
#NOTE: range() excludes its end value, so range(1,9) would give 1..8. The DBIC values are
#used below as 0-based row labels into the pair lookup table and printed as value+1,
#i.e. they correspond to rows 2..9 in MATLAB's 1-based indexing.
dbicSubs = list(range(1,3)) #DBIC subject indices to analyze (currently [1, 2])
cbsSubs = list(range(1,3)) #CBS subject indices to analyze (currently [1, 2]) -- presumably indexed the same way as dbicSubs; confirm against the pair loop
winTRs = 41 #window size [TRs]
stepSize = 41 #step size [TRs]; equal to winTRs here, so consecutive windows do not overlap
maxT = 6 #max TR lag to include in linear models (models use 2*maxT+1 lagged regressors)
voxelCoords = list(range(0,1000)) #whole brain = 1:69880 -- should add step above this that gets ROI voxel coords
fitPermuts = 1000 #number of permutations for the pair-fit permutation test; 0 = don't use permutation test for pair fits
groupPermuts = 1000 #presumably the number of permutations for a group-level test -- not used in the cells visible here
method = 1 #0=old scrambling method, 1=new method of scrambling all voxels at once
local = 0 #1 = read data from local debugging paths; for JD debugging locally -- will eventually remove
parallel = 1 #use joblib on pairwise for loop
numJobs = 4 #number of cores for joblib to use
totalTRs = 615 #kind of hacky but predefining the total number of TRs that will be in each timeseries
voxParallel = 0 #use joblib on voxelwise for loop (read as a global inside couplingFMRI)

In [None]:
#load pair and run data
#Reads the pair/run lookup table from a hard-coded AFS path, so this cell only works where
#that path is mounted. Later cells read the 'DBIC', 'ind_run' and 'joint_run' columns.
pairsAndRuns = pd.read_csv(r'/afs/.dbic.dartmouth.edu/usr/wheatley/jd/hyperscanning_pair_and_run_lookup.csv')
#dump the whole table so the pairings can be checked by eye
print(pairsAndRuns)

In [None]:
#mark starting time
startTime = time.time()

#get numbers of pairs
#real pairs = indices present in both site lists; every other DBIC x CBS combination is a pseudo pair
realPairN = len(set(dbicSubs) & set(cbsSubs))
pseudoPairN = len(dbicSubs)*len(cbsSubs) - realPairN
totalPairN = realPairN + pseudoPairN

#define some rolling window parameters based on inputs above
#total # of TRs in each time series, derived from the totalTRs input so the two can never disagree
#(kept as a float, as in the original code)
totTRs = float(totalTRs)
if stepSize < 1 or not (1 <= winTRs <= totTRs):
    #fail early with a readable message instead of a negative-dimension error from np.empty below
    raise ValueError('winTRs must be between 1 and totalTRs, and stepSize must be at least 1!')
numSteps = int(math.ceil((totTRs - winTRs + 1) / stepSize)) #total number of steps based on winTR and stepSize
lastTr = winTRs + (numSteps - 1) * stepSize #last TR this approach will analyze
TRsLeftOut = totTRs - lastTr #number of TRs that will be left out by the current approach
if TRsLeftOut > 0: #if any TRs will end up getting left out based on window and step sizes...
    print('The last ' + str(TRsLeftOut) + ' TRs will be left out due to the window and step size!')

#preallocate fit arrays
#arrays are structured as follows (using realFits as an example):
#realFits[0] = independent condition
#realFits[1] = joint condition
#realFits[0][0] = beta coefficients [pairs x betas x voxels x windows]
#realFits[0][1] = R^2 [pairs x voxels x windows]
#realFits[0][2] = F [pairs x voxels x windows]
#realFits[0][3] = pF (p-value for F-test) [pairs x voxels x windows]
#realFits[0][4] = pP_Rsq (p-value for R^2 from permutation test) [pairs x voxels x windows]
#NOTE: np.empty does not initialize memory, so every entry must be written before it is read
rRows = realPairN * 2 #number of rows to preallocate for real fit arrays
pRows = pseudoPairN * 2 #number of rows to preallocate for pseudo fit arrays
realFits = []
pseudoFits = []
for COND in range(2): #for each condition (independent and joint)...
    realCond = []
    pseudoCond = []
    for MEAS in range(5): #for each model fit measure...
        if MEAS == 0: #betas carry an extra axis for the 2*maxT+1 lagged regressors
            realCond.append(np.empty([rRows, 2*maxT+1, len(voxelCoords), numSteps]))
            pseudoCond.append(np.empty([pRows, 2*maxT+1, len(voxelCoords), numSteps]))
        else:
            realCond.append(np.empty([rRows, len(voxelCoords), numSteps]))
            pseudoCond.append(np.empty([pRows, len(voxelCoords), numSteps]))
    realFits.append(realCond)
    pseudoFits.append(pseudoCond)

#preallocate map of which pairs correspond to which rows for the fit data
#FIX: the row counts were swapped (pRows for the realFits map and vice versa); this went unnoticed
#because both counts are equal with the default subject lists
pairMap_header = ['dbic','cbs','cbsSpeaker']
pairMap = [
    np.empty([rRows,len(pairMap_header)]), #pair mappings for realFits
    np.empty([pRows,len(pairMap_header)]), #pair mappings for pseudoFits
]

#dummy array for voxelwise joblib functions
#NOTE: this is a reference to realFits[0], not a copy -- writes through modArray land in realFits
modArray = realFits[0]

#preallocate timing log
timeLog_header = ['dbic','cbs','duration']
timeLog = np.empty([totalPairN,len(timeLog_header)])

In [None]:
def naiveColumnCorr(a, b):
    """
    Reference ("baseline") implementation: Pearson correlation between matching
    columns of two 2D arrays, computed one column pair at a time with np.corrcoef.
    Slow by design; see 'fastColumnCorr' for the vectorized equivalent.
    Returns a 1D array with one coefficient per column of "a".
    """
    n_cols = a.shape[1]
    corr = np.zeros(n_cols)
    # np.corrcoef returns the 2x2 correlation matrix; the off-diagonal entry is r(a_j, b_j)
    corr[:] = [np.corrcoef(a[:, j], b[:, j])[0, 1] for j in range(n_cols)]
    return corr


def fastColumnCorr(a, b):
    """
    Vectorized Pearson correlation between matching columns of two matrices.
    Both inputs are 2D numpy arrays of identical shape, laid out as samples X vars.
    The column-wise dot products are taken with numpy.einsum, so no Python loop is needed
    (the original author reports roughly a 10x speed-up over 'naiveColumnCorr').
    Returns a 1D array with one correlation coefficient per column.
    NOTES:
    numpy.einsum_path could pre-compute the contraction order, e.g.
    contr_order = np.einsum_path("ij,ij->j", aa, bb, optimize='optimal')
    cov = np.einsum("ij,ij->j", aa, bb, optimize=contr_order[1])
    but the gain was reported to be only a few percent at best.
    """
    # center every column (subtract the column mean) in both matrices
    a_centered = a - a.sum(axis=0) / a.shape[0]
    b_centered = b - b.sum(axis=0) / b.shape[0]

    # numerator: sum over rows of the element-wise products = unnormalized covariance per column
    cross = np.einsum("ij,ij->j", a_centered, b_centered)

    # denominator: square root of the product of the per-column sums of squares
    ss_a = np.square(a_centered).sum(axis=0)
    ss_b = np.square(b_centered).sum(axis=0)
    norm = np.sqrt(ss_a * ss_b)

    return cross / norm



def phase_scrambling(data_matrix, fft_axis=0, rng=None):
    """
    FOR REAL DATA ONLY, NOT COMPLEX!
    Phase-scrambling function for matrices. Preserves the original covariance structure.
    After FFT, we add a random phase vector to the FFT components of all time series / vars and do inverse FFT,
    as described in Prichard and Theiler (1994, Generating surrogate data for time series with several
    simultaneously measured variables. Physical review letters, 73(7), 951).
    The returned phase-scrambled data has the same power spectrum as the original but is linearly independent
    (zero expected correlation). Covariance structure is preserved, meaning that linear dependencies
    across time series is the same in the phase-scrambled data as in the original.
    Inputs:
    data_matrix:        2D numpy array of reals. Samples X time series (vars) by default,
                        set fft_axis=1 if the layout is time series X samples.
    fft_axis:           Axis along which FFT / iFFT is calculated. Defaults to 0,
                        meaning that FFT is calculated across rows (= each column is a separate time series / var)
    rng:                Optional numpy random Generator used to draw the random phases.
                        Defaults to None, in which case a fresh, unseeded np.random.default_rng() is used.
                        Pass a seeded Generator to make the scrambling reproducible.
    Outputs:
    data_scrambled:     2D numpy array of reals, contains the phase-scrambled data.
                        Same size and dimensions as input "data_matrix".
                        Returns 0 if input checks failed.
    TODO:
    - look into implementation with FFTW, which is supposedly faster with repetitive usage (our use case)
    """

    # input checks
    if not isinstance(data_matrix, np.ndarray) or data_matrix.ndim != 2 or np.iscomplex(data_matrix).any():
        print('Input arg "data_matrix" should be a 2D numpy array of reals!')
        return 0
    if fft_axis not in [0, 1]:
        print('Input arg "fft_axis" should be 0 or 1!')
        return 0

    # FIX: the bare name "default_rng" was never imported in this notebook, so every call
    # raised NameError; go through the numpy namespace instead
    if rng is None:
        rng = np.random.default_rng()

    # if fft_axis != 0, transpose the data so that samples always run along axis 0
    transposeFlag = fft_axis == 1
    if transposeFlag:
        data_matrix = np.transpose(data_matrix)

    # do forward FFT, use version for reals, treat data as if vars were in columns
    n_samples = data_matrix.shape[0]
    data_fft = np.fft.rfft(data_matrix, axis=0)

    # get random phase vector (values between 0 - 2pi), one phase per FFT component;
    # the same phase is applied to every var, which is what preserves the covariance structure
    rand_phases = rng.random(data_fft.shape[0]) * 2 * np.pi
    rand_phases[0] = 0  # the first (DC) component is real by definition, leave it alone
    if n_samples % 2 == 0:
        rand_phases[-1] = 0  # for an even number of samples the last (Nyquist) component is real too

    # rotating each coefficient by exp(i*phase) adds the random phase to its angle and keeps
    # its magnitude (same as converting to polar form, adding the phases and converting back);
    # newaxis is needed for broadcasting across vars
    data_scrambled_fft = data_fft * np.exp(1j * rand_phases)[:, np.newaxis]

    # do inverse FFT; passing n explicitly keeps the original number of samples (odd or even)
    data_scrambled = np.fft.irfft(data_scrambled_fft, n=n_samples, axis=0)

    # transpose back if necessary
    if transposeFlag:
        data_scrambled = np.transpose(data_scrambled)

    return data_scrambled


def phase_scrambling_tests(data_matrix, data_scrambled, fft_axis=0, epsilon=1e-10):
    """
    Tests for the phase_scrambling function:
    (1) compare original FFT amplitudes to scrambled data FFT amplitudes
    (2) compare original covariance matrix to scrambled data covariance matrix
    (3) check if correlations between original and corresponding scrambled time series are around 0
    In the third test, we expect the correlation coefficients to show a normal
    distribution around 0, with a "small" std. To keep things simple, we do not fit
    a normal distribution or try a formal statistical test, but plot the histogram
    of the values and decide the test on the basis of the mean and median values
    (we check if they are "close" to zero, meaning their absolute value is < 0.05).
    IMPORTANT: For the second check we calculate the covariance matrices, so for really large data
    (e.g. tens of thousands of variables) consider the memory requirements of that step
    (~ 1.6 GB for 10^4 variables in float64, considering we need two matrices). The function does not have
    internal checks for that.
    Inputs:
    data_matrix:        2D numpy array of reals. Original data set before phase scrambling.
                        Samples X time series (vars) by default, set fft_axis=1 if time series X samples.
    data_scrambled:     2D numpy array of reals, phase scrambled version of "data_matrix".
                        Same size and shape as "data_matrix".
    fft_axis:           Axis along which FFT / iFFT is calculated. Defaults to 0,
                        meaning that FFT is calculated across rows (= each column is a separate time series / var).
    epsilon:            Numeric value, threshold for machine accuracy. Tests 1 and 2 are considered "passed"
                        (that is, output "test_results" values set to True), if the absolute numeric
                        inaccuracies are below the threshold "epsilon". Defaults to 1e-10.
    Output:
    test_results:       List of booleans, 3-element long. Each boolean value corresponds
                        to pass (True) / fail (False) on a test.
                        The three values correspond to the (1) FFT amplitude test, (2) covariance matrix test,
                        and (3) correlations test.
                        Returns 0 if input checks failed.
    Side effect: draws a histogram of the correlation coefficients on the current matplotlib axes.
    """

    # input checks
    if not isinstance(data_matrix, np.ndarray) or len(data_matrix.shape) != 2 or np.iscomplex(data_matrix).any():
        print('Input arg "data_matrix" should be a 2D numpy array of reals!')
        return 0
    if type(data_matrix) != type(data_scrambled) or data_matrix.shape != data_scrambled.shape or np.iscomplex(data_scrambled).any():
        print('Input arg "data_scrambled" should be a 2D numpy array of reals, with the same shape as "data_matrix"!')
        return 0
    if fft_axis not in [0, 1]:
        print('Input arg "fft_axis" should be 0 or 1!')
        return 0

    # if fft_axis != 0, transpose the data so that samples run along axis 0
    if fft_axis == 1:
        data_matrix = np.transpose(data_matrix)
        data_scrambled = np.transpose(data_scrambled)

    # init output list
    test_results = [False, False, False]

    # Check FFT component amplitudes / magnitudes. They should be the same, with differences only due to numeric inaccuracies
    # do forward FFT, use version for reals, treat data as if vars were in columns
    data_fft = np.fft.rfft(data_matrix, axis=0)
    data_scrambled_fft = np.fft.rfft(data_scrambled, axis=0)
    # compare magnitudes; FIX: use the absolute difference, a signed difference let large
    # negative deviations (scrambled amplitude > original) pass the test
    amp_diffs = np.abs(np.abs(data_fft) - np.abs(data_scrambled_fft))
    print('Maximum difference between FFT component magnitudes: {:.3e}'.format(amp_diffs.max()))
    # set relevant output to True if passed the test
    if not (amp_diffs > epsilon).any():
        test_results[0] = True
        print('First test passed, original and scrambled data have matching FFT component amplitudes.')
    else:
        print('First test failed, found substantial difference\n' +
              'between original and scrambled data FFT component magnitudes.')

    # Check the covariance matrices
    data_cov = np.cov(data_matrix, rowvar=False)
    data_scrambled_cov = np.cov(data_scrambled, rowvar=False)
    # FIX: maximum ABSOLUTE difference, for the same reason as in the first test
    maxDiff = np.abs(data_cov - data_scrambled_cov).max()
    print('Maximum difference between covariance matrices: {:.3e}'.format(maxDiff))
    # set relevant output to True if passed the test
    if maxDiff <= epsilon:
        test_results[1] = True
        print('Second test passed, original and scrambled data have matching covariance structures.')
    else:
        print('Second test failed, found substantial difference\n' +
              'between original and scrambled data covariance structures.')

    # Check the correlations across original and scrambled vars
    ccoeffs = fastColumnCorr(data_matrix, data_scrambled)
    # FIX: with fewer than 40 vars the bin count used to be 0, which makes the histogram call fail
    n_bins = max(1, data_matrix.shape[1] // 40)
    tmp = mplot.hist(ccoeffs, bins=n_bins)
    # if the mean and median are close to zero (|value| < 0.05) we consider that a success;
    # FIX: compare absolute values, otherwise a strongly negative bias counted as a pass
    if abs(np.mean(ccoeffs)) < 0.05 and abs(np.median(ccoeffs)) < 0.05:
        test_results[2] = True
        print('Third test passed, correlation coefficients group around 0.\n' +
              ' Look at the histogram for further details.')
    else:
        print('Third test failed, correlation coefficients seem to be biased.\n' +
              'Look at the histogram for further details.')

    return test_results

In [None]:
#ISC functions

#OLS - COMPARE WITH sklearn.linear_model.LinearRegression **********************************
def reg_m(y, x):
    """
    Fit an ordinary least squares model with statsmodels and return the fitted results object.
    INPUTS:
    y = dependent variable (passed to sm.OLS as endog)
    x = regressors laid out as [regressors x samples]; transposed here to [samples x regressors]
    OUTPUT:
    the fitted statsmodels results object (callers read .params, .rsquared, .fvalue, .f_pvalue)
    """
    # NOTE(review): add_constant is called with its defaults; per the statsmodels docs the default
    # has_constant='skip' does not add an intercept when x already contains a constant column,
    # which would shift the params that callers index from 1 -- confirm this cannot happen here
    design = sm.add_constant(np.array(x).T)
    return sm.OLS(endog=y, exog=design).fit()

#make 2d X-matrix with variably lagged timeseries [TRs x 2*maxT+1]
def make2dVshift(data,maxT):

    """
    make2dVshift -- build the lagged design matrix for a single timeseries
    INPUTS:
    data = 1D numpy array holding one timeseries [TRs]
    maxT = absolute value of maximum lag to include in the model [TRs]
    OUTPUTS:
    vShift = X-matrix [TRs x 2*maxT+1]; column (lag + maxT) holds the timeseries shifted by
             "lag" TRs for lag = -maxT..maxT, with zeros filling the samples shifted in
    """
    nTRs = data.shape[0]
    vShift = np.empty([len(data), 2*maxT+1]) #preallocate [TRs x shifts]
    for lag in range(-maxT, maxT+1):
        col = lag + maxT #lags -maxT..maxT map onto columns 0..2*maxT
        if lag < 0:
            #negative lag: drop the first |lag| samples and zero-pad the end
            shifted = np.hstack((data[-lag:], np.zeros(-lag)))
        elif lag > 0:
            #positive lag: zero-pad the start and drop the last "lag" samples
            shifted = np.hstack((np.zeros(lag), data[:max(nTRs - lag, 0)]))
        else:
            #zero lag: the timeseries itself
            shifted = data
        vShift[:, col] = shifted

    return vShift

#make 3d X-matrix with variably lagged timeseries [TRs x voxels x 2*maxT+1]
def make3dVshift(data,maxT):

    """
    make3dVshift -- build the lagged design matrices for all voxels at once
    INPUTS:
    data = a [TRs x voxels] timeseries matrix
    maxT = absolute value of maximum lag to include in the model [TRs]
    OUTPUTS:
    vShift = [TRs x voxels x 2*maxT+1] array; slice [:, :, lag + maxT] holds the whole data matrix
             shifted by "lag" TRs for lag = -maxT..maxT, with zero rows filling the samples shifted in
    """
    nTRs = data.shape[0]
    nVox = data.shape[1]

    #preallocate the lagged dataset [TRs x voxels x 2*maxT+1]
    vShift = np.empty([nTRs, nVox, 2*maxT+1])

    for lag in range(-maxT, maxT+1): #for each lag
        k = lag + maxT #lags -maxT..maxT map onto slices 0..2*maxT

        if lag < 0:
            #negative lag: drop the first |lag| rows and append zero rows at the end
            vShift[:, :, k] = np.vstack((data[-lag:, :], np.zeros((-lag, nVox))))
        elif lag > 0:
            #positive lag: prepend zero rows and drop the last "lag" rows
            vShift[:, :, k] = np.vstack((np.zeros((lag, nVox)), data[:max(nTRs - lag, 0), :]))
        else:
            #zero lag: the data as is
            vShift[:, :, k] = data

    return vShift

def voxWrapper(vShift3D,listener,VOX):
    """
    Fit the lagged-speaker linear model for one voxel (written so it can be dispatched with joblib).

    INPUTS
    vShift3D: lagged speaker data as produced by make3dVshift [TRs x voxels x lags]
    listener: listener time series [TRs x voxels]
    VOX: voxel index

    OUTPUTS
    b: model coefficients without the first entry of model.params (the constant term added in reg_m)
    Rsq: R^2 of the fit
    F: F-statistic of the fit
    pF: p-value of the F-test
    """
    #voxel-specific X-matrix in 2D [TRs x lags]
    nTRs = vShift3D.shape[0]
    nLags = vShift3D.shape[2]
    X = vShift3D[:, VOX, :].reshape((nTRs, nLags))

    #solve linear model X*b=y where y is this voxel's listener timeseries
    #(reg_m expects regressors as rows, hence the transpose)
    fit = reg_m(listener[:, VOX].T, X.T)

    #drop the leading constant term and hand back an independent array
    betas = fit.params[1:].copy()

    return betas, fit.rsquared, fit.fvalue, fit.f_pvalue

def voxWrapperPerm(vShift3D_scr,listener,Rperm,VOX):
    """
    Fit the linear model for one voxel on phase-scrambled speaker data and report its R^2
    (one sample of the permutation null distribution).

    INPUTS
    vShift3D_scr: lagged, phase-scrambled speaker data [TRs x voxels x lags]
    listener: listener time series [TRs x voxels]
    Rperm: not used by this function; kept only as part of the signature
    VOX: voxel index

    OUTPUT
    R^2 of the model fitted to the scrambled data for this voxel (a single number, not a p-value)
    """
    #voxel-specific X-matrix from the scrambled data in 2D [TRs x lags]
    nTRs = vShift3D_scr.shape[0]
    nLags = vShift3D_scr.shape[2]
    X = vShift3D_scr[:, VOX, :].reshape((nTRs, nLags))

    #solve linear model X*b=y where y is this voxel's listener timeseries
    fit = reg_m(listener[:, VOX].T, X.T)

    return fit.rsquared
    
#ISC computation
def couplingFMRI(speaker, listener, maxT, fitPermuts, method):
    """
    Model each listener voxel's timeseries from time-lagged copies of the matching speaker voxel
    and, optionally, run a phase-scrambling permutation test on the R^2 values.

    INPUTS
    speaker: speaker time series [TRs x voxels]
    listener: listener time series [TRs x voxels]
    maxT: absolute value of the maximum lag [TRs]; each model has 2*maxT+1 lagged regressors
    fitPermuts: number of permutations for the R^2 permutation test (0 = no permutation test)
    method: 0 = old method (scramble each voxel's lagged X-matrix separately),
            anything else = new method (scramble all speaker voxels at once, then lag)

    OUTPUTS
    b: beta coefficients without the constant term [2*maxT+1 x voxels]
    Rsq: R^2 per voxel [voxels]
    F: F-statistic per voxel [voxels]
    pF: p-value of the F-test per voxel [voxels]
    pP_Rsq: fraction of permutations whose R^2 exceeds the real R^2 [voxels],
            or None if fitPermuts is 0

    NOTE: the new method reads the notebook-level settings voxParallel and numJobs.

    Fixes relative to the previous version:
    - method 0 referenced undefined names (vShift_scr, firPermuts, Rperm) and re-allocated pP_Rsq
      for every voxel, wiping the p-values of all earlier voxels
    - the row-padding "JD HACK" is gone: phase_scrambling preserves the number of rows, so padding
      would make X one row longer than the listener data
    - the progress message condition mixed bitwise operators with comparisons
    - the joblib path called voxWrapperPerm with a missing argument and wrapped a number in delayed()
    - the fit to the real data and the p-value computation no longer live inside the permutation loop
    """
    print('Modeling listener data using lagged time series from speaker...')

    if method == 0: #original method
        b, Rsq, F, pF, Rperm = _couplingPerVoxelScramble(speaker, listener, maxT, fitPermuts)
    else: #new method
        b, Rsq, F, pF, Rperm = _couplingAllVoxelScramble(speaker, listener, maxT, fitPermuts)

    #permutation p-value: share of scrambled fits that beat the real fit, for all voxels at once
    if fitPermuts > 0:
        pP_Rsq = np.sum(Rperm > Rsq[np.newaxis, :], axis=0) / float(fitPermuts)
    else:
        pP_Rsq = None

    return b, Rsq, F, pF, pP_Rsq


def _couplingPerVoxelScramble(speaker, listener, maxT, fitPermuts):
    """Old method: fit voxel by voxel and phase-scramble each voxel's lagged X-matrix per permutation."""
    voxelN = speaker.shape[1]

    #preallocate results matrices
    b = np.empty([2*maxT+1, voxelN])
    Rsq = np.empty([voxelN])
    F = np.empty([voxelN])
    pF = np.empty([voxelN])
    Rperm = np.empty([fitPermuts, voxelN]) if fitPermuts > 0 else None

    #For each voxel...
    for VOX in range(voxelN):

        #get X matrix of time-shifted speaker timeseries (the same for every permutation)
        vShift = make2dVshift(speaker[:,VOX],maxT)

        #solve linear model vShift*b=y where y is the vector from listener's data
        model = reg_m(listener[:,VOX].T, vShift.T)
        b[:,VOX] = model.params[1:] #skip the constant term
        Rsq[VOX] = model.rsquared
        F[VOX] = model.fvalue
        pF[VOX] = model.f_pvalue

        #occasional feedback: every 10000 voxels, or once at the end for smaller data sets
        if ((VOX + 1) % 10000 == 0) or (voxelN < 10000 and VOX == voxelN-1):
            print('\nFinished with voxel no. ' + str(VOX + 1))

        #permutation test (the loop is empty when fitPermuts is 0)
        for PERM in range(fitPermuts):

            #scramble the lagged X-matrix; all lag columns receive the same random phases
            X = phase_scrambling(vShift)

            #solve linear model X*b=y where y is the vector from listener's data
            Rperm[PERM,VOX] = reg_m(listener[:,VOX].T, X.T).rsquared

    return b, Rsq, F, pF, Rperm


def _couplingAllVoxelScramble(speaker, listener, maxT, fitPermuts):
    """New method: phase-scramble all speaker voxels at once per permutation, then lag and fit."""
    voxelN = speaker.shape[1]

    #make time-lagged X-matrices without scrambling the speaker timeseries
    vShift3D = make3dVshift(speaker,maxT)

    #fit the voxelwise models to the non-scrambled data, with joblib if requested
    if voxParallel == 1:
        fits = Parallel(n_jobs=numJobs)(delayed(voxWrapper)(vShift3D,listener,VOX) for VOX in range(voxelN))
    else:
        fits = [voxWrapper(vShift3D,listener,VOX) for VOX in range(voxelN)]

    #reformat the per-voxel tuples into results matrices
    b = np.empty([2*maxT+1, voxelN])
    Rsq = np.empty([voxelN])
    F = np.empty([voxelN])
    pF = np.empty([voxelN])
    for VOX, (bVox, RsqVox, FVox, pFVox) in enumerate(fits):
        b[:,VOX] = bVox
        Rsq[VOX] = RsqVox
        F[VOX] = FVox
        pF[VOX] = pFVox

    if fitPermuts <= 0:
        return b, Rsq, F, pF, None

    #R^2 values from the permutation test [permutations x voxels]
    Rperm = np.empty([fitPermuts,voxelN])

    for PERM in range(fitPermuts):

        #scramble the time series, then make time-lagged X-matrices from the scrambled data
        speaker_scr = phase_scrambling(speaker)
        vShift3D_scr = make3dVshift(speaker_scr,maxT)

        #voxWrapperPerm never reads its Rperm argument, so pass None instead of shipping the
        #whole array to every joblib task
        if voxParallel == 1:
            Rperm[PERM,:] = Parallel(n_jobs=numJobs)(delayed(voxWrapperPerm)(vShift3D_scr,listener,None,VOX) for VOX in range(voxelN))
        else:
            Rperm[PERM,:] = [voxWrapperPerm(vShift3D_scr,listener,None,VOX) for VOX in range(voxelN)]

    return b, Rsq, F, pF, Rperm

In [None]:
#Loop over DBIC participants
#running counters, all starting at zero -- presumably advanced inside the pair loop below; confirm there
pseudoCounter = 0;
realCounter = 0;
pairCounter = 0;

#preallocate speaker timeseries arrays
#NOTE(review): [[]]*4 repeats ONE list object four times. The loop below replaces slots 1 and 3
#with fresh arrays; make sure slots 0 and 2 are also replaced rather than appended to.
sData = [[]]*4; 

#preallocate listener timeseries arrays
#NOTE(review): same aliasing caveat as sData; the loop below replaces slots 0 and 2
lData = [[]]*4; 

#define some index mapping variables
#two 4-element lookup tables -- presumably indexed by the same 0..3 slot number as sData/lData; confirm against their use
rowMap = [0,1,0,1]
ijMap = [0,0,1,1]

for dbicPairN in dbicSubs: #for each DBIC subject...
    
    #clear the DBIC arrays within sData and lData to make sure nothing gets recycled between participants
    for COND in [1,3]:
        sData[COND] = np.empty([totalTRs,len(voxelCoords)])
    for COND in [0,2]:
        lData[COND] = np.empty([totalTRs,len(voxelCoords)])
    
    ######################
    ### load DBIC data ###
    ######################
    
    #get dbicSub ID and runN
    dbicSub = pairsAndRuns['DBIC'][dbicPairN] #e.g., 'sid000007'
    dbicRunInd = pairsAndRuns['ind_run'][dbicPairN]
    dbicRunJoint = pairsAndRuns['joint_run'][dbicPairN]
    
    #indicate that we're starting on a new DBIC participant
    print('\nloading data for DBIC subject ' + str(dbicPairN + 1) + ', ' + dbicSub + '...')
    
    #get file names
    if local == 1: #for JD debugging locally -- will eventually remove
        
        #independent DBIC speaker
        dbicFileSpeaker_ind = '/Users/jeffreyknotts/Documents/MATLAB/Wheatley/isc_test/sub-' + dbicSub + \
        '_ses-pair0' + str(dbicPairN + 1) + '_task-storytelling' + str(dbicRunInd) + '_run-0' + \
        str(dbicRunInd) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_speaker.mat'  #[TRs x voxels]
        
        #independent DBIC listener
        dbicFileListener_ind = '/Users/jeffreyknotts/Documents/MATLAB/Wheatley/isc_test/sub-' + dbicSub + \
        '_ses-pair0' + str(dbicPairN + 1) + '_task-storytelling' + str(dbicRunInd) + '_run-0' + \
        str(dbicRunInd) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_listener.mat'  #[TRs x voxels]
        
        #joint DBIC speaker
        dbicFileSpeaker_joint = '/Users/jeffreyknotts/Documents/MATLAB/Wheatley/isc_test/sub-' + dbicSub + \
        '_ses-pair0' + str(dbicPairN + 1) + '_task-storytelling' + str(dbicRunJoint) + '_run-0' + \
        str(dbicRunJoint) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_speaker.mat'  #[TRs x voxels]
        
        #joint DBIC listener
        dbicFileListener_joint = '/Users/jeffreyknotts/Documents/MATLAB/Wheatley/isc_test/sub-' + dbicSub + \
        '_ses-pair0' + str(dbicPairN + 1) + '_task-storytelling' + str(dbicRunJoint) + '_run-0' + \
        str(dbicRunJoint) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_listener.mat'  #[TRs x voxels]
        
    else:
    
        #independent DBIC speaker
        dbicFileSpeaker_ind = '/flash/wheatley/adamb/hyperscanning_DBIC_ses2/sub-' + dbicSub + \
        '_fmriprep/fmriprep/sub-' + dbicSub + '/ses-pair0' + str(dbicPairN + 1) + '/func/' + \
        'sub-' + dbicSub + '_ses-pair0' + str(dbicPairN + 1) + '_task-storytelling' + str(dbicRunInd) + '_run-0' + \
        str(dbicRunInd) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_speaker.mat'  #[TRs x voxels]
        
        #independent DBIC listener
        dbicFileListener_ind = '/flash/wheatley/adamb/hyperscanning_DBIC_ses2/sub-' + dbicSub + \
        '_fmriprep/fmriprep/sub-' + dbicSub + '/ses-pair0' + str(dbicPairN + 1) + '/func/' + \
        'sub-' + dbicSub + '_ses-pair0' + str(dbicPairN + 1) + '_task-storytelling' + str(dbicRunInd) + '_run-0' + \
        str(dbicRunInd) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_listener.mat'  #[TRs x voxels]
        
        #joint DBIC speaker
        dbicFileSpeaker_joint = '/flash/wheatley/adamb/hyperscanning_DBIC_ses2/sub-' + dbicSub + \
        '_fmriprep/fmriprep/sub-' + dbicSub + '/ses-pair0' + str(dbicPairN + 1) + '/func/' + \
        'sub-' + dbicSub + '_ses-pair0' + str(dbicPairN + 1) + '_task-storytelling' + str(dbicRunJoint) + '_run-0' + \
        str(dbicRunJoint) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_speaker.mat'  #[TRs x voxels]
        
        #joint DBIC listener
        dbicFileListener_joint = '/flash/wheatley/adamb/hyperscanning_DBIC_ses2/sub-' + dbicSub + \
        '_fmriprep/fmriprep/sub-' + dbicSub + '/ses-pair0' + str(dbicPairN + 1) + '/func/' + \
        'sub-' + dbicSub + '_ses-pair0' + str(dbicPairN + 1) + '_task-storytelling' + str(dbicRunJoint) + '_run-0' + \
        str(dbicRunJoint) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_listener.mat'  #[TRs x voxels]
        
        
    #load timeseries for (D)BIC (S)peaker in the (I)ndependent condition
    if 'ts_DSI' in locals(): #if the ts_DSI variable already exists...
        del ts_DSI #delete it to make sure we don't have data leaking between participants
    if os.path.isfile(dbicFileSpeaker_ind):
        dummy = sio.loadmat(dbicFileSpeaker_ind) #load data
        sData[1] = dummy['dbicSpeaker'][:,voxelCoords] #get timeseries data 
        #ts_DSI = dummy['dbicSpeaker'] #get timeseries data 
        del dummy
        
    #load timeseries for (D)BIC (L)istener in the (I)ndependent condition
    if 'ts_DLI' in locals(): #if the ts_DLI variable already exists...
        del ts_DLI #delete it to make sure we don't have data leaking between participants
    if os.path.isfile(dbicFileListener_ind):
        dummy = sio.loadmat(dbicFileListener_ind) #load data
        lData[0] = dummy['dbicListener'][:,voxelCoords] #get timeseries data
        #ts_DLI = dummy['dbicListener'] #get timeseries data
        del dummy
        
    #load timeseries for (D)BIC (S)peaker in the (J)oint condition
    if 'ts_DSJ' in locals(): #if the ts_DSJ variable already exists...
        del ts_DSJ #delete it to make sure we don't have data leaking between participants
    if os.path.isfile(dbicFileSpeaker_joint):
        dummy = sio.loadmat(dbicFileSpeaker_joint) #load data
        sData[3] = dummy['dbicSpeaker'][:,voxelCoords] #get time series data
        #ts_DSJ = dummy['dbicSpeaker'] #get time series data
        del dummy
        
    #load timeseries for (D)BIC (L)istener in the (J)oint condition
    if 'ts_DLJ' in locals(): #if the ts_DLJ variable already exists...
        del ts_DLJ #delete it to make sure we don't have data leaking between participants
    if os.path.isfile(dbicFileListener_joint):
        dummy = sio.loadmat(dbicFileListener_joint) #load data
        lData[2] = dummy['dbicListener'][:,voxelCoords] #get time series data
        #ts_DLJ = dummy['dbicListener'] #get time series data
        del dummy

    
    #Loop over CBS participants
    for cbsPairN in cbsSubs:
        
        #clear the CBS arrays within sData and lData to make sure nothing gets recycled between participants
        for COND in [0,2]:
            sData[COND] = np.empty([totalTRs,len(voxelCoords)]) 
        for COND in [1,3]:
            lData[COND] = np.empty([totalTRs,len(voxelCoords)])

        #####################
        ### load CBS data ###
        #####################
        
        pairType = 0; #for adding to time.pairs below
        
        cbsSub = pairsAndRuns['CBS'][cbsPairN] #e.g., 'hid000002'
        cbsRunInd = pairsAndRuns['ind_run'][cbsPairN]
        cbsRunJoint = pairsAndRuns['joint_run'][cbsPairN]
        
        #indicate that we're starting on a new DBIC participant
        print('\nloading data for CBS subject ' + str(cbsPairN + 1) + ', ' + cbsSub + '...')
        
        #get file names
        if local == 1: #for JD debugging locally -- will eventually remove

            #independent CBS speaker
            cbsFileSpeaker_ind = '/Users/jeffreyknotts/Documents/MATLAB/Wheatley/isc_test/sub-' + cbsSub + \
            '_ses-pair0' + str(cbsPairN + 1) + '_task-storytelling' + str(cbsRunInd) + '_run-0' + \
            str(cbsRunInd) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_speaker.mat'  #[TRs x voxels]

            #independent CBS listener
            cbsFileListener_ind = '/Users/jeffreyknotts/Documents/MATLAB/Wheatley/isc_test/sub-' + cbsSub + \
            '_ses-pair0' + str(cbsPairN + 1) + '_task-storytelling' + str(cbsRunInd) + '_run-0' + \
            str(cbsRunInd) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_listener.mat'  #[TRs x voxels]

            #joint CBS speaker
            cbsFileSpeaker_joint = '/Users/jeffreyknotts/Documents/MATLAB/Wheatley/isc_test/sub-' + cbsSub + \
            '_ses-pair0' + str(cbsPairN + 1) + '_task-storytelling' + str(cbsRunJoint) + '_run-0' + \
            str(cbsRunJoint) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_speaker.mat'  #[TRs x voxels]

            #joint CBS listener
            cbsFileListener_joint = '/Users/jeffreyknotts/Documents/MATLAB/Wheatley/isc_test/sub-' + cbsSub + \
            '_ses-pair0' + str(cbsPairN + 1) + '_task-storytelling' + str(cbsRunJoint) + '_run-0' + \
            str(cbsRunJoint) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_listener.mat'  #[TRs x voxels]

        else:

            #independent CBS speaker
            cbsFileSpeaker_ind = '/flash/wheatley/adamb/hyperscanning_CBS/sub-' + cbsSub + \
            '_fmriprep/fmriprep/sub-' + cbsSub + '/ses-pair0' + str(cbsPairN + 1) + '/func/' + \
            'sub-' + cbsSub + '_ses-pair0' + str(cbsPairN + 1) + '_task-storytelling' + str(cbsRunInd) + '_run-0' + \
            str(cbsRunInd) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_speaker.mat'  #[TRs x voxels]
            
            #independent CBS listener
            cbsFileListener_ind = '/flash/wheatley/adamb/hyperscanning_CBS/sub-' + cbsSub + \
            '_fmriprep/fmriprep/sub-' + cbsSub + '/ses-pair0' + str(cbsPairN + 1) + '/func/' + \
            'sub-' + cbsSub + '_ses-pair0' + str(cbsPairN + 1) + '_task-storytelling' + str(cbsRunInd) + '_run-0' + \
            str(cbsRunInd) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_listener.mat'  #[TRs x voxels]

            #joint CBS speaker
            cbsFileSpeaker_joint = '/flash/wheatley/adamb/hyperscanning_CBS/sub-' + cbsSub + \
            '_fmriprep/fmriprep/sub-' + cbsSub + '/ses-pair0' + str(cbsPairN + 1) + '/func/' + \
            'sub-' + cbsSub + '_ses-pair0' + str(cbsPairN + 1) + '_task-storytelling' + str(cbsRunJoint) + '_run-0' + \
            str(cbsRunJoint) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_speaker.mat'  #[TRs x voxels]

            #joint CBS listener
            cbsFileListener_joint = '/flash/wheatley/adamb/hyperscanning_CBS/sub-' + cbsSub + \
            '_fmriprep/fmriprep/sub-' + cbsSub + '/ses-pair0' + str(cbsPairN + 1) + '/func/' + \
            'sub-' + cbsSub + '_ses-pair0' + str(cbsPairN + 1) + '_task-storytelling' + str(cbsRunJoint) + '_run-0' + \
            str(cbsRunJoint) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_listener.mat'  #[TRs x voxels]

            
        #load timeseries for (C)BS (S)peaker in the (I)ndependent condition
        if 'ts_CSI' in locals(): #if the ts_CSI variable already exists...
            del ts_CSI #delete it to make sure we don't have data leaking between participants
        if os.path.isfile(cbsFileSpeaker_ind):
            dummy = sio.loadmat(cbsFileSpeaker_ind) #load data
            sData[0] = dummy['cbsSpeaker'][:,voxelCoords] #get time series data
            #ts_CSI = dummy['cbsSpeaker'] #get time series data
            del dummy
            
        #load timeseries for (C)BS (L)istener in the (I)ndependent condition
        if 'ts_CLI' in locals(): #if the ts_CLI variable already exists...
            del ts_CLI #delete it to make sure we don't have data leaking between participants
        if os.path.isfile(cbsFileListener_ind):
            dummy = sio.loadmat(cbsFileListener_ind) #load data
            lData[1] = dummy['cbsListener'][:,voxelCoords] #get time series data
            #ts_CLI = dummy['cbsListener'] #get time series data
            del dummy
        
        #load timeseries for (C)BS (S)peaker in the (J)oint condition
        if 'ts_CSJ' in locals(): #if the ts_CSJ variable already exists...
            del ts_CSJ #delete it to make sure we don't have data leaking between participants
        if os.path.isfile(cbsFileSpeaker_joint):
            dummy = sio.loadmat(cbsFileSpeaker_joint) #load data
            sData[2] = dummy['cbsSpeaker'][:,voxelCoords] #get time series data
            #ts_CSJ = dummy['cbsSpeaker'] #get time series data
            del dummy
        
        #load timeseries for (C)BS (L)istener in the (J)oint condition
        if 'ts_CLJ' in locals(): #if the ts_CLJ variable already exists...
            del ts_CLJ #delete it to make sure we don't have data leaking between participants
        if os.path.isfile(cbsFileListener_joint):
            dummy = sio.loadmat(cbsFileListener_joint) #load data
            lData[3] = dummy['cbsListener'][:,voxelCoords] #get time series data
            #ts_CLJ = dummy['cbsListener'] #get time series data
            del dummy
        
        
        ########################
        ### run couplingFMRI ###
        ########################
        
        pairStart = time.time() #get starting time
        print('\n#################################################')
        print('### starting ISC for DBIC sub ' + str(dbicPairN + 1) + ' and CBS sub ' + str(cbsPairN + 1) + ' ###')
        print('#################################################')
    
        if dbicPairN == cbsPairN: #if we're dealing with a real pair...
            
            #Get real pair info
            ROW = [(realCounter*2), realCounter*2+1]
            pairMap[0][ROW,0] = dbicPairN + 1
            pairMap[0][ROW,1] = cbsPairN + 1
            pairMap[0][ROW,2] = [1, 0]
            
            #Loop over rolling windows...
            for STEP in list(range(numSteps)): #for each window...
                    
                #get TRs to use in current window
                TRs = list(range(stepSize * STEP,stepSize * STEP + winTRs)); #make sure to start at ZERO here
                    
                #estimate remaining time based on current estimate of mean ISC duration
                if pairCounter == 0:
                    timeString = 'lets wait and see how long the first ISC takes...'
                else:
                    estTimeRemain = pairsRemaining * meanISCdur - (meanISCdur / numSteps)*STEP
                    hrs = math.floor(estTimeRemain / 60)
                    minFloat = estTimeRemain % 60
                    mins = math.floor(minFloat)
                    secs = round((minFloat - mins)*60)
                    if hrs > 0:
                        timeString = str(hrs) + ' hr ' + str(mins) + ' min ' + str(secs) + ' s' 
                    else:
                        timeString = str(mins) + ' min ' + str(secs) + ' s'
                            
                print('\n%%%%%%%% Running ISC for pair ' + str(pairCounter + 1) + ' of ' + str(totalPairN) + ', step ' + str(STEP + 1) + ' of ' + str(numSteps) + ': TRs ' + str(stepSize * STEP + 1) + ':' + str(stepSize * STEP + winTRs)) #add 1 to the TRs computed in the line above for clarity in printing
                print('%%%%%%%% estimated time remaining: ' + timeString)
                
                if parallel == 1: 
                    
                    #run couplingFMRI via joblib
                    #conditions...
                    #COND=1: CBS speaker, DBIC listener, independent
                    #COND=2: DBIC speaker, CBS listener, independent
                    #COND=3: CBS speaker, DBIC listener, joint
                    #COND=4: DBIC speaker, CBS listener, joint
                    tmp = Parallel(n_jobs=numJobs)(delayed(couplingFMRI)(sData[COND][TRs,:], lData[COND][TRs,:], maxT, fitPermuts, method) for COND in range(4))
                    
                    #index couplingFMRI output to realFits array
                    for COND in range(4):
                        realFits[ijMap[COND]][0][ROW[rowMap[COND]],:,:,STEP] = tmp[COND][0]
                        realFits[ijMap[COND]][1][ROW[rowMap[COND]],:,STEP] = tmp[COND][1]
                        realFits[ijMap[COND]][2][ROW[rowMap[COND]],:,STEP] = tmp[COND][2]
                        realFits[ijMap[COND]][3][ROW[rowMap[COND]],:,STEP] = tmp[COND][3]
                        realFits[ijMap[COND]][4][ROW[rowMap[COND]],:,STEP] = tmp[COND][4]
                    
                else:
                
                    #run couplingFMRI
                    #rowMap = [0,1,0,1]; ijMap = [0,0,1,1]; #MOVE OUT OF LOOPS EVENTUALLY
                    for COND in range(4):
                        realFits[ijMap[COND]][0][ROW[rowMap[COND]],:,:,STEP], realFits[ijMap[COND]][1][ROW[rowMap[COND]],:,STEP], realFits[ijMap[COND]][2][ROW[rowMap[COND]],:,STEP], realFits[ijMap[COND]][3][ROW[rowMap[COND]],:,STEP], realFits[ijMap[COND]][4][ROW[rowMap[COND]],:,STEP] = couplingFMRI(sData[COND][TRs,:], lData[COND][TRs,:], maxT, fitPermuts, method)
            
            #add one to real pair counter
            realCounter += 1
            
        else: 
            
            #Get pseudo pair info
            ROW = [(pseudoCounter*2), pseudoCounter*2+1]
            pairMap[1][ROW,0] = dbicPairN + 1
            pairMap[1][ROW,1] = cbsPairN + 1
            pairMap[1][ROW,2] = [1, 0]
            
            #Loop over rolling windows...
            for STEP in range(numSteps): #for each window...
                
                #get TRs to use in current window
                TRs = list(range(stepSize * STEP,stepSize * STEP + winTRs)); #make sure to start at ZERO here
                    
                #estimate remaining time based on current estimate of mean ISC duration
                if pairCounter == 0:
                    timeString = 'lets wait and see how long the first ISC takes...'
                else:
                    estTimeRemain = pairsRemaining * meanISCdur - (meanISCdur / numSteps)*STEP
                    hrs = math.floor(estTimeRemain / 60)
                    minFloat = estTimeRemain % 60
                    mins = math.floor(minFloat)
                    secs = round((minFloat - mins)*60)
                    if hrs > 0:
                        timeString = str(hrs) + ' hr ' + str(mins) + ' min ' + str(secs) + ' s' 
                    else:
                        timeString = str(mins) + ' min ' + str(secs) + ' s'
                            
                print('\n%%%%%%%% Running ISC for pair ' + str(pairCounter + 1) + ' of ' + str(totalPairN) + ', step ' + str(STEP + 1) + ' of ' + str(numSteps) + ': TRs ' + str(stepSize * STEP + 1) + ':' + str(stepSize * STEP + winTRs)) #add 1 to the TRs computed in the line above for clarity in printing
                print('%%%%%%%% estimated time remaining: ' + timeString)
                
                if parallel == 1: 
                    
                    #run couplingFMRI via joblib
                    #conditions...
                    #COND=1: CBS speaker, DBIC listener, independent
                    #COND=2: DBIC speaker, CBS listener, independent
                    #COND=3: CBS speaker, DBIC listener, joint
                    #COND=4: DBIC speaker, CBS listener, joint
                    tmp = Parallel(n_jobs=numJobs)(delayed(couplingFMRI)(sData[COND][TRs,:], lData[COND][TRs,:], maxT, fitPermuts, method) for COND in range(4))
                    
                    #index couplingFMRI output to realFits array
                    #rowMap = [0,1,0,1]; ijMap = [0,0,1,1]; #MOVE OUT OF LOOPS EVENTUALLY
                    for COND in range(4):
                        pseudoFits[ijMap[COND]][0][ROW[rowMap[COND]],:,:,STEP] = tmp[COND][0]
                        pseudoFits[ijMap[COND]][1][ROW[rowMap[COND]],:,STEP] = tmp[COND][1]
                        pseudoFits[ijMap[COND]][2][ROW[rowMap[COND]],:,STEP] = tmp[COND][2]
                        pseudoFits[ijMap[COND]][3][ROW[rowMap[COND]],:,STEP] = tmp[COND][3]
                        pseudoFits[ijMap[COND]][4][ROW[rowMap[COND]],:,STEP] = tmp[COND][4]
                    
                else:
                
                    #run couplingFMRI
                    for COND in range(4):
                        pseudoFits[ijMap[COND]][0][ROW[rowMap[COND]],:,:,STEP], pseudoFits[ijMap[COND]][1][ROW[rowMap[COND]],:,STEP], pseudoFits[ijMap[COND]][2][ROW[rowMap[COND]],:,STEP], pseudoFits[ijMap[COND]][3][ROW[rowMap[COND]],:,STEP], pseudoFits[ijMap[COND]][4][ROW[rowMap[COND]],:,STEP] = couplingFMRI(sData[COND][TRs,:], lData[COND][TRs,:], maxT, fitPermuts, method)
                    

            #add one to pseudo pair counter
            pseudoCounter += 1
        
        #get approximate time to run couplingFMRI for current pair [min]
        timeLog[pairCounter,0] = dbicPairN + 1 #DBIC sub index
        timeLog[pairCounter,1] = cbsPairN + 1 #CBS sub index
        timeLog[pairCounter,2] = round((time.time() - pairStart) / 60,1) #approximate duration of couplingFMRI for current pair [min]
        
        #estimate analysis duration 
        meanISCdur = np.mean(timeLog[list(range(0,pairCounter+1)),2]) #average time it takes to run couplingFMRI across all pairs that have been ruun up to this point
        pairCounter = pairCounter + 1 #add one to total pair counter
        pairsRemaining = totalPairN - pairCounter #get number of pairs that remain to be analyzed (for estimating time remaining above)
    

In [None]:
#total wall-clock duration of the analysis, measured from startTime (set in an earlier cell)
endTime = time.time()
elapsedSecs = endTime - startTime
ISCduration = elapsedSecs / 3600 #[hrs]
ISCduration