In [4]:

'''
This script loads the timeseries data from the listening and reading
hyperscanning control tasks, and visualizes the data in various ways
with the goal of trying to elucidate why we see so many negative
correlations between individual timeseries and averaged timeseries
from the rest of the group.


'''

'\nThis script loads the timeseries data from the listening and reading\nhyperscanning control tasks, and visualizes the data in various ways\nwith the goal of trying to elucidate why we see so many negative\ncorrelations between individual timeseries and averaged timeseries\nfrom the rest of the group.\n\nTo add:\nlook at just primary auditory cortex\n'

In [None]:
# Select the compute server and derive every input/output location from it.
# `sys` is imported here because this cell sits above the main import cell,
# so on a fresh kernel it would otherwise fail with a NameError.
import sys

server = 1 # 0=drzeuss, 1=discovery

if server == 0: # drzeuss
    # trailing slash is required: all paths below are built by plain string concatenation
    baseFolder = '/afs/dbic.dartmouth.edu/usr/wheatley/jd/'
    sys.path.append(baseFolder)
    inputFolder = baseFolder + 'control_tasks/'
    maskFile = '/flash/wheatley/adamb/mni_asym09c_mask_resamp3x3.nii.gz'
    statMapFolder = inputFolder
    htmlFolder = inputFolder
else: # discovery
    baseFolder = '/dartfs-hpc/rc/home/z/f00589z/hyperscanning/'
    sys.path.append(baseFolder + 'support_scripts/')
    inputFolder = baseFolder + 'control_tasks/nuisRegr_output_files/'
    maskFile = baseFolder + 'control_tasks/nuisRegr_input_files/mni_asym09c_mask_resamp3x3.nii.gz'
    statMapFolder = baseFolder + 'control_tasks/statMaps/'
    htmlFolder = baseFolder + 'control_tasks'

In [5]:
import scipy.io as sio
import os
import sys
import numpy as np
import pandas as pd
import time
from joblib import Parallel, delayed
from scipy import stats
import matplotlib.pyplot as plt
import statsmodels.stats.multitest as multi
from phaseScramble import *
from CircleShift import *
from scipy.stats import norm
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
%matplotlib inline

In [6]:
# participant IDs for the two sites; position i in each array belongs to the same pair
dbicIDs = np.array(["sid000007", "sid000009", "sid000560", "sid000535", "sid000102", "sid000416", "sid000499", "sid000142"])
cbsIDs = np.array(["hid000002", "hid000003", "hid000004", "hid000005", "hid000006", "hid000007", "hid000008", "hid000009"])

# pair numbers start at 2, one per DBIC/CBS pair
pairNums = np.arange(2, len(dbicIDs) + 2)

# subject table: every DBIC subject first, then every CBS subject,
# so the pair numbers are simply repeated once per site
subList = pd.DataFrame({'pairNum': np.tile(pairNums, 2)})
subList['subID'] = np.concatenate([dbicIDs, cbsIDs])
print(subList)

# total number of participants (two per pair)
numSubs = 2 * len(pairNums)

# distribution used for fitting (normal)
dist = stats.norm

# labels for the task, normalization and site conditions
taskNames = np.array(['listening','reading'])
normNames = np.array(['',', norm'])
siteNames = np.array(['DBIC','CBS'])

# scaling applied to build the "norm" condition:
#   True  = mean center and z-score
#   False = normalize to range from 0 to 1
standardScaling = True

# scaler object shared by all z-scoring steps in the notebook
scaler = StandardScaler()

# output switches
exportHtml = False    # convert the notebook to html in the last cell
saveStatMaps = False  # write the drift and smoothness stat maps to disk

# optionally drop a set of initial time points from each task.
# Note: this was a quick way to check how removing the time points over which
# the big initial drift occurs affects ISC.
RI = False
removeInitial = [100,50] # number of timepoints to remove off the front [listening, reading]

# # sign flip harvard?
# SFH = True

    pairNum      subID
0         2  sid000007
1         3  sid000009
2         4  sid000560
3         5  sid000535
4         6  sid000102
5         7  sid000416
6         8  sid000499
7         9  sid000142
8         2  hid000002
9         3  hid000003
10        4  hid000004
11        5  hid000005
12        6  hid000006
13        7  hid000007
14        8  hid000008
15        9  hid000009


In [7]:
# Load the time series for every task and subject and build a scaled copy.
# Layout of the result: boldData[TASK][NORM][SUB] is a (time points x voxels) array,
# where NORM 0 holds the data as loaded and NORM 1 holds the scaled version.
# Files are read from inputFolder (defined in the server configuration cell).

# add a filename tag (optional)
inputFileTag = True
if inputFileTag:
    fileTag = '_detrended'
else:
    fileTag = ''

# preallocate task lists
boldData = [[]] * 2

# get EPI time series
for TASK in [0,1]: # for each task, listening, then reading...

    # preallocate normalization lists
    boldData[TASK] = [[]] * 2

    for NORM in [0,1]: # for each normalization condition (0 = no normalization, 1 = normalization)...

        # preallocate subject lists
        boldData[TASK][NORM] = [[]] * numSubs

        for SUB in range(numSubs): # for each subject...

            # get file name (task index 0/1 maps onto storytelling run 3/4)
            fileName = inputFolder + 'sub-' + subList['subID'][SUB] + '_ses-pair0' + str(subList['pairNum'][SUB]) + '_task-storytelling' + str(TASK + 3) + '_run-0' + str(TASK + 3) + '_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_2021' + fileTag + '.mat'

            if NORM == 0:
                # load real data
                tmp = sio.loadmat(fileName) #load file
                boldData[TASK][NORM][SUB] = tmp['tseries'] #get timeseries data
                print('loaded ' + str(boldData[TASK][NORM][SUB].shape[0]) + ' x ' + str(boldData[TASK][NORM][SUB].shape[1]) + ' timeseries for ' + taskNames[TASK] + ' task, sub ' + subList['subID'][SUB])

            else:

                # the NORM == 0 pass has already loaded the raw data for this subject
                rawSeries = boldData[TASK][0][SUB]

                # normalize each voxel (column) over time
                if standardScaling:
                    boldData[TASK][NORM][SUB] = scaler.fit_transform(rawSeries)
                else:
                    # rescale every voxel time series to the range 0-1.
                    # preprocessing.normalize is NOT a 0-1 rescaling: it scales each
                    # row (time point) to unit norm, so minmax_scale is used instead.
                    boldData[TASK][NORM][SUB] = preprocessing.minmax_scale(rawSeries, axis=0)

loaded 508 x 69880 timeseries for listening task, sub sid000007
loaded 506 x 69880 timeseries for listening task, sub sid000009
loaded 533 x 69880 timeseries for listening task, sub sid000560
loaded 501 x 69880 timeseries for listening task, sub sid000535
loaded 561 x 69880 timeseries for listening task, sub sid000102
loaded 500 x 69880 timeseries for listening task, sub sid000416
loaded 504 x 69880 timeseries for listening task, sub sid000499
loaded 503 x 69880 timeseries for listening task, sub sid000142
loaded 510 x 69880 timeseries for listening task, sub hid000002
loaded 510 x 69880 timeseries for listening task, sub hid000003
loaded 510 x 69880 timeseries for listening task, sub hid000004
loaded 510 x 69880 timeseries for listening task, sub hid000005
loaded 510 x 69880 timeseries for listening task, sub hid000006
loaded 510 x 69880 timeseries for listening task, sub hid000007
loaded 510 x 69880 timeseries for listening task, sub hid000008
loaded 510 x 69880 timeseries for listen

KeyboardInterrupt: 

In [None]:
# Optionally strip the first removeInitial[TASK] time points (rows) from every
# time series. Note that this overwrites boldData in place, so re-running the
# cell removes a further set of time points.
if RI:
    for TASK in [0,1]: # listening, then reading
        leadingRows = np.arange(removeInitial[TASK]) # row indices to drop for this task
        for NORM in [0,1]: # not normalized, normalized
            for SUB in range(numSubs): # each subject
                trimmed = np.delete(boldData[TASK][NORM][SUB], leadingRows, 0)
                boldData[TASK][NORM][SUB] = trimmed

In [None]:
# Leave-one-out correlations: for every task, normalization condition and subject,
# correlate the subject's time series with the mean time series of all other
# subjects. Results are stored as corrData[TASK][NORM][SUB].
corrData = [[] for _ in range(2)]

for TASK in [0,1]: # listening, then reading

    corrData[TASK] = [[] for _ in range(2)]

    for NORM in [0,1]: # not normalized, normalized

        # one slot per subject
        corrData[TASK][NORM] = [[] for _ in range(numSubs)]

        for SUB in range(numSubs):

            # indices of everyone EXCEPT the current participant
            otherSubs = np.delete(np.arange(0,numSubs), SUB)

            # average the remaining participants' data element-wise
            groupMean = np.mean([boldData[TASK][NORM][i] for i in otherSubs], axis=0)

            # correlate the current participant with the rest-of-group mean
            subCorr = fastColumnCorr(boldData[TASK][NORM][SUB], groupMean)
            corrData[TASK][NORM][SUB] = subCorr
            print('computing correlation for sub ' + str(SUB + 1) + ', ' + taskNames[TASK] + ' task' + normNames[NORM])

In [None]:
# One figure per pair: histograms of the leave-one-out correlations for both
# members of the pair, both tasks, and both normalization conditions.

# get number of pairs (numSubs is always two subjects per pair)
numPairs = numSubs // 2

# include random data at the bottom?
randData = False

# make subplot map: rows = (site, normalization) combinations, columns = tasks
if randData:
    spMap = np.arange(10).reshape(5,2) + 1
else:
    spMap = np.arange(8).reshape(4,2) + 1

# set axis label font size
axLabFontSize = 12

# colorblind-friendly colors list
CB_color_cycle = ['#377eb8', '#ff7f00', '#4daf4a',
                  '#f781bf', '#a65628', '#984ea3',
                  '#999999', '#e41a1c', '#dede00']

# set task colors
taskColors = CB_color_cycle[:2]

# plot data (SUB indexes the pair here)
for SUB in range(numPairs):

    # get subjects from current pair: the DBIC member and the CBS member
    pairSubs = [SUB, SUB + numPairs]

    # initialize plot
    plt.figure(facecolor='white',figsize=(6,14))

    # for each subject in the current pair
    for PAIRSUB in [0,1]:

        for TASK in [0,1]:

            for NORM in [0,1]:

                # get plot data
                pData = corrData[TASK][NORM][pairSubs[PAIRSUB]]

                # select subplot; the grid is taken from spMap so that it always
                # matches the map (previously hard-coded to 5 rows, which left an
                # empty row whenever randData was False)
                plt.subplot(spMap.shape[0], spMap.shape[1], spMap[NORM+PAIRSUB*2,TASK])

                # plot histogram
                plt.hist(pData, bins=25, density=True, alpha=0.6, color=taskColors[TASK])

                # dashed line at x=0
                yMax = plt.gca().get_ylim()[1]
                plt.plot([0, 0], [0, yMax], '--k')

                # axes and title
                plt.xlabel('correlation', fontsize=axLabFontSize)
                if TASK == 0:
                    plt.ylabel('voxel count', fontsize=axLabFontSize)
                plt.title(taskNames[TASK] + ', sub ' + siteNames[PAIRSUB] + str(SUB + 1) + normNames[NORM])

    plt.tight_layout()
    plt.show()

In [None]:
# Pairwise ISC: correlate every pair of subjects voxel by voxel, average across
# voxels, and show the resulting subs x subs matrix as a heatmap for each task
# and normalization condition.
#   corrData_pairs[TASK][NORM][SUB1][SUB2] : voxelwise correlations (SUB2 > SUB1 only)
#   corrMat[TASK][NORM]                    : symmetric matrix of mean correlations
#   corrColors[TASK][NORM]                 : RGBA colors of the heatmap cells
import seaborn as sns
import matplotlib.cm as cm
from matplotlib.colors import Normalize

corrMat = [[]] * 2
corrColors = [[]] * 2
corrData_pairs = [[]] * 2
axLab = [[]] * numSubs

for TASK in [0,1]:

    corrMat[TASK] = [[]] * 2
    corrColors[TASK]= [[]] * 2
    corrData_pairs[TASK]= [[]] * 2

    for NORM in [0,1]:

        # some feedback
        print('computing pairwise correlations for ' + str(taskNames[TASK]) + ' task' + normNames[NORM])

        # preallocate subs x subs correlation matrix
        corrMat[TASK][NORM] = np.empty([numSubs,numSubs])
        corrData_pairs[TASK][NORM] = [[]] * numSubs

        for SUB1 in range(numSubs):

            corrData_pairs[TASK][NORM][SUB1] = [[]] * numSubs

            # get axis labels (only needed once, on the first pass).
            # `and` is required here: `TASK == 0 & NORM == 0` parses as the chained
            # comparison `TASK == (0 & NORM) == 0`, which ignores NORM entirely.
            if TASK == 0 and NORM == 0:
                if SUB1 < numPairs:
                    axLab[SUB1] = 'D' + str(SUB1 + 1)
                else:
                    axLab[SUB1] = 'H' + str(SUB1 - numPairs + 1)

            # set the diagonal equal to 1
            corrMat[TASK][NORM][SUB1,SUB1] = 1

            for SUB2 in np.arange(SUB1 + 1,numSubs):

                corrData_pairs[TASK][NORM][SUB1][SUB2] = fastColumnCorr(boldData[TASK][NORM][SUB1], boldData[TASK][NORM][SUB2])
                corrMat[TASK][NORM][SUB1,SUB2] = np.mean(corrData_pairs[TASK][NORM][SUB1][SUB2])

                #fill in the other half of corrMat so the plots dont look weird
                corrMat[TASK][NORM][SUB2,SUB1] = corrMat[TASK][NORM][SUB1,SUB2]


        plt.figure(facecolor='white')
        # pyplot.get_cmap is used because matplotlib.cm.get_cmap has been removed
        # from recent matplotlib releases
        cmap = plt.get_cmap('RdBu')#sns.diverging_palette(20, 220, n=200)
        ax = sns.heatmap(
            corrMat[TASK][NORM],
            vmin=-1, vmax=1, center=0,
            cmap=cmap,
            square=True
        )
        ax.set_xticklabels(
            axLab,
            rotation=45,
            horizontalalignment='right'
        )
        ax.set_yticklabels(
            axLab,
            rotation=0
        )

        # add a title
        plt.title('mean corr coef across vox, ' + taskNames[TASK] + ' task' + normNames[NORM])

        # get heatmap rgbs
        im = ax.collections[0]
        corrColors[TASK][NORM] = im.cmap(im.norm(im.get_array()))

In [None]:
# One figure per subject: for each task, a histogram of the leave-one-out
# correlations (top row) followed by the time series of the voxels with the
# minimum, maximum and (approximately) median correlation, each plotted against
# the rest-of-group mean time series at the same voxel.

# use normalized data to put things on the same scale
NORM = 1

# extreme voxel labels
voxLabs = ['min corr vox','max corr vox','median vox']
voxColors = ['y','m','k']

# make subplotting map: row 0 = histogram, rows 1-3 = min/max/median voxel; columns = tasks
spMap3 = np.arange(8).reshape(4,2) + 1

# plot data
for SUB in range(numSubs):

    # initialize plot
    plt.figure(facecolor='white',figsize=(16,8))

    if SUB < numPairs:
        titleString = dbicIDs[SUB] + normNames[NORM]
    else:
        titleString = cbsIDs[SUB - numPairs] + normNames[NORM]

    plt.suptitle(titleString)

    for TASK in [0,1]:

        # get plot data
        pData = corrData[TASK][NORM][SUB]

        # select subplot for histogram (uses this cell's own map, spMap3, rather
        # than whichever spMap another cell happened to leave behind)
        plt.subplot(spMap3.shape[0], spMap3.shape[1], spMap3[0,TASK])

        # plot histogram
        plt.hist(pData, bins=100, density=True, alpha=0.6, color=taskColors[TASK])

        # dashed line at x=0
        yMax = plt.gca().get_ylim()[1]
        plt.plot([0, 0], [0, yMax], '--k')

        # axes and title
        plt.xlabel('correlation', fontsize=axLabFontSize)
        if TASK == 0:
            plt.ylabel('voxel count', fontsize=axLabFontSize)
        plt.title(taskNames[TASK])

        # plot voxel time series with extreme values
        for VOX in [0,1,2]: # min, max, median

            # get "Extreme Index" of voxel with either min or max value (or median)
            if VOX == 0:
                EIND = np.unravel_index(np.argmin(pData),pData.shape) # minimum correlation voxel index
            elif VOX == 1:
                EIND = np.unravel_index(np.argmax(pData),pData.shape) # maximum correlation voxel index
            elif VOX == 2:
                EIND = np.argsort(pData)[len(pData)//2] # median (approximately)

            # add locations of min and max correlation to histogram for reference
            extremeCorr = pData[EIND]
            plt.subplot(spMap3.shape[0], spMap3.shape[1], spMap3[0,TASK])
            plt.plot([extremeCorr, extremeCorr], [0, yMax], '-' + voxColors[VOX])

            # get individual subject time series at the extreme voxel
            y1 = boldData[TASK][NORM][SUB][:,EIND]
            x = np.array(range(len(y1))) + 1

            # get mean of data from all participants EXCEPT the current participant
            otherSubs = np.arange(0,numSubs)
            otherSubs = np.delete(otherSubs,SUB)
            y2 = np.mean([boldData[TASK][NORM][i][:,EIND] for i in otherSubs], axis=0)
            if VOX == 2: #hack to deal with EIND not being a tuple when we find the median
                # the scaler needs a 2-D (samples x 1) array
                y2 = y2.reshape(y2.shape[0],1)
            y2 = scaler.fit_transform(y2) # normalize the rest-of-group mean (see next section for confirmation that this doesn't influence correlations)

            # select subplot and reset subplot border color
            ax = plt.subplot(spMap3.shape[0], spMap3.shape[1], spMap3[VOX + 1,TASK])
            plt.setp(ax.spines.values(), color=voxColors[VOX])
            plt.setp([ax.get_xticklines(), ax.get_yticklines()], color=voxColors[VOX])

            # plot lines and add legend
            line1, = plt.plot(x,y1,'-k',label = 'individual')
            line2, = plt.plot(x,y2,'-', label = 'rest of group', color = taskColors[TASK]) # , linewidth=2
            plt.legend(handles=[line1, line2],loc='upper right')

            if TASK == 0:
                plt.xlabel('TR')
            else:
                plt.xlabel('reading stimulus flip')
            plt.ylabel('BOLD signal')
            plt.title(voxLabs[VOX])

    plt.tight_layout()
    plt.show()

In [None]:
def find_nearest_percentile_index(array, percentile):
    """Locate the element of `array` that lies closest to a given percentile.

    Parameters
    ----------
    array : array-like
        Values to search; converted with np.asarray.
    percentile : float
        Percentile (0-100) passed to np.percentile.

    Returns
    -------
    Index (into the flattened array) of the element with the smallest
    absolute distance to the percentile value; the first one wins on ties.
    """
    values = np.asarray(array)
    cutoff = np.percentile(values, percentile)
    distances = np.abs(values - cutoff)
    return distances.argmin()

# Per task and subject: compute an inverse-smoothness statistic for every voxel,
# plot its histogram, and show the time series of five reference voxels
# (smoothest, three intermediate voxels, least smooth).

# how the three intermediate voxels are chosen:
#   'median'      -> voxels nearest to median - SD, median, median + SD
#   anything else -> voxels nearest to the percentiles listed below
voxMethod = 'median'

spMap = np.arange(6).reshape(3,2) + 1

NORM = 1
numVox = boldData[0][0][0].shape[1] # get number of voxels from first sub
smoothness = [[]] * 2 # preallocate
stdSmooth = False
voxColors = CB_color_cycle[4:9]
percentiles = [1, 2, 3]

# panel titles; the three middle labels follow the selection method so that the
# titles describe the voxels that are actually plotted
if voxMethod == 'median':
    smoothLabs = ['max smoothness','median - SD','median','median + SD','min smoothness']
else:
    smoothLabs = ['max smoothness',str(percentiles[0]) + ' %', str(percentiles[1]) + ' %', str(percentiles[2]) + ' %','min smoothness']

for TASK in [0,1]: # for each task
    smoothness[TASK] = [[]] * numSubs # preallocate
    for SUB in range(numSubs):

        # initialize plot
        plt.figure(facecolor='white',figsize=(16,8))

        # main title
        plt.suptitle(taskNames[TASK] + ' sub ' + str(SUB + 1))

        # get data
        data = boldData[TASK][NORM][SUB]

        # SD of the first differences divided by the absolute mean first difference
        # (larger = less smooth); the differences are computed once and reused.
        # formula should be intuitive, but got it from here: https://stats.stackexchange.com/questions/24607/how-to-measure-smoothness-of-a-time-series-in-r
        dataDiff = np.diff(data,axis=0)
        smoothness[TASK][SUB] = np.std(dataDiff,axis=0) / abs(np.mean(dataDiff,axis=0))

        # optional standardization
        if stdSmooth:
            smoothness[TASK][SUB] = (smoothness[TASK][SUB] - np.mean(smoothness[TASK][SUB])) / np.std(smoothness[TASK][SUB])

        # arbitrarily subset for plotability (because these are so skewed)
        # plotDataInds maps rows of plotData back to voxel indices
        plotDataInds = np.argwhere(smoothness[TASK][SUB] < 1000)
        plotData = smoothness[TASK][SUB][plotDataInds]

        # select subplot for histogram
        # (plt.hold was removed from matplotlib; axes keep their contents by default)
        plt.subplot(spMap.shape[0], spMap.shape[1], 1)

        # plot smoothness histogram
        plt.hist(plotData, bins=100, density=True, alpha=1, color=taskColors[TASK])
        plt.xlabel('inverse smoothness parameter')
        plt.ylabel('proportion of voxels')

        # get voxel indices (rows of plotData) for time series with various levels of smoothness
        evox = [[]] * 5
        evox[0] = np.unravel_index(np.argmin(plotData),plotData.shape)[0]
        if voxMethod == 'median':
            evox[1] = (np.abs(plotData - (np.median(plotData) - np.std(plotData)))).argmin()
            evox[2] = (np.abs(plotData - np.median(plotData))).argmin()
            evox[3] = (np.abs(plotData - (np.median(plotData) + np.std(plotData)))).argmin()
        else:
            counter = 1
            for PERC in percentiles:
                evox[counter] = find_nearest_percentile_index(plotData, PERC)
                counter += 1
        evox[4] = np.unravel_index(np.argmax(plotData),plotData.shape)[0]

        # get histogram max y-value
        yMax = plt.gca().get_ylim()[1]

        # plot single voxel timeseries
        for VOX in range(len(evox)):

            # add vertical bars to histogram
            plt.subplot(spMap.shape[0], spMap.shape[1], 1)
            smoothVal = plotData[evox[VOX]]
            plt.plot([smoothVal, smoothVal], [0, yMax], '-', color=voxColors[VOX])

            # get time series at the extreme voxel
            y = boldData[TASK][NORM][SUB][:,plotDataInds[evox[VOX]]]
            x = np.array(range(len(y))) + 1

            # select subplot for time series line plot
            ax = plt.subplot(spMap.shape[0], spMap.shape[1], VOX + 2)
            plt.setp(ax.spines.values(), color=voxColors[VOX])
            plt.setp([ax.get_xticklines(), ax.get_yticklines()], color=voxColors[VOX])

            # plot time series
            plt.plot(x,y,'-k')

            # subplot title and axis labels
            plt.title(smoothLabs[VOX])
            if TASK == 0:
                plt.xlabel('TR')
            else:
                plt.xlabel('reading stimulus flip')
            plt.ylabel('BOLD signal')

        plt.tight_layout()
        plt.show()

In [None]:
# One figure per pair of subjects (SUB1 < SUB2): for each task, a histogram of
# their voxelwise correlations followed by both subjects' time series at the
# voxels with the minimum, maximum and (approximately) median correlation.
# Relies on spMap3 and voxLabs from the single-subject extreme-voxel cell and
# on corrData_pairs from the pairwise correlation cell.

# make a numSubs by numSubs plot map
spMap4 = np.arange(numSubs**2).reshape(numSubs,numSubs)

# set plot width [inches?]
plotWidth = 16

# hardcode normalization because one figure per subject pair is already plenty
NORM = 1

# plot data
for SUB1 in range(numSubs):

    # get sub1 string
    if SUB1 < numPairs:
        sub1Str = 'D' + str(SUB1 + 1)
    else:
        sub1Str = 'H' + str(SUB1 - numPairs + 1)

    for SUB2 in np.arange(SUB1 + 1,numSubs):

        # get sub2 string
        if SUB2 < numPairs:
            sub2Str = 'D' + str(SUB2 + 1)
        else:
            sub2Str = 'H' + str(SUB2 - numPairs + 1)

        # initialize plot
        plt.figure(facecolor='white',figsize=(16,8))

        # main title
        plt.suptitle('subs ' + sub1Str + ' & ' + sub2Str)

        for TASK in [0,1]:

            # get data from voxels of interest
            pData = corrData_pairs[TASK][NORM][SUB1][SUB2]

            # plot histogram (indexed through spMap3, the map that also defines the
            # grid, instead of whichever spMap another cell left behind)
            plt.subplot(spMap3.shape[0], spMap3.shape[1], spMap3[0,TASK])
            plt.hist(pData, bins=100, density=True, alpha=0.6, color=taskColors[TASK])

            # dashed line at x=0
            yMax = plt.gca().get_ylim()[1]
            plt.plot([0, 0], [0, yMax], '--k')

            # axes and title
            plt.xlabel('correlation', fontsize=axLabFontSize)
            if TASK == 0:
                plt.ylabel('voxel count', fontsize=axLabFontSize)
            plt.title(taskNames[TASK])

            for VOX in [0,1,2]: # min, max, median

                # get "Extreme Index" of voxel with either min or max value (or median)
                if VOX == 0:
                    EIND = np.unravel_index(np.argmin(pData),pData.shape) # minimum correlation voxel index
                elif VOX == 1:
                    EIND = np.unravel_index(np.argmax(pData),pData.shape) # maximum correlation voxel index
                elif VOX == 2:
                    EIND = np.argsort(pData)[len(pData)//2] # median (approximately)

                # add locations of min and max correlation to histogram for reference
                extremeCorr = pData[EIND]
                plt.subplot(spMap3.shape[0], spMap3.shape[1], spMap3[0,TASK])
                plt.plot([extremeCorr, extremeCorr], [0, yMax], '-', color=voxColors[VOX])

                # get individual subject time series at the extreme voxel
                y1 = boldData[TASK][NORM][SUB1][:,EIND]
                y2 = boldData[TASK][NORM][SUB2][:,EIND]
                x = np.array(range(len(y1))) + 1

                # select subplot for time series line plot
                ax = plt.subplot(spMap3.shape[0], spMap3.shape[1], spMap3[VOX + 1,TASK])
                plt.setp(ax.spines.values(), color=voxColors[VOX])
                plt.setp([ax.get_xticklines(), ax.get_yticklines()], color=voxColors[VOX])

                line1, = plt.plot(x,y1,'-k',label = sub1Str)
                line2, = plt.plot(x,y2,'-', label = sub2Str, color = taskColors[TASK])
                plt.legend(handles=[line1, line2],loc='upper right')

                if TASK == 0:
                    plt.xlabel('TR')
                else:
                    plt.xlabel('reading stimulus flip')
                plt.ylabel('BOLD signal')
                plt.title(voxLabs[VOX])

        # display plots
        plt.tight_layout()
        plt.show()

In [None]:
# compare the mean signal of an early epoch to that of a late epoch. Greater absolute differences
# should indicate greater drift. NOTE that this is super hacky, but should be FAST and at least
# somewhat sensitive
# Relies on voxMethod and find_nearest_percentile_index from the smoothness cell.

# set ending and starting time points for the early and late epochs for each task, respectively
# epochBorders = [[10,5],[100,50]] would mean...
# early epochs for the listening and reading tasks would be time points 1-10 and 1-5, respectively
# and the late epochs would be 100-end, 50-end, also respectively
epochBorders = [[10,5],[100,50]]

# subplot map
spMap = np.arange(6).reshape(3,2) + 1

# use normalized data
NORM = 1

# set percentiles, colors, labels
voxColors = CB_color_cycle[4:9]
percentiles = [10, 50, 90]

# The three intermediate voxels are chosen either around the mean (mean - SD, mean,
# mean + SD) or at the percentiles above. The labels and the selection further down
# used to test different voxMethod values ('mean' vs 'median'), so the panel titles
# could describe voxels other than the ones plotted; both now share one flag.
# 'median' keeps selecting mean +/- SD voxels here, exactly as before.
useMeanSD = voxMethod in ('mean', 'median')
if useMeanSD:
    diffLabs = ['most negative diff','mean - SD','mean','mean + SD','most positive diff']
else:
    diffLabs = ['most negative diff',str(percentiles[0]) + ' %', str(percentiles[1]) + ' %', str(percentiles[2]) + ' %','most positive diff']

# standardize difference scores
stdDiff = True

# preallocate arrays
epoch = [[]] * 2
driftHack = [[]] * 2

# get number of samples in the time series from each task, using the data from the first subject
numSamps = [boldData[0][0][0].shape[0], boldData[1][0][0].shape[0]]

for TASK in [0,1]:

    # get epoch time points (0-based row indices)
    epoch[TASK] = [[]] * 2 # preallocate
    epoch[TASK][0] = np.arange(epochBorders[0][TASK]) # early epoch
    lateEpochWidth = numSamps[TASK] - epochBorders[1][TASK] + 1
    epoch[TASK][1] = np.arange(lateEpochWidth) + epochBorders[1][TASK] - 1 # late epoch
    # epoch[TASK][1] = np.linspace(epochBorders[1][TASK],numSamps[TASK],numSamps[TASK] - epochBorders[1][TASK] + 1) # late epoch

    # preallocate
    driftHack[TASK] = [[]] * numSubs

    for SUB in range(numSubs):

        # initialize plot
        plt.figure(facecolor='white',figsize=(16,8))

        # main title
        plt.suptitle(taskNames[TASK] + ' sub ' + str(SUB + 1))

        # get time series for current sub
        data = boldData[TASK][NORM][SUB]

        # compute hacky drift statistic: early-epoch mean minus late-epoch mean, per voxel
        driftHack[TASK][SUB] = np.mean(data[tuple(epoch[TASK][0]),:],axis=0) - np.mean(data[tuple(epoch[TASK][1]),:],axis=0)

        # optional standardization
        if stdDiff:
            driftHack[TASK][SUB] = (driftHack[TASK][SUB] - np.mean(driftHack[TASK][SUB])) / np.std(driftHack[TASK][SUB])

        # shorthand for the current subject's drift map
        subDrift = driftHack[TASK][SUB]

        # select subplot for histogram
        plt.subplot(spMap.shape[0], spMap.shape[1], 1)
        # plt.hold(True)

        # plot difference histogram
        plt.hist(subDrift, bins=100, density=True, alpha=0.5, color=taskColors[TASK])
        plt.xlabel('mean(first ' + str(epochBorders[0][TASK]) + ' time points) - mean(last ' + str(numSamps[TASK] - epochBorders[1][TASK] + 1) + ' timpoints')
        plt.ylabel('proportion of voxels')

        # get voxel indices for time series with min and max difference scores and the intermediate reference voxels
        evox = [[]] * 5
        evox[0] = np.unravel_index(np.argmin(subDrift),subDrift.shape)[0]
        if useMeanSD:
            evox[1] = (np.abs(subDrift - (np.mean(subDrift) - np.std(subDrift)))).argmin()
            evox[2] = (np.abs(subDrift - np.mean(subDrift))).argmin()
            evox[3] = (np.abs(subDrift - (np.mean(subDrift) + np.std(subDrift)))).argmin()
        else:
            counter = 1
            for PERC in percentiles:
                evox[counter] = find_nearest_percentile_index(subDrift, PERC)
                counter += 1
        evox[4] = np.unravel_index(np.argmax(subDrift),subDrift.shape)[0]

        # get histogram max y-value
        yMax = plt.gca().get_ylim()[1]

        # plot single voxel timeseries
        for VOX in range(len(evox)):

            # add vertical bars to histogram
            plt.subplot(spMap.shape[0], spMap.shape[1], 1)
            diffVal = subDrift[evox[VOX]]
            plt.plot([diffVal, diffVal], [0, yMax], '-', color=voxColors[VOX])

            # get time series at the extreme voxel
            y = boldData[TASK][NORM][SUB][:,evox[VOX]]
            x = np.array(range(len(y))) + 1

            # select subplot for time series line plot
            ax = plt.subplot(spMap.shape[0], spMap.shape[1], VOX + 2)
            plt.setp(ax.spines.values(), color=voxColors[VOX])
            plt.setp([ax.get_xticklines(), ax.get_yticklines()], color=voxColors[VOX])

            # plot time series
            plt.plot(x,y,'-k')

            # subplot title and axis labels
            plt.title(diffLabs[VOX])
            if TASK == 0:
                plt.xlabel('TR')
            else:
                plt.xlabel('reading stimulus flip')
            plt.ylabel('BOLD signal')

        plt.tight_layout()
        plt.show()

In [None]:
# Stack the per-subject drift and smoothness maps into (subjects x voxels)
# arrays, z-scoring each subject's map first, then average across subjects.

def _standardizeMap(voxMap):
    """z-score one voxel map with its own mean and standard deviation"""
    return (voxMap - np.mean(voxMap)) / np.std(voxMap)

# one entry per task
driftHackCons, smoothnessCons = [[]] * 2, [[]] * 2
driftHack_mean, smoothness_mean = [[]] * 2, [[]] * 2

# get mean drift and smoothness measures across subs
for TASK in [0,1]:

    driftRows = np.empty([numSubs,numVox])
    smoothRows = np.empty([numSubs,numVox])

    for SUB in range(numSubs):

        # make sure everything is standardized
        driftRows[SUB,:] = _standardizeMap(driftHack[TASK][SUB])
        smoothRows[SUB,:] = _standardizeMap(smoothness[TASK][SUB])

    driftHackCons[TASK] = driftRows
    smoothnessCons[TASK] = smoothRows

    # average over the subject axis
    driftHack_mean[TASK] = driftRows.mean(axis=0)
    smoothness_mean[TASK] = smoothRows.mean(axis=0)

In [None]:
# Display the first 100 values of the across-subject mean smoothness map.
# NOTE(review): TASK is not set in this cell; it is whatever value the most
# recently executed loop left behind - confirm which task is meant.
smoothness_mean[TASK][0:100]

In [None]:
# Optionally write the across-subject mean smoothness and drift maps to NIfTI
# files in statMapFolder, using the brain mask to map voxel vectors back into
# image space.
if saveStatMaps:
    from nilearn import image as nImage
    # NiftiMasker lives in nilearn.maskers in current nilearn; nilearn.input_data
    # is the deprecated location, kept as a fallback for older installations
    try:
        from nilearn.maskers import NiftiMasker
    except ImportError:
        from nilearn.input_data import NiftiMasker

    # get masker object; the mask is supplied up front, so fitting needs no data
    maskImg = nImage.load_img(maskFile)
    masker = NiftiMasker(mask_img=maskImg)
    masker.fit()

    for TASK in [0,1]:
        smoothPath = statMapFolder + 'smoothMean_' + taskNames[TASK] + '.nii.gz'
        driftPath = statMapFolder + 'driftMean_' + taskNames[TASK] + '.nii.gz'

        # smoothness
        cleaned_img = masker.inverse_transform(smoothness_mean[TASK])
        cleaned_img.to_filename(smoothPath)

        # "drift"
        cleaned_img = masker.inverse_transform(driftHack_mean[TASK])
        cleaned_img.to_filename(driftPath)

In [None]:
# Spot check at a single voxel: the correlation between one subject and the
# rest-of-group mean should be the same whether or not the group mean is
# z-scored, and should match what fastColumnCorr reports.

# which subject / task / normalization condition / extreme voxel to inspect
SUB, TASK, NORM = 0, 0, 1
VOX=1 #0=min,1=max

# locate the voxel with the extreme (min or max) leave-one-out correlation
pData = corrData[TASK][NORM][SUB]
pickExtreme = np.argmin if VOX == 0 else np.argmax
EIND = np.unravel_index(pickExtreme(pData),pData.shape)

# individual timeseries at that voxel
y1 = boldData[TASK][NORM][SUB][:,EIND].flatten()

# rest-of-group mean timeseries at that voxel
otherSubs = np.delete(np.arange(0,numSubs),SUB)
restOfGroup = [boldData[TASK][NORM][i][:,EIND] for i in otherSubs]
y2 = np.mean(restOfGroup, axis=0).flatten()

# z-scored version of the rest-of-group mean
y2_norm = scaler.fit_transform(y2.reshape(-1,1)).flatten()

# z-scoring the group data should leave the correlation unchanged
print(np.corrcoef(y1,y2))
print(np.corrcoef(y1,y2_norm))

# fastColumnCorr should give the same value as well
nSamp = y1.shape[0]
print(fastColumnCorr(y1.reshape(nSamp,1),y2.reshape(nSamp,1)))

In [None]:
# Toy demonstration with two random 5-element vectors: compare z-scoring before
# averaging, after averaging, and both.

def _zscoreVector(vec):
    """z-score a vector with the shared scaler and return it flattened"""
    return scaler.fit_transform(vec.reshape(len(vec),1)).flatten()

# random column vector a
a = np.random.rand(5).reshape(5,1)
print('random vector a:')
print(np.transpose(a)[0])

# random column vector b
b = np.random.rand(5).reshape(5,1)
print('\nrandom vector b:')
print(np.transpose(b)[0])

# z-scored versions of a and b
a_norm = _zscoreVector(a)
print('\nz-scored vector a:')
print(a_norm)
b_norm = _zscoreVector(b)
print('\nz-scored vector b:')
print(b_norm)

# mean of the raw vectors
ab_mean = np.mean([a, b],axis=0)
print('\nmean of raw vectors a and b:')
print(np.transpose(ab_mean)[0])

# z-scored mean of the raw vectors
ab_mean_norm = _zscoreVector(ab_mean)
print('\nz-scored mean of raw vectors a and b:')
print(ab_mean_norm)

# mean of the z-scored vectors
ab_norm_mean = np.mean([a_norm, b_norm],axis=0)
print('\nmean of z-scored vectors a and b:')
print(ab_norm_mean)

# z-scored mean of the z-scored vectors
ab_norm_mean_norm = _zscoreVector(ab_norm_mean)
print('\nz-scored mean of z-scored vectors a and b:')
print(ab_norm_mean_norm)


In [None]:
# Optionally convert this notebook to html with nbconvert, run from htmlFolder.
if exportHtml:
    import subprocess

    # run nbconvert with htmlFolder as its working directory instead of calling
    # os.chdir, so the kernel's own working directory is left untouched
    try:
        nbconvertResult = subprocess.run(
            ['jupyter', 'nbconvert', '--to', 'html', 'control_ISC_correlation_mystery.ipynb'],
            cwd=htmlFolder,
        )
    except OSError as err:
        # e.g. htmlFolder does not exist or jupyter is not on the PATH
        print('html export could not be started: ' + str(err))
    else:
        # os.system silently ignored failures; report them instead
        if nbconvertResult.returncode != 0:
            print('html export failed: nbconvert exited with status ' + str(nbconvertResult.returncode))
