In [None]:
"""

This notebook reconstructs the full storytelling time series for each subject
from the listener and speaker time series output by parseEPI.

It saves these out as nifti files with condition labels in the file
name (independent, joint) -- 2 files per subject.

These nifti files can be used by make_hyperalignment_datasets.py to
make pymvpa datasets. Note that these can later be sliced, for
example, into just listening or just reading intervals within the
dataset format to try things like hyperaligning on the listening
task and testing on the concatenated storytelling listening data, etc...

"""


In [1]:
import pickle
import numpy as np
import pandas as pd
import scipy.io as sio
from scipy import stats
from nilearn import image as nImage
from nilearn import input_data
# from nilearn import datasets
# from nilearn import surface
# from nilearn import plotting

In [2]:
# output folder: the reconstructed nifti files are written here
saveFolder = '/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hyperscanning/preprocessing/hyperalignment/input_nifti_files/'

In [3]:
# folder containing the pickled pair lookup table
loadFolder = '/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hyperscanning/storytelling/misc/'

# unpickle the table (displayed further down as a data frame with one row
# per pair x condition x speaker combination)
pairMapPath = loadFolder + 'pairMap_all_DBIC_CBS_pairs.pkl'
with open(pairMapPath, 'rb') as pairMapHandle:
    pairMap = pickle.load(pairMapHandle)

In [4]:
# Timing constants are hard-coded here rather than read from the data.

# total number of TRs in each concatenated time series
totTRs = 615

# TRs in a single speech turn
TRsPerTurn = 41

# speech turns per participant per condition
numTurns = round(totTRs / TRsPerTurn)

numPairs = 8

# turnTRs[k] holds the TR indices of the k-th turn-sized window; windows are
# consecutive and non-overlapping, and numTurns * 2 of them are generated
turnTRs = []
for TURN in range(int(numTurns * 2)):
    inds = np.arange(TRsPerTurn) + TURN * TRsPerTurn
    turnTRs.append(inds)

In [12]:
pairMap

Unnamed: 0,dbicNum,dbicID,cbsNum,cbsID,pairType,condition,dbicSpeaker,sFile,lFile,duration
0,2,sid000007,2,hid000002,1,0,0,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,2.13
1,2,sid000007,2,hid000002,1,0,1,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,2.12
2,2,sid000007,2,hid000002,1,1,0,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,2.17
3,2,sid000007,2,hid000002,1,1,1,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,2.13
4,3,sid000009,3,hid000003,1,0,0,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,2.12
5,3,sid000009,3,hid000003,1,0,1,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,2.1
6,3,sid000009,3,hid000003,1,1,0,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,2.1
7,3,sid000009,3,hid000003,1,1,1,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,2.1
8,4,sid000560,4,hid000004,1,0,0,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,2.11
9,4,sid000560,4,hid000004,1,0,1,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hypersc...,2.13


In [5]:
# condInds[c] = positions of the pairMap rows whose 'condition' value is c
condInds = []
for COND in (0, 1):
    isCond = pairMap['condition'] == COND
    condInds.append(np.where(isCond)[0])

In [7]:
# lookup lists indexed by site (0 = DBIC, 1 = CBS)
site = ['DBIC','CBS']
siteID = ['dbicID','cbsID']

# pairMap column holding the .mat file, indexed by speaker flag (0 -> lFile, 1 -> sFile)
fileType = ['lFile','sFile']

# empty table with two rows for every pairMap row
fileListColumns = ['subID','site','condition','speaker','file']
fileListIndex = np.arange(int(pairMap.shape[0]*2))
fileList = pd.DataFrame(index=fileListIndex, columns=fileListColumns)

In [8]:
# Fill in fileList: one row per (site, pairMap row). All DBIC rows come
# first, then all CBS rows, each group in pairMap row order.
#
# Cells are written with .loc instead of fileList[col][row] = value. The
# chained form assigns through an intermediate Series, which pandas warns
# about and which leaves the frame untouched when copy-on-write is active.
fROW = 0
for SITE in [0,1]: # dbic, cbs
    for pROW in range(pairMap.shape[0]): # for each row of pairMap...

        # speaker flag for this site's subject: 0 when SITE equals the
        # row's dbicSpeaker value, 1 otherwise
        # NOTE(review): presumably dbicSpeaker == 1 means the DBIC subject
        # is the speaker for that row -- confirm against pairMap's definition
        isSpeaker = 0 if SITE == pairMap['dbicSpeaker'][pROW] else 1

        fileList.loc[fROW, 'subID'] = pairMap[siteID[SITE]][pROW]
        fileList.loc[fROW, 'site'] = site[SITE]
        fileList.loc[fROW, 'condition'] = pairMap['condition'][pROW]
        fileList.loc[fROW, 'speaker'] = isSpeaker

        # speaker rows take the 'sFile' path, listener rows the 'lFile' path
        fileList.loc[fROW, 'file'] = pairMap[fileType[isSpeaker]][pROW]

        # increment fileList row counter
        fROW += 1

In [9]:
# whole-brain mask image used to build the NiftiMasker that turns the
# reconstructed time series back into nifti images
# NOTE(review): file name suggests the MNI mask was resampled, presumably to the EPI grid -- confirm
resampledMaskFile = '/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hyperscanning/misc/mni_icbm152_nlin_asym_09c/mni_icbm152_t1_tal_nlin_asym_09c_mask_RESAMPLED.nii'

In [15]:
# Replace the substring '2021' with 'newMask' in every file path.
# Done as one vectorised, literal (non-regex) replacement assigned back to
# the column; the previous per-row fileList['file'][ROW] = ... form is a
# chained assignment that pandas warns about and that does not update the
# frame when copy-on-write is active.
fileList['file'] = fileList['file'].str.replace('2021','newMask', regex=False)

fileList

'/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hyperscanning/storytelling/parseEPI_output_files/sub-sid000007_ses-pair02_task-storytelling2_run-02_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_newMask_listener.mat'

In [None]:
# Rebuild the interleaved speaker/listener time series for every subject and
# condition, z-score it, and write it out as a nifti file in saveFolder.
#
# fileList is consumed two rows at a time: rows ROW and ROW+1 must be the
# speaker and listener files of the same subject in the same condition.

# get number of subjects
numSubs = len(np.unique(fileList['subID']))

# preallocate (every subject's arrays are kept in memory in this list)
data = [[]] * numSubs

# condition labels
condLabs = ['ind','joint']

#%% load whole brain mask
# The mask is the same for every file, so it is loaded and the masker is
# fit once here instead of once per output file. Because the mask image is
# supplied explicitly, fitting needs no EPI data; the masker is only used
# for inverse_transform below.
maskImg = nImage.load_img(resampledMaskFile)
masker = input_data.NiftiMasker(mask_img=maskImg)
masker.fit()

ROW = 0
for SUB in range(numSubs):
    data[SUB] = [[]] * 2
    for COND in [0,1]:

        # make sure the two rows really form a speaker/listener pair for one
        # subject in condition COND -- otherwise the output would be built
        # from the wrong files or saved under the wrong condition label
        if (fileList['subID'][ROW] != fileList['subID'][ROW+1]
                or fileList['condition'][ROW] != fileList['condition'][ROW+1]
                or fileList['condition'][ROW] != COND
                or fileList['speaker'][ROW] == fileList['speaker'][ROW+1]):
            raise ValueError('fileList rows ' + str(ROW) + ' and ' + str(ROW+1) +
                             ' are not a speaker/listener pair for one subject in condition ' + str(COND))

        # get .mat file names
        if fileList['speaker'][ROW]: # if the first row is a speaker file...

            sFile = fileList['file'][ROW]
            lFile = fileList['file'][ROW+1]

        else:

            lFile = fileList['file'][ROW]
            sFile = fileList['file'][ROW+1]

        # get corresponding nifti file name (arbitrarily use speaker file as reference)
        # (only used below to derive the output file name)
        niiFile = sFile[0:sFile.find('nuisRegr')] + 'nuisRegr_newMask.nii.gz'
        niiFile = niiFile.replace('parseEPI_output_files','nuisRegr_output_files')

        # preallocate separated speaker/listener data
        sepData = [[]] * 2 # 0=speaker, 1=listener

        # 'order' lists the sepData indices in the order they are written
        # for each turn pair.
        # NOTE(review): the earlier comments on these two lines read
        # "listener, speaker" for DBIC and "speaker, listener" for CBS, which
        # contradicts the sepData indexing above. The code is unchanged;
        # confirm which site actually takes the first turn.

        # load speaker data
        print('loading ' + sFile + '...')
        dummyFile = sio.loadmat(sFile)
        if fileList['site'][ROW] == 'DBIC': # dbic
            order = [0,1] # speaker segment first, then listener segment
            sepData[0] = dummyFile['dbicSpeaker']
        else: # cbs
            order = [1,0] # listener segment first, then speaker segment
            sepData[0] = dummyFile['cbsSpeaker']
        del dummyFile

        # load listener data
        print('loading ' + lFile + '...')
        dummyFile = sio.loadmat(lFile)
        if fileList['site'][ROW] == 'DBIC': # dbic
            sepData[1] = dummyFile['dbicListener']
        else: # cbs
            sepData[1] = dummyFile['cbsListener']
        del dummyFile

        # preallocate (rows = TRs of the full series, columns taken from the speaker data)
        data[SUB][COND] = np.empty([int(totTRs*2),sepData[0].shape[1]])

        # initialize row indices (copied so the in-place increment below
        # does not modify turnTRs[0])
        rowInds = np.copy(turnTRs[0])

        # for each pair of speaker-listener turns
        for TURN in range(numTurns):

            for SPEAKER in order:

                # copy this turn's TRs into the next turn-sized slot of the output
                data[SUB][COND][rowInds,:] = sepData[SPEAKER][turnTRs[TURN],:]
                rowInds += TRsPerTurn

        # standardize the time series
        data[SUB][COND] = stats.zscore(data[SUB][COND],axis=0)

        #%% make new nifti with parsedEPI time series
        # strip everything before 'sub-' and swap the '.nii.gz' ending for
        # the condition-labelled suffix
        outputFile = niiFile.replace(niiFile[0:niiFile.find('sub-')],'')
        outputFile = outputFile.replace(niiFile[-7:],'_interp_uncut_' + condLabs[COND] + '.nii.gz')
        outputFile = saveFolder + outputFile
        print('saving file ' + str(SUB*2+COND+1) + ' of ' + str(int(fileList.shape[0] / 2)) + ' to: ')
        print(outputFile)
        cleaned_img = masker.inverse_transform(data[SUB][COND])
        cleaned_img.to_filename(outputFile)

        # increment row indices
        ROW += 2

loading /dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hyperscanning/storytelling/parseEPI_output_files/sub-sid000007_ses-pair02_task-storytelling2_run-02_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_newMask_speaker.mat...
loading /dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hyperscanning/storytelling/parseEPI_output_files/sub-sid000007_ses-pair02_task-storytelling2_run-02_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_newMask_listener.mat...
saving file 1 of 32 to: 
/dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hyperscanning/preprocessing/hyperalignment/input_nifti_files/sub-sid000007_ses-pair02_task-storytelling2_run-02_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_newMask_interp_uncut_ind.nii.gz
loading /dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hyperscanning/storytelling/parseEPI_output_files/sub-sid000007_ses-pair02_task-storytelling1_run-01_bold_space-MNI152NLin2009cAsym_preproc_nuisRegr_newMask_speaker.mat...
loading /dartfs-hpc/rc/lab/W/WheatleyT/f00589z/hyperscanning/storytelling/parseEPI_output