In [2]:

import os
import sys
sys.path.append('/gpfs/milgram/project/turk-browne/projects/rtSynth_rt/')
import argparse
import numpy as np
import nibabel as nib
import scipy.io as sio
import subprocess
from scipy.stats import zscore
from nibabel.nicom import dicomreaders
import pydicom as dicom  # type: ignore
import time
from glob import glob
import shutil
from nilearn.image import new_img_like
import joblib
import rtCommon.utils as utils
from rtCommon.utils import loadConfigFile
import pickle5 as pickle
def save_obj(obj, name):
    """Pickle ``obj`` into the file ``<name>.pkl``.

    ``name`` is the path without extension; ``.pkl`` is appended to it.
    The highest pickle protocol available is used and an existing file at
    that path is overwritten. Returns None.
    """
    pkl_path = name + '.pkl'
    with open(pkl_path, 'wb') as out_file:
        out_file.write(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL))

def load_obj(name):
    """Return the object stored in the pickle file ``<name>.pkl``.

    ``name`` is the path without extension; ``.pkl`` is appended to it.
    NOTE: unpickling can execute arbitrary code, so only use this on files
    that come from a trusted source.
    """
    pkl_path = name + '.pkl'
    with open(pkl_path, 'rb') as in_file:
        restored = pickle.load(in_file)
    return restored
# from rtCommon.fileClient import FileInterface
# import rtCommon.projectUtils as projUtils
# from rtCommon.imageHandling import readRetryDicomFromFileInterface, getDicomFileName, convertDicomImgToNifti


# Declare the options of this script: the config file name, two skip flags,
# an optional test-run number and the scan used as alignment template.
argParser = argparse.ArgumentParser()
argParser.add_argument('--config', '-c', default='sub002.ses4.toml', type=str, help='experiment file (.json or .toml)')
argParser.add_argument('--skipPre', '-s', default=0, type=int, help='skip preprocess or not')
argParser.add_argument('--skipGreedy', '-g', default=0, type=int, help='skip greedy or not')
argParser.add_argument('--testRun', '-t', default=None, type=int, help='testRun, can be [None,1,2,3,4,5,6,7,8]')
argParser.add_argument('--scan_asTemplate', '-a', default=1, type=int, help="which scan's middle dicom as Template?")

# An empty argument string is parsed instead of sys.argv, so every option
# takes its default value declared above (to use another config, change the
# default of --config).
args = argParser.parse_args("")
from rtCommon.cfg_loading import mkdir,cfg_loading
# config="sub001.ses2.toml"
# Build the configuration object `cfg` from the selected config file.
# NOTE(review): cfg_loading is defined in rtCommon and not visible here; later
# cells read attributes such as cfg.recognition_dir and cfg.subjectName from it.
cfg = cfg_loading(args.config)

conda env=/gpfs/milgram/project/turk-browne/users/kp578/CONDA/rtcloud
toml filename=/gpfs/milgram/project/turk-browne/projects/rtSynth_rt/projects/rtSynth_rt/conf/sub002.ses4.toml


In [6]:

def behaviorDataLoading(cfg,curr_run):
    '''
    Load the behavior log of one recognition run and keep the correctly answered trials.

    The file "{cfg.recognition_dir}{cfg.subjectName}_{curr_run}.csv" is read and
    reduced to the columns TR, image_on, Resp and Item. For every row showing an
    item (A-D), a response of 1 or 2 recorded on the following row replaces the
    row's own response; this covers a response that arrives one TR late and a
    double press (the later press is taken as the real choice). Rows without an
    item are then dropped, each response is compared with the expected button
    of its item, the accuracy is printed, and only the correct trials are returned.

    Parameters
    ----------
    cfg : object exposing `recognition_dir` and `subjectName`. `recognition_dir`
        is used as a raw string prefix, so it must end with a path separator.
    curr_run : run identifier used in the file name; must be convertible by int().

    Returns
    -------
    pandas.DataFrame with the columns TR, image_on, Resp, Item, isCorrect, subj
    and run_num, restricted to correctly answered trials. The row labels of the
    CSV are preserved.

    Raises
    ------
    KeyError if a non-missing Item value is not one of A, B, C, D.
    '''
    # Imported locally so the function does not depend on a later notebook cell
    # having already executed `import pandas as pd`.
    import pandas as pd

    behav_data = pd.read_csv(f"{cfg.recognition_dir}{cfg.subjectName}_{curr_run}.csv")

    # The Item (imcode) column encodes the image shown:
    # A = bed, B = chair, C = table, D = bench.
    # Expected button for each item code: "A" -> 1, "B" -> 2, "C" -> 1, "D" -> 2.
    correctResponseDict={
    'A': 1,
    'B': 2,
    'C': 1,
    'D': 2}

    # Keep the TR, the real time the image was presented, the response and the item.
    # .copy() makes this an independent frame, so the assignments below modify it
    # directly instead of going through a chained-indexing view.
    behav_data = behav_data[['TR', 'image_on', 'Resp',  'Item']].copy()

    # Handle (A) a response that is late by one TR and (B) two button presses:
    # in both cases the response found on the next row is used for the item row.
    # shift(-1) leaves NaN for the last row, so "next row" never runs past the end.
    next_resp = behav_data['Resp'].shift(-1)
    take_next = behav_data['Item'].isin(["A","B","C","D"]) & next_resp.isin([1.0,2.0])
    behav_data.loc[take_next, 'Resp'] = next_resp[take_next]

    behav_data = behav_data.dropna(subset=['Item']).copy()

    # A trial is correct when the response equals the expected button of its item;
    # a missing response (NaN) never compares equal and therefore counts as wrong.
    isCorrect = [
        correctResponseDict[item] == resp
        for item, resp in zip(behav_data['Item'], behav_data['Resp'])
    ]
    # Avoid numpy's empty-mean warning when the run contains no item rows.
    accuracy = np.mean(isCorrect) if isCorrect else float('nan')
    print(f"behavior pressing accuracy for run {curr_run} = {accuracy}")

    # An explicit bool dtype keeps the column usable as a mask even when it is empty.
    behav_data['isCorrect'] = pd.Series(isCorrect, index=behav_data.index, dtype=bool)
    behav_data['subj'] = cfg.subjectName
    behav_data['run_num'] = int(curr_run)
    # Discard the trials where the subject made the wrong selection.
    return behav_data[behav_data['isCorrect']]


In [None]:
#### # import and set up environment
import sys
from subprocess import call
import nibabel as nib
import pydicom as dicom
import numpy as np
import time
import os
from glob import glob
import shutil
import pandas as pd
# from import convertDicomFileToNifti
from rtCommon.imageHandling import convertDicomImgToNifti, readDicomFromFile
from rtCommon.cfg_loading import mkdir,cfg_loading


# Scan number whose middle volume is used as the template volume. It is set
# directly here; args.scan_asTemplate is not consulted in this cell.
scan_asTemplate=1
'''
purpose: 
    prepare data for the model training code.
steps:
    convert all dicom files into nii files in the temp dir. 
    find the middle volume of the run1 as the template volume
    align every other functional volume with templateFunctionalVolume (3dvolreg)
'''
# Select the run IDs from runRecording.csv: actualRuns holds the values of the
# 'run' column for the rows whose 'type' column equals 'recognition'.
runRecording = pd.read_csv(f"{cfg.recognition_dir}../runRecording.csv")
actualRuns = list(runRecording['run'].iloc[list(np.where(1==1*(runRecording['type']=='recognition'))[0])])

# convert all dicom files into nii files in the temp dir. 
# The whole conversion/alignment step is skipped when the merged file of the
# LAST recognition run already exists (as .nii or .nii.gz).
# NOTE(review): only the last run is checked, so earlier runs are assumed to exist too.
if os.path.exists(f"{cfg.recognition_dir}run{actualRuns[-1]}.nii") or os.path.exists(f"{cfg.recognition_dir}run{actualRuns[-1]}.nii.gz"):
    pass # if the fslmerge result is detected to already exist, skip this step
else:
    # Work in a fresh temp folder named after the current timestamp.
    tmp_dir=f"{cfg.tmp_folder}{time.time()}/" ; mkdir(tmp_dir)
    dicomFiles=glob(f"{cfg.dicom_dir}/*.dcm") ; dicomFiles.sort()
    for curr_dicom in dicomFiles:
        dicomImg = readDicomFromFile(curr_dicom) # read dicom file
        # NOTE(review): convertDicomImgToNifti is an rtCommon helper not visible here;
        # presumably it writes a .nii file derived from dicomFilename into tmp_dir,
        # since the globs below look for "*.nii" there - confirm.
        convertDicomImgToNifti(dicomImg, dicomFilename=f"{tmp_dir}/{curr_dicom.split('/')[-1]}") #convert dicom to nii    
        # os.remove(f"{tmp_dir}/{curr_dicom.split('/')[-1]}") # remove temp dcm file

    # find the middle volume of the run1 as the template volume

    # Zero-pad the scan number to 6 digits (1 -> "000001") to match the file names.
    scan_asTemplate=str(scan_asTemplate).zfill(6)
    tmp=glob(f"{tmp_dir}001_{scan_asTemplate}*.nii") ; tmp.sort()
    # NOTE(review): tmp[int(len(tmp)/2)] below raises IndexError when the glob matches nothing.
    # cfg.templateFunctionalVolume = f"{cfg.recognition_dir}/templateFunctionalVolume.nii" 
    if cfg.session ==1:
        # Session 1: the middle volume becomes the template, and the "converted"
        # template is a plain copy of it.
        call(f"cp {tmp[int(len(tmp)/2)]} {cfg.templateFunctionalVolume}", shell=True)
        call(f"cp {cfg.templateFunctionalVolume} {cfg.templateFunctionalVolume_converted}", shell=True)
    else:
        # call(f"cp {tmp[int(len(tmp)/2)]} {cfg.templateFunctionalVolume_converted}", shell=True)
        # Other sessions: run flirt with the existing template as reference and this
        # session's middle volume as input; the result is written to
        # cfg.templateFunctionalVolume_converted.
        cmd=f"flirt -ref {cfg.templateFunctionalVolume} \
            -in {tmp[int(len(tmp)/2)]} \
            -out {cfg.templateFunctionalVolume_converted}"
        print(cmd)
        # NOTE(review): the return code of call() is not checked here or below.
        call(cmd,shell=True) 

    # align every other functional volume with templateFunctionalVolume (3dvolreg)
    # NOTE(review): allTRs is not used anywhere in the visible code.
    allTRs=glob(f"{tmp_dir}/001_*.nii") ; allTRs.sort()

    for curr_run in actualRuns:
        outputFileNames=[]
        # All volumes of this run, in sorted file-name order.
        runTRs=glob(f"{tmp_dir}/001_{str(curr_run).zfill(6)}_*.nii") ; runTRs.sort()
        for curr_TR in runTRs:
            # curr_TR[0:-4] strips the ".nii" suffix, so the aligned volume is
            # written next to the input as "<name>_aligned.nii".
            command = f"3dvolreg \
                -base {cfg.templateFunctionalVolume_converted} \
                -prefix  {curr_TR[0:-4]}_aligned.nii \
                {curr_TR}"
            call(command,shell=True)
            outputFileNames.append(f"{curr_TR[0:-4]}_aligned.nii")
        # Build the space-separated list of aligned volumes passed to fslmerge.
        files=''
        for f in outputFileNames:
            files=files+' '+f
        # Merge the aligned volumes of this run along time into run<curr_run>.nii.
        # NOTE(review): the cell below loads "run<curr_run>.nii.gz"; presumably FSL
        # writes a gzipped file depending on its output-type setting - confirm.
        command=f"fslmerge -t {cfg.recognition_dir}run{curr_run}.nii {files}"
        print('running',command)
        call(command, shell=True)

    # remove the tmp folder
    # NOTE(review): this is not reached if any statement above raises, leaving tmp_dir behind.
    shutil.rmtree(tmp_dir)

# For each recognition run:
#   1. load the behavior data (correctly answered trials only),
#   2. pair every kept trial with the brain volume recorded 2 TRs (4 s) later,
#   3. save the selected brain volumes and the matching behavior rows.
for curr_run_behav,curr_run in enumerate(actualRuns):
    # Behavior files are numbered 1, 2, ... in the order of the recognition runs.
    # The TR column of behav_data counts from 0, and so do the brain volumes.
    behav_data = behaviorDataLoading(cfg,curr_run_behav+1)
    print(f"behav_data.shape={behav_data.shape}")

    # Load the merged run. The gzipped file is preferred; the plain .nii is used
    # only when no .nii.gz exists.
    run_path = f"{cfg.recognition_dir}run{curr_run}.nii.gz"
    if not os.path.exists(run_path) and os.path.exists(f"{cfg.recognition_dir}run{curr_run}.nii"):
        run_path = f"{cfg.recognition_dir}run{curr_run}.nii"
    # np.asanyarray(img.dataobj) is the documented replacement for the deprecated
    # img.get_data() and returns the same array.
    brain_data = np.asanyarray(nib.load(run_path).dataobj)
    brain_data = np.transpose(brain_data,(3,0,1,2)) # time axis first
    print(f"brain_data.shape={brain_data.shape}")

    # Index of the brain volume belonging to each kept trial: its TR pushed back by 2.
    Brain_TR = behav_data['TR'].to_numpy().astype(int) + 2

    # When the brain data is shorter than the behavior data (e.g. the recognition run
    # was stopped too early), some shifted TRs do not exist. Drop every such trial
    # from both the index list and the behavior table so the two stay aligned.
    in_range = Brain_TR < brain_data.shape[0]
    if not in_range.all():
        print(f"run {curr_run}: dropping {int((~in_range).sum())} trial(s) without a matching brain volume")
        Brain_TR = Brain_TR[in_range]
        behav_data = behav_data[in_range]

    print(f"Brain_TR.shape={Brain_TR.shape}")
    brain_data=brain_data[Brain_TR]
    print(f"brain_data.shape={brain_data.shape}")
    np.save(f"{cfg.recognition_dir}brain_run{curr_run}.npy", brain_data)
    # save the behavior data
    behav_data.to_csv(f"{cfg.recognition_dir}behav_run{curr_run}.csv")



In [17]:
# Scratch cell: reload the behavior table and the brain data of one run for inspection.
# NOTE: relies on `curr_run_behav` and `curr_run` still holding the values left by
# the per-run loop of the previous cell.
behav_data = behaviorDataLoading(cfg,curr_run_behav+1)
print(behav_data.shape)
# np.asanyarray(img.dataobj) replaces the deprecated img.get_data() and returns the
# same array; the time axis is then moved to the front.
brain_data = np.asanyarray(nib.load(f"{cfg.recognition_dir}run{curr_run}.nii.gz").dataobj)
brain_data = np.transpose(brain_data,(3,0,1,2))
print(brain_data.shape)

behavior pressing accuracy for run 1 = 1.0
(48, 7)



* deprecated from version: 3.0
* Will raise <class 'nibabel.deprecator.ExpiredDeprecationError'> as of version: 5.0
  after removing the cwd from sys.path.


(144, 64, 64, 36)


In [18]:
# Brain volume index for each kept behavior row: every volume index is shifted by
# 2 TRs, then the entries at the behavior TR positions are picked.
Brain_TR = (np.arange(brain_data.shape[0]) + 2)[list(behav_data['TR'])]


Unnamed: 0,TR,image_on,Resp,Item,isCorrect,subj,run_num
3,9,18.005091,2.0,B,True,sub002,1
6,12,24.005132,1.0,A,True,sub002,1
9,15,30.005165,1.0,C,True,sub002,1
13,19,38.005211,2.0,D,True,sub002,1
16,22,44.005242,1.0,C,True,sub002,1
19,25,50.005339,2.0,D,True,sub002,1
22,28,56.005317,2.0,B,True,sub002,1
24,30,60.005341,1.0,A,True,sub002,1
27,33,66.005377,1.0,C,True,sub002,1
29,35,70.005403,2.0,D,True,sub002,1


In [16]:
# Debug output: show the loop counter and run ID left over from the per-run loop.
print(curr_run_behav,curr_run)

0 1
