# __FW-to-BIDS__

## Download new Flywheel sessions and convert them into BIDS format

### Needs the following user input: 
#### 1) project_name: the project name

#### 2) bids_dir: the BIDS_dir you are adding to

#### 3) skip_complete: a boolean indicating whether to skip already converted files (True) or convert all files (False)

### __Requires 1) pydeface to be installed & 2) to be run in python3__



## Imports

In [None]:
#for fw downloading
import flywheel

#for both downloading and converting to BIDS
import os
import glob
import tarfile

#for BIDS conversion
import json
import nibabel as nib
import nipype.interfaces.fsl as fsl
import numpy as np
import pandas as pd
import re
import shutil
import subprocess
import sys
from zipfile import ZipFile 

## __User Input:__

In [None]:
# True: leave sessions whose BIDS folder already exists untouched.
# False: convert every downloaded session again.
skip_complete = True

# Project path handed to fw.lookup() and the local BIDS folder to fill.
project_name = 'russpold/uh2aim4'
bids_dir = 'uh2aim4_bids'

# First run only: create the BIDS folder and seed it with an empty README.
if not os.path.exists(bids_dir):
    os.mkdir(bids_dir)
    with open(bids_dir + '/README', 'a'):
        pass


# __Part I Download New Sessions from Flywheel__

#### Open flywheel and get the current project

In [None]:
# Create the Flywheel client (no explicit credentials are passed here).
fw = flywheel.Client()


# Resolve the project path given in the user-input cell to a project object.
project = fw.lookup(project_name)
uh2aim4_id = project.id # project id; not referenced again in the visible code

fw.get_current_user() # shows which account the client is logged in as

In [None]:
# Everything found two levels below bids_dir (presumably the sub-*/ses-*
# folders converted so far); the bare name displays the list in the notebook.
local_sessions = glob.glob(os.path.join(bids_dir, '*/*'))
local_sessions

#### Generate list/dictionary of subject IDs

In [None]:
# id_dict maps subject label -> flywheel subject id; ids keeps the labels
# in the order the project returns them.
id_dict = {}
ids = []
for subject in project.subjects():
    label = subject.label
    id_dict[label] = subject.id
    ids.append(label)
    # Show every session of this subject as "<session id>: <session label>".
    for session in subject.sessions():
        print('%s: %s' % (session.id, session.label))

ids

#### Make or update participants.tsv as appropriate

In [None]:
# BIDS requires the key column of participants.tsv to be named
# "participant_id" and to hold the "sub-<label>" folder names.
# NOTE(review): assumes the converted subject folders are named after
# subject.label - confirm against the extracted flywheel tree.
participant_ids = ['sub-%s' % label for label in ids]
df = pd.DataFrame(participant_ids, columns=['participant_id'])
df.to_csv(os.path.join(bids_dir, 'participants.tsv'), index=False, sep='\t')

#### Compare flywheel sessions to local & find the new sessions to download

In [None]:
new_sessions = []

for subject in project.subjects():
    # get_session_num() numbers sessions in sorted-label order, so the
    # sessions are ordered the same way and the ones still missing on disk
    # are taken from the end of the list.
    # NOTE(review): assumes session labels sort chronologically - confirm.
    fw_subj_sessions = sorted(subject.sessions(), key=lambda ses: ses.label)
    # Count the session folders already converted for this subject (the
    # folders inside sub-<label>), not the subject folder itself.
    local_subj_sessions = [
        p for p in glob.glob(os.path.join(bids_dir, 'sub-%s' % subject.label, '*'))
        if os.path.isdir(p)
    ]
    num_missing = len(fw_subj_sessions) - len(local_subj_sessions)
    if num_missing > 0:
        new_sessions.extend(fw_subj_sessions[-num_missing:])

print('new session(s): ')
for ses in new_sessions:
    print(ses.label)

### Download the new sessions as individual tar files
This can take a while.

In [None]:
# One archive per new session; dicom and pfile data are left out of the download.
for new_ses in new_sessions:
    tar_name = 'new-session-%s-%s.tar' % (new_ses.subject.code, new_ses.label)
    print('downloading ' + tar_name)
    fw.download_tar([new_ses], tar_name, exclude_types=['dicom', 'pfile'])

print('downloading complete')

# __Part II - Extract & Organize files into BIDS__

### Extract the tar files

In [None]:
# Unpack every downloaded archive into the current working directory.
for new_ses in new_sessions:
    tar_name = 'new-session-%s-%s.tar' % (new_ses.subject.code, new_ses.label)
    print('extracting ' + tar_name)
    # The context manager closes the archive handle even if extraction fails.
    with tarfile.open(tar_name) as tar:
        tar.extractall()

print('extraction complete')

### Constant Variables

In [None]:
# Sidecar metadata dumped as recording-<name>_physio.json in the BIDS root.
# NOTE(review): BIDS defines SamplingFrequency in Hz; .01 and .04 look like
# sampling intervals in seconds - confirm against the scanner's physio setup.
cardiac_bids = dict(
    StartTime=-30.0,
    SamplingFrequency=.01,
    Columns=["cardiac"],
)

respiratory_bids = dict(
    StartTime=-30.0,
    SamplingFrequency=.04,
    Columns=["respiratory"],
)

## Helper Functions

In [None]:
#standardizes file names
def clean_file(f):
    """
    Return `f` with 'task_' -> 'task-', 'run_' -> 'run-' and every
    '_ssg' removed. Substitutions are applied in that order.
    """
    for old, new in (('task_', 'task-'), ('run_', 'run-'), ('_ssg', '')):
        f = f.replace(old, new)
    return f
#renames files with standardized file names
def cleanup(path):
    """
    Rename every entry directly inside `path` to its standardized name.

    Only the entry's own name goes through clean_file(); the directory
    part is kept as is, so a 'task_', 'run_' or '_ssg' occurring in `path`
    itself cannot redirect the rename to a non-existent folder. Entries
    whose name is already clean are left alone.
    """
    for f in glob.glob(os.path.join(path, '*')):
        parent, name = os.path.split(f)
        cleaned = clean_file(name)
        if cleaned != name:
            os.rename(f, os.path.join(parent, cleaned))
        
        
def bids_anat(sub_id, anat_dir, anat_path):
    """
    Defaces the anatomical image found in a T1 or T2 folder and stores the
    result in BIDS format. Assumes files are organized appropriately
    (e.g. "project_data/s999/ses-1/anat-[T1w,T2w]")

    sub_id:    prefix of the output file name (bids_subj passes
               "<sub>_<ses>")
    anat_dir:  source folder; the part after '-' in its "anat-<type>" path
               component becomes the file suffix
    anat_path: destination "anat" folder

    Raises AssertionError unless anat_dir holds exactly one *nii.gz file.
    Progress is written to the module-level `to_write` log.
    """
    #T1 or T2?
    path_pieces = anat_dir.split('/')
    anat_type = [x for x in path_pieces if 'anat' in x][0].split('-')[1]
    anat_file = glob.glob(os.path.join(anat_dir,'*nii.gz'))
    assert len(anat_file) == 1, \
        "Expected exactly one anat file in directory %s, found %d" % (anat_dir, len(anat_file))
    new_file = os.path.join(anat_path, sub_id + '_' + anat_type + '.nii.gz')
    if os.path.exists(new_file):
        to_write.write('Did not save anat because %s already exists!\n' % new_file)
        return
    to_write.write('\tDefacing...\n')
    # An argument list (no shell) hands paths containing spaces or shell
    # metacharacters to pydeface unchanged.
    exit_code = subprocess.call(['pydeface', anat_file[0], '--outfile', new_file])
    if exit_code != 0:
        to_write.write('\tpydeface exited with status %d for %s\n' % (exit_code, anat_file[0]))


def bids_fmap(base_file_id, fmap_dir, fmap_path, func_path):
    """
    Copies the two images of a fieldmap folder into BIDS format and writes
    the fieldmap sidecar. Assumes files are organized appropriately
    (e.g. "project_data/s999/ses-1/fmap-fieldmap/[examinfo]_fieldmap.json"
          "project_data/s999/ses-1/fmap-fieldmap/[examinfo]_fieldmap.nii.gz" )

    base_file_id: "<sub>_<ses>" prefix for the output file names
    fmap_dir:     source folder holding exactly two *.nii.gz files
    fmap_path:    destination "fmap" folder
    func_path:    destination "func" folder; its *task*bold.nii.gz files
                  are listed under IntendedFor in the sidecar

    Raises AssertionError unless fmap_dir holds exactly two *.nii.gz files.
    Nothing is written when the target fieldmap already exists; that case
    is noted in the module-level `to_write` log.
    """
    fmap_files = glob.glob(os.path.join(fmap_dir,'*.nii.gz'))
    assert len(fmap_files) == 2, "Didn't find the correct number of files in %s" % fmap_dir
    # glob order is not fixed: whichever path contains 'fieldmap.nii.gz' is
    # the fieldmap, the other one is treated as the magnitude image.
    fmap_index = 1 if 'fieldmap.nii.gz' in fmap_files[1] else 0
    fmap_file = fmap_files[fmap_index]
    mag_file = fmap_files[1-fmap_index]
    fmap_target = os.path.join(fmap_path, base_file_id + '_' + 'fieldmap.nii.gz')
    mag_target = os.path.join(fmap_path, base_file_id + '_' + 'magnitude.nii.gz')
    if os.path.exists(fmap_target):
        to_write.write('Did not save fmap_epi because %s already exists!\n' % fmap_target)
        return
    shutil.copyfile(fmap_file, fmap_target)
    shutil.copyfile(mag_file, mag_target)
    # IntendedFor keeps the last three path components: <session>/func/<file>.
    func_runs = [os.sep.join(os.path.normpath(f).split(os.sep)[-3:])
                 for f in glob.glob(os.path.join(func_path,'*task*bold.nii.gz'))]
    fieldmap_meta = {'Units': 'Hz', 'IntendedFor': func_runs}
    with open(os.path.join(fmap_path, base_file_id + '_fieldmap.json'), 'w') as meta_out:
        json.dump(fieldmap_meta, meta_out)

#bids_sbref(base_file_id, sbref_dir, func_path, bids_dir)        
def bids_sbref(base_file_id, sbref_dir, func_path, bids_dir):
    """
    Copies the image of an sbref calibration scan folder into BIDS format
    and writes its metadata. Assumes tasks are organized appropriately
    (e.g. "project_data/s999/ses-1/task-rest_run-1_sbref/[examinfo].json"
          "project_data/s999/ses-1/task-rest_run-1_sbref/[examinfo].nii.gz" )

    base_file_id: "<sub>_<ses>" prefix for the output file name
    sbref_dir:    source folder; its name becomes the rest of the file name
    func_path:    destination "func" folder
    bids_dir:     BIDS root, where the shared metadata JSON is written

    Raises AssertionError when more than one candidate image is found.
    Progress is written to the module-level `to_write` log.
    """
    filename = os.path.basename(sbref_dir)
    # remove files that are sometimes added, but are of no interest
    sbref_files = [i for i in glob.glob(os.path.join(sbref_dir,'*.nii.gz'))
                   if 'phase' not in i]
    assert len(sbref_files) <= 1, "More than one func file found in directory %s" % sbref_dir
    if not sbref_files:
        to_write.write('Skipping %s, no nii.gz file found\n' % sbref_dir)
        return

    # bring to subject directory and divide into sbref and bold
    sbref_file = clean_file(os.path.join(func_path, base_file_id + '_' + filename + '.nii.gz'))
    # check if file exists. If it does, check if the saved file has more time points
    if os.path.exists(sbref_file):
        to_write.write('%s already exists!\n' % sbref_file)
        saved_shape = nib.load(sbref_file).shape
        current_shape = nib.load(sbref_files[0]).shape
        to_write.write('Dimensions of saved image: %s\n' % list(saved_shape))
        to_write.write('Dimensions of current image: %s\n' % list(current_shape))
        if current_shape[-1] <= saved_shape[-1]:
            to_write.write('Current image has fewer or equivalent time points than saved image. Exiting...\n')
            return
        to_write.write('Current image has more time points than saved image. Overwriting...\n')
    # save sbref image to bids directory
    shutil.copyfile(sbref_files[0], sbref_file)
    # get metadata; the run label is dropped ('+' also covers run numbers
    # of two or more digits) so that all runs of a task share one JSON
    sbref_meta_path = clean_file(os.path.join(bids_dir, re.sub(r'_run[-_][0-9]+','',filename) + '.json'))
    if not os.path.exists(sbref_meta_path):
        try:
            json_file = [x for x in glob.glob(os.path.join(sbref_dir,'*.json'))
                         if 'qa' not in x][0]
            func_meta = get_functional_meta(json_file, filename)
        except IndexError:
            to_write.write("Metadata couldn't be created for %s\n" % sbref_file)
        else:
            # The output file is opened only once the metadata exists, and is
            # closed by the context manager.
            with open(sbref_meta_path, 'w') as meta_out:
                json.dump(func_meta, meta_out)

            
def task_filter(task_dirs):
    """
    Drop superseded duplicates from a list of task folders.

    For every entry, all entries that contain it as a substring form a
    group; within a group of two or more only the entry that sorts last
    is kept. The input list is not modified.
    """
    kept = task_dirs
    for candidate in sorted(task_dirs):
        group = sorted((d for d in task_dirs if candidate in d), reverse=True)
        if len(group) <= 1:
            continue
        superseded = group[1:]
        kept = [d for d in kept if d not in superseded]
    return kept

#bids_task(base_file_id, task_dir, func_path, bids_dir)
def bids_task(base_file_id, task_dir, func_path, bids_dir):
    """
    Copies the image of a task folder into BIDS format, writes its metadata
    and converts the physio recordings if there are any. Assumes tasks are
    organized appropriately
    (e.g. "project_data/s999/ses-1/task-stopSignal_run-1_ssg/[examinfo].json"
          "project_data/s999/ses-1/task-stopSignal_run-1_ssg/[examinfo].nii.gz" )

    base_file_id: "<sub>_<ses>" prefix for the output file names
    task_dir:     source folder; its name becomes the rest of the file name
    func_path:    destination "func" folder
    bids_dir:     BIDS root, where the shared "<task>_bold.json" is written

    Raises AssertionError when more than one candidate image or more than
    one physio archive is found. Progress is written to the module-level
    `to_write` log.
    """
    taskname = os.path.basename(task_dir)
    task_file = [f for f in glob.glob(os.path.join(task_dir,'*1.nii.gz')) if "fieldmap" not in f]
    assert len(task_file) <= 1, "More than one func file found in directory %s" % task_dir
    if not task_file:
        to_write.write('Skipping %s, no nii.gz file found\n' % task_dir)
        return

    # bring to subject directory and divide into sbref and bold.
    # str.replace() matches literally (a '*' is not a wildcard), so the
    # '_ssg' marker is swapped for the '_bold' suffix by its plain name, and
    # only within the file name, never within func_path.
    bold_name = base_file_id + '_' + taskname.replace('_ssg', '_bold') + '.nii.gz'
    bold_file = os.path.join(func_path, bold_name)
    # check if file exists. If it does, check if the saved file has more time points
    if os.path.exists(bold_file):
        to_write.write('%s already exists!\n' % bold_file)
        saved_shape = nib.load(bold_file).shape
        current_shape = nib.load(task_file[0]).shape
        to_write.write('Dimensions of saved image: %s\n' % list(saved_shape))
        to_write.write('Dimensions of current image: %s\n' % list(current_shape))
        if current_shape[-1] <= saved_shape[-1]:
            to_write.write('Current image has fewer or equal time points than saved image. Exiting...\n')
            return
        to_write.write('Current image has more time points than saved image. Overwriting...\n')
    # save bold image to bids directory
    shutil.copyfile(task_file[0], bold_file)
    # get epi metadata; the JSON is named after the part of the folder name
    # before the first '_' (which cannot contain a '_run' label)
    bold_meta_path = clean_file(os.path.join(bids_dir, taskname.split('_')[0] + '_bold.json'))
    if not os.path.exists(bold_meta_path):
        meta_candidates = [x for x in glob.glob(os.path.join(task_dir,'*.json')) if 'qa' not in x]
        if meta_candidates:
            func_meta = get_functional_meta(meta_candidates[0], taskname)
            with open(bold_meta_path, 'w') as meta_out:
                json.dump(func_meta, meta_out)
        else:
            # same best-effort handling as bids_sbref: log and carry on
            to_write.write("Metadata couldn't be created for %s\n" % bold_file)
    # get physio if it exists
    physio_zips = glob.glob(os.path.join(task_dir, '*physio.zip'))
    if physio_zips:
        assert len(physio_zips) == 1, ("More than one physio file found in directory %s" % task_dir)
        with ZipFile(physio_zips[0], 'r') as zipf:
            zipf.extractall(path=func_path)
        # the archive unpacks into a folder named like the zip without '.zip'
        physio_dir = os.path.join(func_path, os.path.basename(physio_zips[0])[:-4])
        # Derive the physio name from the bold name minus '.nii.gz' and a
        # trailing '_bold'. Building it this way can never yield bold_file
        # itself, so the image cannot be overwritten by the recording.
        physio_stem = bold_file[:-len('.nii.gz')]
        if physio_stem.endswith('_bold'):
            physio_stem = physio_stem[:-len('_bold')]
        for pfile in glob.iglob(os.path.join(physio_dir, '*Data*')):
            pname = 'respiratory' if 'RESP' in pfile else 'cardiac'
            new_physio_file = physio_stem + '_recording-' + pname + '_physio.tsv.gz'
            np.savetxt(new_physio_file, np.loadtxt(pfile), delimiter = '\t')
        shutil.rmtree(physio_dir)

# func_meta = get_functional_meta(meta_file, filename)        
def get_functional_meta(json_file, taskname):
    """
    Returns BIDS meta data for bold using JSON file

    json_file: path of the scanner JSON; the keys read are 'tr', 'te',
               'flip_angle', 'effective_echo_spacing', 'slice_timing',
               'acquisition_matrix' and 'phase_encode_direction'
    taskname:  folder name of the scan; the part before the first '_'
               becomes TaskName

    Raises KeyError when one of those keys is missing and IndexError when
    'phase_encode_direction' is not a valid index into i/j/k.
    """
    with open(json_file, 'r') as meta_in:
        meta_file = json.load(meta_in)
    echo_spacing = meta_file['effective_echo_spacing']
    # TotalReadoutTime = (first acquisition-matrix entry - 1) * echo spacing
    matrix_size = meta_file['acquisition_matrix'][0]

    # fill in metadata
    meta_data = {
        'TaskName': taskname.split('_')[0],
        'EffectiveEchoSpacing': echo_spacing,
        'EchoTime': meta_file['te'],
        'FlipAngle': meta_file['flip_angle'],
        'RepetitionTime': meta_file['tr'],
        # slice timing
        'SliceTiming': meta_file['slice_timing'],
        'TotalReadoutTime': (matrix_size - 1) * echo_spacing,
        # NOTE(review): the direction is always given the '-' polarity - confirm
        'PhaseEncodingDirection': ['i','j','k'][meta_file['phase_encode_direction']] + '-',
    }
    return meta_data


def get_session_num(fly_path, sub_id, bids_dir):
    '''
    Work out the BIDS session label ('ses-<n>') for a flywheel session.

    n = (number of entries found under <bids_dir>/<sub_id>) + 1
        + position of this session among the subject's downloaded
          sessions, sorted by folder name.
    The session name is the fifth '/'-separated piece of fly_path, and the
    downloaded sessions are looked up under the module-level `fly_dir`.

    NOTE(review): get_subj_path() writes to <bids_dir>/sub-<sub_id>, while
    the lookup here uses <bids_dir>/<sub_id> without the 'sub-' prefix, so
    existing sessions are presumably never counted - confirm.
    '''
    already_converted = glob.glob(bids_dir + '/' + sub_id + '/*')
    this_session = fly_path.split('/')[4]
    downloaded = sorted(
        path.split('/')[4] for path in glob.glob(fly_dir + '/' + sub_id + '/*')
    )
    offset = downloaded.index(this_session)
    return 'ses-%d' % (len(already_converted) + 1 + offset)
    

def get_subj_path(fly_path, bids_dir):
    """
    Map a flywheel session path to its BIDS session folder.

    The fourth and fifth '/'-separated pieces of fly_path are taken as the
    subject id and the session name. A session name containing '-' is used
    unchanged; any other name is replaced by the label computed by
    get_session_num(). No correction of subject ids is applied.

    Returns "<bids_dir>/sub-<subject id>/<session>".
    """
    pieces = fly_path.split('/')
    sub_id, session = pieces[3], pieces[4]
    if '-' not in session:
        session = get_session_num(fly_path, sub_id, bids_dir)
    return os.path.join(bids_dir, 'sub-' + sub_id, session)


def mkdir(path):
    """
    Create the directory `path` (its parent must exist) and return `path`.

    An already existing `path` is not an error; it is only noted in the
    module-level `to_write` log. Any other OSError (missing parent,
    permissions, ...) propagates, because reporting it as "already existed"
    would be wrong and the following file copies would fail anyway.
    """
    try:
        os.mkdir(path)
    except FileExistsError:
        to_write.write('Directory %s already existed\n' % path)
    return path

## __Main BIDS Converter Function__

In [None]:
def bids_subj(subj_path, bids_dir, fly_path, skip_complete=False):
    """
    Converts one downloaded flywheel session to BIDS.

    subj_path:     destination session folder, "<bids_dir>/sub-<id>/<session>"
                   (the value returned by get_subj_path)
    bids_dir:      path to the BIDS directory
    fly_path:      path to the session's data in the original format; its
                   *anat*, *sbref*, *task* and *fieldmap* folders are converted
    skip_complete: when True, nothing is done if subj_path already exists

    Progress is written to the module-level `to_write` log.
    """
    if os.path.exists(subj_path) and skip_complete:
        to_write.write("Path %s already exists. Skipping.\n\n" % subj_path)
        return

    to_write.write("********************************************\n")
    to_write.write("BIDSifying %s\n" % subj_path)
    to_write.write("Using flywheel path: %s\n" % fly_path)
    to_write.write("********************************************\n\n")
    # The subject and session IDs are the last two components of the
    # destination path. Taking them by position (instead of searching every
    # component for 'sub'/'ses') keeps a BIDS root whose own name contains
    # one of those strings from being picked up.
    split_path = os.path.normpath(subj_path).split(os.sep)
    sub_id, ses_id = split_path[-2], split_path[-1]
    base_file_id = sub_id + '_' + ses_id
    # split subject path into a super subject path and a session path
    session_path = subj_path
    subj_path = os.path.split(subj_path)[0]
    mkdir(subj_path)
    mkdir(session_path)
    anat_path = mkdir(os.path.join(session_path,'anat'))
    func_path = mkdir(os.path.join(session_path,'func'))
    fmap_path = mkdir(os.path.join(session_path,'fmap'))

    # anat files
    to_write.write(anat_path)
    to_write.write('\nBIDSifying anatomy...\n')
    for anat_dir in sorted(glob.glob(os.path.join(fly_path,'*anat*')), reverse=True):
        to_write.write('\t' + anat_dir + '\n')
        bids_anat(base_file_id, anat_dir, anat_path)

    # sbref files
    to_write.write('\nBIDSifying sbref...\n')
    for sbref_dir in sorted(glob.glob(os.path.join(fly_path,'*sbref*')), reverse=True):
        to_write.write('\t' + sbref_dir + '\n')
        bids_sbref(base_file_id, sbref_dir, func_path, bids_dir)

    # task files (duplicates are filtered first, sbref folders are skipped)
    to_write.write('\nBIDSifying task...\n')
    task_dirs = task_filter(sorted(glob.glob(os.path.join(fly_path,'*task*'))))
    for task_dir in task_dirs:
        if 'sbref' in task_dir:
            continue
        to_write.write('\t' + task_dir + '\n')
        bids_task(base_file_id, task_dir, func_path, bids_dir)

    # cleanup must run before the fieldmaps: bids_fmap lists the func files
    # by their final names
    cleanup(func_path)

    # fmap files
    to_write.write('\nBIDSifying fmap...\n')
    for fmap_dir in sorted(glob.glob(os.path.join(fly_path,'*fieldmap*')), reverse=True):
        to_write.write('\t' + fmap_dir + '\n')
        bids_fmap(base_file_id, fmap_dir, fmap_path, func_path)

## __Convert fw files to BIDS format__

### Constants generated by the Script
These should not need to be messed with

In [None]:
# Extracted download tree of the project and its <subject>/<session> folders.
fly_dir = os.path.join('scitran', project_name)
fly_paths = glob.glob(os.path.join(fly_dir, '*/*'))
# Third path component of fly_dir, i.e. the project part of project_name.
study_id = fly_dir.strip(os.sep).split(os.sep)[2]

# Conversion log used by all helper functions; it deliberately stays open
# here and is closed later with to_write.close().
write_out = '%s_FLY_to_BIDS.txt' % study_id
to_write = open(write_out, 'w')

header = {'Name': study_id, 'BIDSVersion': '1.1-rc1'}
# The context manager flushes and closes the description file right away.
with open(os.path.join(bids_dir, 'dataset_description.json'), 'w') as desc_out:
    json.dump(header, desc_out)
error_file = os.path.join(bids_dir, 'error_record.txt')

### For each new session, convert to BIDS

In [None]:
# Convert the downloaded sessions one by one, in sorted path order.
n_paths = len(fly_paths)
for i, fly_path in enumerate(sorted(fly_paths), start=1):
    to_write.write("BIDSifying path %s out of %s\n" % (i, n_paths))
    subj_path = get_subj_path(fly_path, bids_dir)
    print('Flywheel path:', fly_path)
    print('Subject Path: ', subj_path)
    if subj_path is None:
        # Record the failure in both the conversion log and the error file.
        to_write.write("Couldn't find subj_path for %s\n" % fly_path)
        with open(error_file, 'a') as filey:
            filey.write("Couldn't find subj_path for %s\n" % fly_path)
        continue
    bids_subj(subj_path, bids_dir, fly_path, skip_complete)
    # Flush after each session so the log is current while the next one runs.
    to_write.flush()

# add physio metadata: one sidecar per recording type in the BIDS root,
# written only if it is missing and at least one such recording exists
for recording, sidecar in (('cardiac', cardiac_bids), ('respiratory', respiratory_bids)):
    sidecar_path = os.path.join(bids_dir, 'recording-%s_physio.json' % recording)
    if os.path.exists(sidecar_path):
        continue
    if glob.glob(os.path.join(bids_dir, 'sub-*', 'ses-*', 'func', '*%s*' % recording)):
        # The context manager closes the file handle after writing.
        with open(sidecar_path, 'w') as sidecar_out:
            json.dump(sidecar, sidecar_out)


# Housekeeping

#### cleanup file names and close write object

In [None]:
# Standardize the names of the entries directly inside bids_dir, then
# close the conversion log that was opened as to_write.
cleanup(bids_dir)
to_write.close()

#### Remove tar files

In [None]:
# Delete the archive that was downloaded for each new session.
for new_ses in new_sessions:
    tar_name = 'new-session-%s-%s.tar' % (new_ses.subject.code, new_ses.label)
    os.remove(tar_name)

#### Remove Flywheel Directory (extracted tar files)

In [None]:
# Delete the whole extracted download tree; 'scitran' is the first
# component of fly_dir.
shutil.rmtree('scitran')

In [None]:
# Scratch cell: look at the task folders of the first flywheel path.
# NOTE(review): fly_paths was collected before the 'scitran' tree was
# removed, so once that removal has run the glob below presumably finds
# nothing, and fly_paths[0] raises IndexError if no session was downloaded.
fly_path = fly_paths[0]
subj_path= get_subj_path(fly_path, bids_dir)
task_dirs = sorted(glob.glob(os.path.join(fly_path,'*task*')))
task_dirs