In [1]:
from pathlib import Path

In [34]:
mbdu_bids_root = Path('/data/MBDU/ABCD/BIDS/NKI_script/MID')
dsst_bids_root = Path('/data/ABCD_DSST/bids_20190215/')
mriqc_outdir = Path('/data/ABCD_DSST/bids_20190215/derivatives/mriqc')
container_path = Path('/data/ABCD_DSST/containers/poldracklab_mriqc-2018-08-21-8efddd374773.simg')
swarm_file = Path('/data/ABCD_DSST/swarms/mriqc_swarm/mriqc_swarm')
swarm_log = Path('/data/ABCD_DSST/swarms/mriqc_swarm/logs')

In [40]:
# make sure that there aren't any subjects 
# in DSST bids root that aren't in MBDU bids root

mbdu_subs = sorted(list(mbdu_bids_root.glob('sub-*')))
mbdu_subs = set([ss.parts[-1] for ss in mbdu_subs])
dsst_subs = sorted(list(dsst_bids_root.glob('sub-*')))
dsst_subs = set([ss.parts[-1] for ss in dsst_subs])

print(f"{len(dsst_subs.difference(mbdu_subs))} are in DSST that aren't in MBDU")
if len(dsst_subs.difference(mbdu_subs)) > 0:
    for ss in dsst_subs.difference(mbdu_subs):
        assert ss != ''
        ! rm -rf {dsst_bids_root / ss}
    
mbdu_subs = sorted(list(mbdu_bids_root.glob('sub-*')))
mbdu_subs = set([ss.parts[-1] for ss in mbdu_subs])
dsst_subs = sorted(list(dsst_bids_root.glob('sub-*')))
dsst_subs = set([ss.parts[-1] for ss in dsst_subs])
assert len(dsst_subs.difference(mbdu_subs)) == 0

In [54]:
subs = sorted(list(dsst_bids_root.glob('sub-*')))

In [55]:
bad_subs = []
for sub in subs:
    tmp = (sub / 'tmp')
    rest = (sub / 'ses-1' / 'rest')
    if (tmp.exists()) & (rest.exists()):
        bad_subs.append(sub)
        
# make sure no subjects have the tmp or rest directories
assert len(bad_subs) == 0

In [23]:
# If you don't already have your singularity bind path set,
# append this to the front of your command:
# export SINGULARITY_BINDPATH=/gs3,/gs4,/gs5,/gs6,/gs7,/gs8,/gs9,/gs10,/gs11,/spin1,/scratch,/fdb,/data,/lscratch &&

In [57]:
cmds = []
for sub in subs:
    participant_label = sub.parts[-1].split('-')[-1]
    cmd = (f'singularity run {container_path} --partipant_label {participant_label}' 
           + f' {dsst_bids_root} {mriqc_outdir} participant')
    cmds.append(cmd)

In [58]:
len(cmds)

8985

In [60]:
# test swarm command with two subjects
swarm_file.write_text('\n'.join(cmds[:2]))
swarm_file.read_text().split('\n')

['singularity run /data/ABCD_DSST/containers/poldracklab_mriqc-2018-08-21-8efddd374773.simg --partipant_label NDARINV005V6D2C /data/ABCD_DSST/bids_20190215 /data/ABCD_DSST/bids_20190215/derivatives/mriqc participant',
 'singularity run /data/ABCD_DSST/containers/poldracklab_mriqc-2018-08-21-8efddd374773.simg --partipant_label NDARINV007W6H7B /data/ABCD_DSST/bids_20190215 /data/ABCD_DSST/bids_20190215/derivatives/mriqc participant']

In [61]:
# Submitting a test
!swarm -f {swarm_file} -g 24 -t 14 --partition norm,quick --logdir {swarm_log} --time 04:00:00

20591660


In [42]:
rsync_swarm_file.write_text('\n'.join(cmds[100:]))
len(rsync_swarm_file.read_text().split('\n'))

5099

In [43]:
# Submitting for real
!swarm -f {rsync_swarm_file} -g 5 -t 2 -b 24 --partition norm,quick --logdir {rsync_swarm_log} --time 00:10:00

20463478


In [52]:
# check to see how many new subs are done
cmds = []
for sub in subs:
    tmp = (sub / 'tmp')
    rest = (sub / 'ses-1' / 'rest')
    dest = (dsst_bids_root / sub.parts[-1])
    if (not tmp.exists()) & (not rest.exists()) & (not dest.exists()):
        cmd = f'rsync -ach {sub} {dsst_bids_root}/'
        cmds.append(cmd)

In [55]:
rsync_swarm_file.write_text('\n'.join(cmds))
len(rsync_swarm_file.read_text().split('\n'))


2646

In [56]:
# Submitting 
!swarm -f {rsync_swarm_file} -g 5 -t 2 -b 24 --partition norm,quick --logdir {rsync_swarm_log} --time 00:10:00

20500070


In [57]:
# check to see how many new subs are done
cmds = []
for sub in subs:
    tmp = (sub / 'tmp')
    rest = (sub / 'ses-1' / 'rest')
    dest = (dsst_bids_root / sub.parts[-1])
    if (not tmp.exists()) & (not rest.exists()) & (not dest.exists()):
        cmd = f'rsync -ach {sub} {dsst_bids_root}/'
        cmds.append(cmd)

In [58]:
rsync_swarm_file.write_text('\n'.join(cmds))
len(rsync_swarm_file.read_text().split('\n'))

44

In [59]:
# Submitting 
!swarm -f {rsync_swarm_file} -g 5 -t 2 -b 24 --partition norm,quick --logdir {rsync_swarm_log} --time 00:10:00

20544480


In [8]:
subs = sorted(list(mbdu_bids_root.glob('sub-*')))

# check to see how many new subs are done
cmds = []
for sub in subs:
    tmp = (sub / 'tmp')
    rest = (sub / 'ses-1' / 'rest')
    dest = (dsst_bids_root / sub.parts[-1])
    if (not tmp.exists()) & (not rest.exists()) & (not dest.exists()):
        cmd = f'rsync -ach {sub} {dsst_bids_root}/'
        cmds.append(cmd)

In [12]:
rsync_swarm_file.write_text('\n'.join(cmds))
len(rsync_swarm_file.read_text().split('\n'))

1284

In [13]:
!swarm -f {rsync_swarm_file} -g 5 -t 2 -b 24 --partition norm,quick --logdir {rsync_swarm_log} --time 00:10:00

20590619
