In [30]:
from argparse import ArgumentParser
import sys
import os
import shutil
import subprocess
from pathlib import Path
import re
from filelock import FileLock
from datetime import datetime

import datalad.api as dl
import pandas as pd

In [31]:
# --- Job configuration (hard-coded here for interactive testing) ---
# Job identity; job_name is also used below to build the job directory and the branch name.
job_name = 'sub-001A_bet'
job_id = '1111'
# Shared status table and the lock file guarding access to it.
status_csv = '/misc/geminis2/ramirezd/fb_test/code/status.csv'
status_lockfile = '/misc/geminis2/ramirezd/fb_test/code/status_lockfile'
# Dataset id of the superdataset; its first three characters and the rest
# are used as directory levels inside the clone/push stores.
super_ds_id = '64f1d8ac-346d-4a0a-8c5c-8dca7e25ef7c'
# Store to clone from and store to push results to.
clone_target = '/misc/geminis2/ramirezd/test_bet/.fairlybig/input_ria/'
push_target = '/misc/geminis2/ramirezd/test_bet/.fairlybig/output_ria/'
# Lock file held while pushing git data.
push_lockfile = '/misc/geminis2/ramirezd/fb_test/code/push_lockfile'
# Inputs/outputs handed to the datalad run call.
inputs = 'inputs/mri-raw/sub-001A/anat/sub-001A_T1w.nii.gz'
outputs = 'outputs/sub-001A_T1w_bet.nii.gz'
# Subdatasets (matched by gitmodule name) that are cloned separately and pushed.
output_datasets = ['outputs']
# Optional list of paths fetched with dl.get before the run; None means nothing to fetch.
preget_inputs = None
is_explicit = False
# Command executed by the run call.
dl_cmd = 'bet inputs/mri-raw/sub-001A/anat/sub-001A_T1w.nii.gz outputs/sub-001A_T1w_bet.nii.gz'
# When set, the run cell re-runs this revision instead of executing dl_cmd.
commit = None
# When set (and commit is not), dl_cmd is executed through containers_run with this container name.
container = 'fsl-6-0-4'
# Commit message for the run; the run cell falls back to the branch name when None.
message = None
# Candidate working locations; entries other than /tmp are treated as patterns by get_locations.
# NOTE(review): '{host}' sits inside a plain string (not an f-string), so it is
# not substituted here - confirm whether the host name should be filled in.
ephemeral_locations = ['/tmp', '/misc/{host}[0-9]/ramirezd']
# Disk space (gb) requested by this job; compared with the available space when picking a location.
req_disk_gb = 40

# Node name and user of the running process.
host = os.uname().nodename
user= os.getenv('USER')

# Lock objects built from the lock file paths above.
status_lock = FileLock(status_lockfile)
push_lock = FileLock(push_lockfile)

In [37]:
# Switch the kernel's working directory to the fb_test directory (the parent
# of the code/ folder that holds the status and lock files).
os.chdir('/misc/geminis2/ramirezd/fb_test')

In [32]:
# Functions for disk space management
def get_locations(location_list, host=None, mtab_path='/etc/mtab'):
    """
    Return tmp and non_tmp locations from the list of location patterns.

    Parameters
    ----------
    location_list : list of str
        '/tmp' (or '/tmp/') entries are returned as-is in the first list.
        Every other entry is a path pattern: the part up to and including
        the first component that contains `host` is matched (as a regular
        expression followed by a space) against the lines of the mount
        table, and the remaining components are globbed inside each
        matching mount.
    host : str, optional
        Host name looked for in the pattern components. Defaults to the
        node name of the running machine.
    mtab_path : str, optional
        Mount table to read. It is only opened when there is at least one
        non-tmp pattern.

    Returns
    -------
    tuple (list of str, list of str)
        The tmp entries and the resolved non-tmp locations (without
        duplicates, in discovery order).
    """

    if host is None:
        host = os.uname().nodename

    tmp = []
    not_tmp_patterns = []

    # tmp and non_tmp list
    for location in location_list:
        if location in ('/tmp', '/tmp/'):
            tmp.append(location)
        else:
            not_tmp_patterns.append(location)

    if not not_tmp_patterns:
        return tmp, []

    # read the mount table once instead of once per pattern
    with open(mtab_path, 'r') as mtab:
        mtab_lines = mtab.readlines()

    not_tmp_locations = []

    # the script can accept multiple location patterns
    for not_tmp_pattern in not_tmp_patterns:
        parts = Path(not_tmp_pattern).parts
        if not parts:
            # an empty pattern has nothing to match against the mount table
            continue

        # first component mentioning the host; when none does, the whole
        # pattern is taken as the mount pattern
        split_at = next(
            (index for index, part in enumerate(parts) if host in part),
            len(parts) - 1,
        )

        # node location
        mount_pattern = str(Path(*parts[:split_at + 1]))
        # location inside node (may be empty)
        after_parts = parts[split_at + 1:]

        mount_regex = re.compile(f'{mount_pattern} ')

        for line in mtab_lines:
            # which mount pattern is within the node
            match = mount_regex.search(line)
            if not match:
                continue

            mount = Path(match.group().strip())
            if after_parts:
                # after mount pattern could retrieve multiple locations
                found = [str(path) for path in mount.glob(str(Path(*after_parts)))]
            else:
                # nothing to glob below the mount: the mount itself is the location
                found = [str(mount)]

            for location in found:
                if location not in not_tmp_locations:
                    not_tmp_locations.append(location)

    return tmp, not_tmp_locations


def get_free_disk(location):
    """
    Report the free disk space at `location`, in whole gb (rounded down).
    """

    usage = shutil.disk_usage(location)
    # bytes -> gb
    return usage.free // (1024 ** 3)


def get_used_disk(location):
    """
    Report the used disk space at `location`, in whole gb (rounded down).
    """

    usage = shutil.disk_usage(location)
    # bytes -> gb
    return usage.used // (1024 ** 3)


def _job_dir_used_gb(job_dir):
    """
    Return the summed file size below `job_dir` in whole gb (0 if it is missing).
    """
    if not isinstance(job_dir, str):
        # empty cell in the status table: nothing to measure
        return 0

    used_bytes = 0
    # os.walk yields nothing for a directory that does not exist (yet)
    for root, _dirs, files in os.walk(job_dir):
        for name in files:
            try:
                used_bytes += os.lstat(os.path.join(root, name)).st_size
            except OSError:
                # the file disappeared while the job is still running
                continue

    return used_bytes // (2**30)


def get_available_disk_resource(location, host, status_csv):
    """
    Return the disk space (in gb) at `location` that is not already promised
    to other jobs.

    For every job recorded as 'ongoing' on this `host` and `location`, the
    part of its requested space that it has not yet written to its own
    `job_dir` is treated as reserved and subtracted from the free space.
    Measuring the job directory (rather than the used space of the whole
    filesystem) keeps the reservation from collapsing to zero on a disk
    that already holds other data.

    Parameters
    ----------
    location : str
        Directory whose filesystem is inspected.
    host : str
        Host name the status rows are filtered by.
    status_csv : str
        Path of the status table; needs the columns 'location', 'status',
        'host' and 'req_disk_gb'. When 'job_dir' is absent the full request
        of each ongoing job is reserved.

    Returns
    -------
    Free gb minus the gb still reserved by ongoing jobs (can be negative).
    """

    ongoing = (pd.read_csv(status_csv)
    .query("location == @location and status == 'ongoing' and host == @host")
    )

    current_free_gb = shutil.disk_usage(location).free // (2**30)

    if ongoing.empty:
        return current_free_gb

    if 'job_dir' in ongoing.columns:
        used_disk_gb = ongoing['job_dir'].map(_job_dir_used_gb).astype('int64')
    else:
        used_disk_gb = 0

    # a job that already exceeded its request reserves nothing further
    still_reserved_gb = (ongoing['req_disk_gb'] - used_disk_gb).clip(lower=0)

    return current_free_gb - still_reserved_gb.sum()


def set_status(status_csv, job_name, job_id, req_disk_gb, host, location, job_dir, status, start):
    """
    Append one job record to the status table and write the table back.

    The new record carries the given values plus empty 'update' and
    'traceback' fields. Returns the table including the new record.
    """

    record = {
        'job_name': job_name,
        'job_id': job_id,
        'req_disk_gb': req_disk_gb,
        'host': host,
        'location': location,
        'job_dir': job_dir,
        'status': status,
        'start': start,
        'update': None,
        'traceback': None,
    }
    # one-row frame: every field becomes a single-element column
    record_df = pd.DataFrame({field: [value] for field, value in record.items()})

    previous_df = pd.read_csv(status_csv)
    combined_df = pd.concat([previous_df, record_df])
    combined_df.to_csv(status_csv, index=False)

    return combined_df


def update_status(status_csv, job_name, job_id, host, location, status, update, traceback=None):
    """
    Update an existing job status.

    The rows whose job_name, job_id, host and location all match get their
    'status', 'update' and 'traceback' fields overwritten; the table is
    written back to `status_csv` and returned.

    The identifying columns are read as text and compared as text: a
    numeric-looking job id such as '1111' would otherwise be parsed as an
    integer when the table is read and never equal the string passed in,
    leaving the job's status untouched.

    Parameters
    ----------
    status_csv : str
        Path of the status table.
    job_name, job_id, host, location
        Values identifying the job's row(s).
    status, update, traceback
        New values for the matching rows (traceback defaults to None,
        which clears the field).
    """

    key_columns = ['job_name', 'job_id', 'host', 'location']
    status_df = pd.read_csv(status_csv, dtype={column: str for column in key_columns})

    is_job = (
    (status_df['job_name'] == str(job_name)) &
    (status_df['job_id'] == str(job_id)) &
    (status_df['host'] == str(host)) &
    (status_df['location'] == str(location))
    )

    status_df = (status_df
    .assign(
        status = lambda df_: df_['status'].mask(is_job, status),
        update = lambda df_: df_['update'].mask(is_job, update),
        traceback = lambda df_: df_['traceback'].mask(is_job, traceback)
        )
    )

    status_df.to_csv(status_csv, index=False)

    return status_df


# Functions for cloning and checking out
def do_dead_annex(dpath='cwd'):
    """
    Declare the annex dead: for the current repository when dpath is 'cwd',
    otherwise for every submodule (recursively).
    """
    dead_here = ['git', 'annex', 'dead', 'here']
    if dpath != 'cwd':
        # run the same command inside each submodule
        dead_here = ['git', 'submodule', 'foreach', '--recursive'] + dead_here
    subprocess.run(dead_here)
    

def do_checkout(job_name, dpath='cwd'):
    """
    Create the branch `job_name` and switch to it, either in the current
    directory (dpath == 'cwd') or in the repository at `dpath`.
    """
    repo_args = [] if dpath == 'cwd' else ['-C', dpath]
    subprocess.run(['git', *repo_args, 'checkout', '-b', job_name])

    
def get_private_subdataset(clone_target, sd_path, sd_id):
    """
    Clone a subdataset from the store into `sd_path`, set annex.private
    to true in the clone and initialise git-annex there.
    """
    # Assume clone_target is a RIA store: <store>/<first 3 chars of id>/<rest of id>
    store_path = str(Path(clone_target).joinpath(sd_id[:3], sd_id[3:]))

    in_clone = ['git', '-C', sd_path]
    steps = (
        ['git', 'clone', store_path, sd_path],
        in_clone + ['config', 'annex.private', 'true'],
        in_clone + ['annex', 'init'],
    )
    for step in steps:
        subprocess.run(step)


def git_add_remote(push_path, dpath='cwd'):
    """
    Register `push_path` as the remote named 'outputstore', either in the
    current directory (dpath == 'cwd') or in the repository at `dpath`.
    """
    git = ['git'] if dpath == 'cwd' else ['git', '-C', dpath]
    subprocess.run(git + ['remote', 'add', 'outputstore', push_path])

def git_push(dpath='cwd'):
    """
    Push to the 'outputstore' remote, either from the current directory
    (dpath == 'cwd') or from the repository at `dpath`.

    Raises
    ------
    subprocess.CalledProcessError
        When git exits with a non-zero status. A failed push must not go
        unnoticed: the job directory is removed afterwards, so results
        that were not pushed would be lost.
    """
    if dpath == 'cwd':
        cmd = ['git', 'push', 'outputstore']
    else:
        cmd = ['git', '-C', dpath, 'push', 'outputstore']

    subprocess.run(cmd, check=True)


# cleanup and exception handling
def cleanup(job_dir):
    """
    Make everything below `job_dir` writable, then delete the directory.
    """
    for command in (['chmod', '-R', '+w'], ['rm', '-rf']):
        subprocess.run(command + [job_dir])


# def excepthook(exctype, value, tb):
#     with status_lock:
#         update_status(status_csv, job_name, job_id, host, location, status='error', traceback=tb)
#     print('Type:', exctype)
#     print('Value:', value)
#     print('Traceback:', tb)


In [38]:
# Pick a working location with enough disk space and register the job.
tmp, not_tmp_locations = get_locations(ephemeral_locations)

# manage available disk space
if req_disk_gb is None:
    req_disk_gb = 0

# The lock is held across the space check and the status write so that both
# happen as one step with respect to other holders of the same lock.
with status_lock:

    found_location = False
    # defined up front so the 'no-space' record can be written when nothing qualifies
    location = None

    # /tmp is tried first when it was listed
    if tmp:
        tmp_location = '/tmp'
        available_disk = get_available_disk_resource(tmp_location, host, status_csv)
        if req_disk_gb < available_disk:
            found_location = True
            location = tmp_location

    # otherwise (no /tmp, or /tmp too small) try the location with the most available space
    if not found_location and not_tmp_locations:
        not_tmp_df = (
            pd.DataFrame({'location': not_tmp_locations})
            .assign(available_disk = lambda df_:
                df_['location'].apply(lambda x_: get_available_disk_resource(x_, host, status_csv))
                )
            .sort_values('available_disk', ascending=False)
            )

        if req_disk_gb < not_tmp_df['available_disk'].iat[0]:
            found_location = True
            location = not_tmp_df['location'].iat[0]

    start = datetime.today().strftime("%Y/%m/%d %H:%M:%S")

    if found_location:
        job_dir = str(Path(location) / f'job-{job_name}-{user}')
        set_status(status_csv, job_name, job_id, req_disk_gb, host, location, job_dir, status='ongoing', start=start)
    else:
        set_status(status_csv, job_name, job_id, req_disk_gb, host, location, job_dir=None, status='no-space', start=start)
        raise Exception("Couldn't find a place with enough disk space.")

In [39]:
# Separate an optional 'ria+<protocol>://' prefix from the store locations.
# NOTE: clone_target is overwritten here, so re-running this cell after the
# prefix was stripped falls back to 'ria+file://'.
ria_prefix_pattern = r'ria\+\w+:\/{2}'

clone_prefix_match = re.search(ria_prefix_pattern, clone_target)
if clone_prefix_match:
    clone_ria_prefix = clone_prefix_match.group()
    clone_target = clone_target.replace(clone_ria_prefix, '')
else:
    # assume ria requires a file protocol if no protocol in the job_config
    clone_ria_prefix = 'ria+file://'

# re.sub returns the string unchanged when it carries no prefix
push_target = re.sub(ria_prefix_pattern, '', push_target)

In [40]:
# Source URL of the superdataset: <ria prefix><store location>#<dataset id>
super_clone_target = f'{clone_ria_prefix}{clone_target}#{super_ds_id}'

# Clone into the job directory, passing annex.private=true as a git clone option,
# then work from inside the clone (the 'cwd' variants of the git helpers rely on this).
dl.clone(source=super_clone_target, path=job_dir, git_clone_opts=['-c annex.private=true'])
os.chdir(job_dir)

# Location of the superdataset inside the push store: <store>/<first 3 chars of id>/<rest of id>,
# registered as the 'outputstore' remote of the clone.
push_path = str(Path(push_target) / Path(super_ds_id[:3]) / Path(super_ds_id[3:]))
git_add_remote(push_path, 'cwd')

# Table of subdatasets; later cells read its 'gitmodule_name' and 'gitmodule_datalad-id' columns.
ds = dl.Dataset(job_dir)
sd = pd.DataFrame(ds.subdatasets())


[INFO] Attempting a clone into /tmp/job-sub-001A_bet-ramirezd 
[INFO] Attempting to clone from file:///misc/geminis2/ramirezd/test_bet/.fairlybig/input_ria/64f/1d8ac-346d-4a0a-8c5c-8dca7e25ef7c to /tmp/job-sub-001A_bet-ramirezd 
[INFO] Completed clone attempts for Dataset(/tmp/job-sub-001A_bet-ramirezd) 
[INFO] Reconfigured input_ria-storage for ria+file:///misc/geminis2/ramirezd/test_bet/.fairlybig/input_ria/ 
[INFO] Configure additional publication dependency on "input_ria-storage" 


configure-sibling(ok): . (sibling)
install(ok): /tmp/job-sub-001A_bet-ramirezd (dataset)
action summary:
  configure-sibling (ok: 1)
  install (ok: 1)
subdataset(ok): inputs/containers (dataset)
subdataset(ok): inputs/mri-raw (dataset)
subdataset(ok): outputs (dataset)


In [41]:
# Treat "no output datasets configured" as an empty list
output_datasets = [] if output_datasets is None else output_datasets

In [42]:
# Every configured output dataset must be a known subdataset (matched by gitmodule name)
if output_datasets:
    known_names = set(sd['gitmodule_name'])
    if any(name not in known_names for name in output_datasets):
        raise Exception("Not all output datasets are found.")

In [43]:
# Clone each output subdataset privately and register its location in the push store
for output_dataset in output_datasets:
    matching_ids = sd.loc[sd['gitmodule_name'] == output_dataset, 'gitmodule_datalad-id']
    sd_id = matching_ids.iat[0]
    get_private_subdataset(clone_target, output_dataset, sd_id)

    # <store>/<first 3 chars of id>/<rest of id>
    push_path = str(Path(push_target).joinpath(sd_id[:3], sd_id[3:]))
    git_add_remote(push_path, output_dataset)

# make sure an 'outputs' directory exists in the job directory
outputs_dir = Path('outputs')
if not outputs_dir.exists():
    outputs_dir.mkdir()

Cloning into 'outputs'...
done.
  Remote origin: This repository is not initialized for use by git-annex, but /misc/geminis2/ramirezd/test_bet/.fairlybig/input_ria/3f9/fea76-50a0-489a-b9f5-27bd92a17e09/annex/objects/ exists, which indicates this repository was used by git-annex before, and may have lost its annex.uuid and annex.version configs. Either set back missing configs, or run git-annex init to initialize with a new uuid.


init  
(Auto enabling special remote output_ria-storage...)
(Auto enabling special remote input_ria-storage...)
ok


In [44]:
# Create the job branch in every output dataset first, then in the superdataset
branch_name = f'job-{job_name}'

for checkout_target in [*output_datasets, 'cwd']:
    do_checkout(branch_name, checkout_target)

Switched to a new branch 'job-sub-001A_bet'
Switched to a new branch 'job-sub-001A_bet'


In [45]:
# Nothing is pre-fetched when no list was given or the list contains a None entry
nothing_to_preget = preget_inputs is None or None in preget_inputs
preget_inputs = [] if nothing_to_preget else preget_inputs

for preget_input in preget_inputs:
    dl.get(preget_input)

In [25]:
# Replace the kernel's PATH with a fixed search path.
# NOTE(review): the value is hard-coded and specific to one user and machine, and this
# cell's execution count (25) is lower than that of the surrounding cells, so it was
# run earlier in the session - confirm it is still needed when running top to bottom.
%set_env PATH=/home/inb/soporte/lanirem_software/go_1.20.6/bin:/home/inb/soporte/lanirem_software/apptainer/bin:/opt/sge/bin:/opt/sge/bin/lx-amd64:/opt/sge/bin:/opt/sge/bin/lx-amd64:/home/inb/ramirezd/.local/bin:/home/inb/soporte/lanirem_software/ANTs_2.4.4/Scripts:/home/inb/soporte/lanirem_software/ANTs_2.4.4/bin:/home/inb/soporte/lanirem_software/fsl_6.0.7.4/share/fsl/bin:/misc/geminis/ramirezd/miniconda3/envs/py-minis/bin:/misc/geminis/ramirezd/miniconda3/condabin:/opt/sge/bin:/opt/sge/bin/lx-amd64:/home/inb/soporte/inb_tools:/home/inb/ramirezd/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin:/misc/geminis2/hcp_workbench/bin_linux64:/misc/geminis2/hcp_workbench/bin_linux64:/misc/geminis2/hcp_workbench/bin_linux64

env: PATH=/home/inb/soporte/lanirem_software/go_1.20.6/bin:/home/inb/soporte/lanirem_software/apptainer/bin:/opt/sge/bin:/opt/sge/bin/lx-amd64:/opt/sge/bin:/opt/sge/bin/lx-amd64:/home/inb/ramirezd/.local/bin:/home/inb/soporte/lanirem_software/ANTs_2.4.4/Scripts:/home/inb/soporte/lanirem_software/ANTs_2.4.4/bin:/home/inb/soporte/lanirem_software/fsl_6.0.7.4/share/fsl/bin:/misc/geminis/ramirezd/miniconda3/envs/py-minis/bin:/misc/geminis/ramirezd/miniconda3/condabin:/opt/sge/bin:/opt/sge/bin/lx-amd64:/home/inb/soporte/inb_tools:/home/inb/ramirezd/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin:/misc/geminis2/hcp_workbench/bin_linux64:/misc/geminis2/hcp_workbench/bin_linux64:/misc/geminis2/hcp_workbench/bin_linux64


In [46]:
# The commit message defaults to the job branch name
message = branch_name if message is None else message

# Arguments shared by the plain and the container run
run_kwargs = dict(
    inputs=inputs,
    outputs=outputs,
    message=message,
    explicit=is_explicit,
)

if commit:
    # re-execute a recorded revision instead of running dl_cmd
    dl.rerun(revision=commit, explicit=is_explicit)
elif container:
    dl.containers_run(dl_cmd, container_name=container, **run_kwargs)
else:
    dl.run(dl_cmd, **run_kwargs)

[INFO] Making sure inputs are available (this may take some time) 
[INFO] Attempting a clone into /tmp/job-sub-001A_bet-ramirezd/inputs/mri-raw 
[INFO] Attempting to clone from file:///misc/geminis2/ramirezd/test_bet/.fairlybig/input_ria/e5c/d662d-42d9-4f28-b6ec-1b54906a0015 to /tmp/job-sub-001A_bet-ramirezd/inputs/mri-raw 
[INFO] Attempting to clone from /misc/geminis2/twinsmx/datasets/mri_study/mri_study-raw/mri-raw to /tmp/job-sub-001A_bet-ramirezd/inputs/mri-raw 
[INFO] Completed clone attempts for Dataset(/tmp/job-sub-001A_bet-ramirezd/inputs/mri-raw) 


get(ok): inputs/mri-raw/sub-001A/anat/sub-001A_T1w.nii.gz (file) [from origin...]


[INFO] Attempting a clone into /tmp/job-sub-001A_bet-ramirezd/inputs/containers 
[INFO] Attempting to clone from file:///misc/geminis2/ramirezd/test_bet/.fairlybig/input_ria/8b1/4b026-dc93-4d34-84dc-7e459c08be13 to /tmp/job-sub-001A_bet-ramirezd/inputs/containers 
[INFO] Attempting to clone from /misc/geminis2/containers to /tmp/job-sub-001A_bet-ramirezd/inputs/containers 
[INFO] Completed clone attempts for Dataset(/tmp/job-sub-001A_bet-ramirezd/inputs/containers) 


get(ok): inputs/containers/.datalad/environments/fsl-6-0-4/image (file) [from origin...]


[INFO] == Command start (output follows) ===== 
[INFO] == Command exit (modification check follows) ===== 


run(ok): /tmp/job-sub-001A_bet-ramirezd (dataset) [apptainer run -e inputs/containers/.data...]
add(ok): sub-001A_T1w_bet.nii.gz (file)
save(ok): outputs (dataset)
add(ok): outputs (dataset)
add(ok): .gitmodules (file)
save(ok): . (dataset)
action summary:
  add (ok: 3)
  get (notneeded: 3, ok: 2)
  run (ok: 1)
  save (notneeded: 2, ok: 2)


In [47]:
# push annex data
# First the superdataset, then each output dataset, to the 'output_ria-storage' sibling.
# NOTE(review): the captured log below names the superdataset path for both
# pushes - confirm that the second call really pushes the subdataset.
dl.push(
    dataset='.',
    to='output_ria-storage',
)

for output_dataset in output_datasets:
    dl.push(
        dataset=output_dataset,
        to='output_ria-storage',
    )

# push git data
# The push lock is held for all git pushes to the 'outputstore' remotes.
with push_lock:
    git_push('cwd')
    for output_dataset in output_datasets:
        git_push(output_dataset)    

[INFO] Determine push target 
[INFO] Push refspecs 
[INFO] Transfer data 
[INFO] Finished push of Dataset(/tmp/job-sub-001A_bet-ramirezd) 


action summary:
  


[INFO] Determine push target 
[INFO] Push refspecs 
[INFO] Transfer data 
[INFO] Finished push of Dataset(/tmp/job-sub-001A_bet-ramirezd) 


action summary:
  


To /misc/geminis2/ramirezd/test_bet/.fairlybig/output_ria/64f/1d8ac-346d-4a0a-8c5c-8dca7e25ef7c
 * [new branch]      job-sub-001A_bet -> job-sub-001A_bet
To /misc/geminis2/ramirezd/test_bet/.fairlybig/output_ria/3f9/fea76-50a0-489a-b9f5-27bd92a17e09
 * [new branch]      job-sub-001A_bet -> job-sub-001A_bet


In [52]:
# Make the job directory writable and delete it.
# NOTE(review): the kernel's working directory was set to job_dir earlier -
# confirm nothing after this point relies on the current directory.
cleanup(job_dir)

In [54]:
# Record the end of the job in the shared status table (under the status lock).
with status_lock:

    update_status(
        status_csv,
        job_name,
        job_id,
        host,
        location,
        status='completed',
        update=datetime.today().strftime("%Y/%m/%d %H:%M:%S"),
        traceback=None,
    )

print("Job completed successfully.")

Job completed succesfully.
