In [3]:
from argparse import ArgumentParser
from pathlib import Path
import re
import functools
import operator

import datalad.api as dl
import pandas as pd
import numpy as np

In [102]:
def is_numeric(x):
    """
    Report whether `x` can be converted with `int()`.

    Returns True when `int(x)` succeeds and False when the conversion is
    rejected. Floats count as numeric because `int()` truncates them.
    """
    try:
        int(x)
    except (TypeError, ValueError, OverflowError):
        # Only a failed conversion means "not numeric"; anything else
        # (e.g. KeyboardInterrupt) has to propagate.
        return False
    return True
    
def try_search(regex, val):
    """
    Return the first match of `regex` in `val`, or None.

    None is returned when there is no match, when `regex` is not a valid
    regular expression, or when `val` is not something `re.search` accepts
    (e.g. None).
    """
    try:
        found = re.search(regex, val)
    except (re.error, TypeError):
        return None
    return found.group() if found else None
        

def get_random_seed():
    """
    Return a 9 digit random integer.

    The value is drawn from 100_000_000 to 999_999_999, both ends included.
    """
    # The upper bound of np.random.randint is exclusive, so pass one past the
    # largest 9-digit number to make 999_999_999 reachable.
    return np.random.randint(100_000_000, 1_000_000_000)

def create_command_df(variable_commands_string):
    """
    Extract the commands within each variable definition.

    Every `<!name>` tag starts a command; the text up to the next tag, with
    surrounding whitespace and one leading '(' / trailing ')' removed, is that
    command's argument.

    Return a dataframe where each row is a command, with the columns
    'command_name', 'command' and 'command_value' (1 for commands that create
    values, -1 for commands that transform existing values).

    Raises:
        AssertionError: no command was found, a command name is unknown, or
            the commands are combined in an invalid order.
    """

    command_value = {'drop':1, 'glob':1,'variable':1,'paste':1,'write':1,'replace':-1, 'is_in':-1, 'not_in':-1, 'grep':-1,'unique':-1,'multiply':-1, 'repeat':-1,'exists':-1}

    # Raw strings keep the backslashes intact for `re`.
    command_names = re.findall(r"(?<=<!)\w+(?=>)", variable_commands_string)
    commands = re.split(r'<!\w+>', variable_commands_string)[1:]

    # Without this guard an empty definition fails later with an unrelated error.
    assert command_names, "No command found within the variable definition."

    command_df = (pd.DataFrame({'command_name':command_names, 'command':commands})
    .assign(
        command = lambda df_:
            df_['command'].str.strip().str.replace(r'(^\(|\)$)', '', regex=True),
        command_value = lambda df_: df_['command_name'].map(command_value)
    )
    )

    assert command_df['command_name'].isin(command_value.keys()).all(), "Used an invalid command within a variable definition."

    assert command_df['command_value'].is_monotonic_decreasing, "'drop', 'glob', 'variable', 'paste', and 'write' have to be the first action within each variable definition."

    assert command_df['command_value'].iat[0] >= 0, "Can't start variable definition with 'grep', 'unique', 'multiply', 'repeat' or 'exists'."

    # Count the value-creating commands directly. Summing the +1/-1 scores let
    # two of them through whenever a transforming command followed (1+1-1 = 1).
    n_first_position = int((command_df['command_value'] > 0).sum())
    assert n_first_position < 2, "You can't use both 'drop', 'glob' and 'variable' within the same variable definition."

    if command_df['command_name'].iat[0] == 'drop':
        assert command_df['command_value'].shape[0] == 1, "You can't use commands after 'drop'."

    return command_df


def call_glob(cmd):
    """
    Return an ordered list of glob results relative to super dataset.

    `cmd` may hold several whitespace-separated glob patterns; the matches of
    all of them are merged and sorted. Reads the module-level
    `super_dataset_path`.

    Raises:
        AssertionError: `super_dataset_path` is not set.
        Exception: no usable pattern was found in `cmd` or globbing failed.
    """

    # Same character set as before, written as a raw string so the
    # backslashes reach `re` without invalid-escape warnings.
    glob_characters = r'[a-z,A-Z,0-9,\,\/,\-,_,\.,\*,\[,\],\:,\+,\?,\!,\s]'
    assert super_dataset_path, "No known super dataset for globbing."

    try:
        globbing_list = re.search(f"{glob_characters}+", cmd).group().split()

        values = sorted(
            str(path.relative_to(super_dataset_path))
            for globbing in globbing_list
            for path in Path(super_dataset_path).glob(globbing)
        )
    except Exception as err:
        # Keep the original message, but chain the cause and let
        # KeyboardInterrupt/SystemExit through.
        raise Exception('Not a valid globbing pattern.') from err

    return values


def call_variable(cmd, variables):
    """
    Get the values from an existing variable.

    The variable name is the first run of name characters found in `cmd`.

    Raises:
        Exception: `cmd` holds no valid variable name, or the name is not a
            key of `variables`.
    """
    # NOTE(review): the comma is part of this character class, so a comma is
    # accepted inside a name - presumably unintended; confirm before changing.
    variable_characters = r'[a-z,A-Z,0-9,\-,_,]'
    try:
        found = re.search(f"{variable_characters}+", cmd)
    except TypeError as err:
        raise Exception("Tried to call a variable with an invalid variable name.") from err
    if found is None:
        raise Exception("Tried to call a variable with an invalid variable name.")

    try:
        return variables[found.group()]
    except (KeyError, TypeError) as err:
        raise Exception("Tried to call a non-existing variable.") from err


def call_paste(cmd, variables):
    """
    Paste existing variables within text.

    Every `{name}` placeholder in `cmd` is filled with the values of the
    variable `name`, row by row. Only the referenced variables have to share
    a length; unrelated variables are ignored.

    Returns a list of values.

    Raises:
        AssertionError: a placeholder names a variable that doesn't exist.
    """

    paste_variables = sorted(set(re.findall(r'(?<=\{)[\w,_,-]+(?=\})', cmd)))

    assert all(name in variables for name in paste_variables), "Not all variables in 'paste' exist."

    # Build the frame from the referenced variables only: variables are
    # broadcast to a common length later, so an unrelated variable of another
    # length must not break the paste. Without placeholders fall back to all
    # variables, exactly as before.
    if paste_variables:
        selected = {name: variables[name] for name in paste_variables}
    else:
        selected = variables

    rows = pd.DataFrame(selected)[paste_variables].to_dict(orient='records')
    return [cmd.format(**row) for row in rows]

def call_write(cmd):
    """
    Split a string on whitespace and return the pieces as a list.
    """
    pieces = cmd.split()
    return pieces


def call_replace(cmd, values, variables=None):
    """
    Replace a regular-expression pattern in every existing value.

    `cmd` holds the pattern and the replacement separated by exactly one
    space. The replacement can call existing variables using double curly
    brackets (e.g. 'T1w {{subject}}'); each reference is filled with the entry
    of that variable at the same position as the value. Values and variable
    rows are paired with `zip`, so the result is as long as the shorter one.

    Parameters:
        cmd: "to_be_replaced replacement" string.
        values: list of strings to transform.
        variables: mapping of variable name to list of values. When omitted,
            the module-level `variables` dictionary is used if it exists.

    Returns a list of values.

    Raises:
        AssertionError: `cmd` doesn't contain exactly one space.
        Exception: the replacement references a variable that doesn't exist.
    """

    n_spaces = cmd.count(' ')
    assert n_spaces == 1, "Didn't find exactly one space within 'replace'. Command should be:('to_be_replaced' 'replacement')."

    parts = cmd.split()
    string_detect = parts[0]
    string_replace = parts[1]

    # Substitute first, then turn the doubled brackets into str.format fields.
    new_values = [
        re.sub(string_detect, string_replace, value).replace('{{', '{').replace('}}', '}')
        for value in values
    ]

    # Look for references in the replacement itself. The previous code searched
    # the literal '{subject}', which demanded a 'subject' variable on every call.
    replacement_variables = re.findall(r"(?<=\{\{)\w+(?=\}\})", string_replace)
    if not replacement_variables:
        return new_values

    if variables is None:
        # Backward compatibility: callers that pass two arguments rely on the
        # module-level dictionary.
        variables = globals().get('variables') or {}

    if not all(name in variables for name in replacement_variables):
        raise Exception("Not all variables inside <!replace> exist.")

    referenced = {name: variables[name] for name in replacement_variables}
    rows = pd.DataFrame(referenced).to_dict(orient='records')

    return [value.format(**row_dict) for value, row_dict in zip(new_values, rows)]


def call_drop():
    """
    Produce no values: the result is always None.
    """
    return


def call_grep(cmd, values):
    """
    Apply the regular expression `cmd` to each value and keep the matched text.

    Values without a match become None. Fails when nothing matched at all.
    """
    matches = []
    for value in values:
        matches.append(try_search(cmd, value))

    found_something = any(match is not None for match in matches)
    assert found_something, "Not a valid regex or no matches."

    return matches


def call_repeat(cmd, values, variables=None):
    """
    Repeat existing values element by element (a, b -> a, a, b, b).

    Parameters:
        cmd: either an integer (as text) or the name of an existing variable,
            whose length is then used as the number of repetitions.
        values: list of values; a string is split on whitespace first.
        variables: mapping of variable name to list of values. When omitted,
            the module-level `variables` dictionary is used if it exists.

    Returns a list of values.

    Raises:
        AssertionError: `cmd` is not a number and names no existing variable.
    """
    if isinstance(values, str):
        values = values.split()

    try:
        n_elements = int(cmd)
    except (TypeError, ValueError):
        if variables is None:
            # Backward compatibility: callers that pass two arguments rely on
            # the module-level dictionary.
            variables = globals().get('variables') or {}
        assert cmd in variables, "Tried to repeat by the length of a variable that doesn't exist."
        n_elements = len(variables[cmd])

    return [value for value in values for _ in range(n_elements)]


def call_is_in(cmd, values, variables=None):
    """
    Return values in common between two existing variables.

    Parameters:
        cmd: name of the variable to compare against.
        values: current values; those also present in the named variable are
            kept, in their original order.
        variables: mapping of variable name to list of values. When omitted,
            the module-level `variables` dictionary is used if it exists.

    Raises:
        Exception: `cmd` holds no valid variable name, or the variable
            doesn't exist.
    """
    variable_characters = r'[a-z,A-Z,0-9,\-,_,]'
    found = re.search(f"{variable_characters}+", cmd.strip())
    if found is None:
        raise Exception("Within 'is_in', tried to call a variable with an invalid variable name.")

    if variables is None:
        # Backward compatibility: callers that pass two arguments rely on the
        # module-level dictionary.
        variables = globals().get('variables') or {}

    try:
        reference = pd.Series(variables[found.group()])
    except (KeyError, TypeError) as err:
        raise Exception("Within 'is_in', tried to call a variable that doesn't exist.") from err

    values = pd.Series(values)
    return values[values.isin(reference)].to_list()


def call_not_in(cmd, values, variables=None):
    """
    Return values not in common between two existing variables.

    Parameters:
        cmd: name of the variable to compare against.
        values: current values; those absent from the named variable are
            kept, in their original order.
        variables: mapping of variable name to list of values. When omitted,
            the module-level `variables` dictionary is used if it exists.

    Raises:
        Exception: `cmd` holds no valid variable name, or the variable
            doesn't exist.
    """
    variable_characters = r'[a-z,A-Z,0-9,\-,_,]'
    found = re.search(f"{variable_characters}+", cmd.strip())
    if found is None:
        raise Exception("Within 'not_in', tried to call a variable with an invalid variable name.")

    if variables is None:
        # Backward compatibility: callers that pass two arguments rely on the
        # module-level dictionary.
        variables = globals().get('variables') or {}

    try:
        reference = pd.Series(variables[found.group()])
    except (KeyError, TypeError) as err:
        raise Exception("Within 'not_in', tried to call a variable that doesn't exist.") from err

    values = pd.Series(values)
    return values[~values.isin(reference)].to_list()
    

def call_multiply(cmd, values, variables=None):
    """
    Multiply existing values by tiling the whole list (a, b -> a, b, a, b).

    Parameters:
        cmd: either an integer (as text) or the name of an existing variable,
            whose length is then used as the multiplier.
        values: list of values; a string is split on whitespace first.
        variables: mapping of variable name to list of values. When omitted,
            the module-level `variables` dictionary is used if it exists.

    Returns a list of values.

    Raises:
        AssertionError: `cmd` is not a number and names no existing variable.
    """
    if isinstance(values, str):
        values = values.split()

    try:
        n_elements = int(cmd)
    except (TypeError, ValueError):
        if variables is None:
            # Backward compatibility: callers that pass two arguments rely on
            # the module-level dictionary.
            variables = globals().get('variables') or {}
        assert cmd in variables, "Tried to multiply by the length of a variable that doesn't exist."
        n_elements = len(variables[cmd])

    return values * n_elements

    
def call_unique(values):
    """
    Remove duplicated values, keep the first one and maintain the same order.

    A string is split on whitespace first. Returns a list.
    """
    if isinstance(values, str):
        values = values.split()
    # dict keys keep insertion order, so this drops later duplicates only.
    # The previous body ignored `values` and de-duplicated a hard-coded list.
    return list(dict.fromkeys(values))
    

def call_exists(values):
    """
    Keep the values that point to something inside the super dataset.

    Each value is joined to the module-level `super_dataset_path`; a value is
    kept when that path is a symlink or exists, and replaced by None otherwise.
    The returned list has the same length and order as `values`.
    """
    assert super_dataset_path, "No known super dataset for globbing."

    def _is_present(value):
        target = super_dataset_path / str(value)
        # Test the symlink first so a link is kept even when the second check fails.
        return target.is_symlink() or target.exists()

    return [value if _is_present(value) else None for value in values]
    

def select_command(command_name, command, values=None, variables=None, current_variable_name=None):
    """
    Run the command called `command_name` and return the values it produces.

    'drop', 'glob', 'write', 'variable' and 'paste' create values; the
    remaining commands transform `values` and refuse to run without them.
    `current_variable_name` is accepted but not used.

    Raises:
        AssertionError: a command is missing the variables or values it needs.
        Exception: `command_name` is not a known command.
    """
    # first-postion commands
    if command_name == 'drop':
        return call_drop()

    if command_name == 'glob':
        return call_glob(command)

    if command_name == 'write':
        return call_write(command)

    if command_name == 'variable':
        assert variables, "Can't use 'variable' if no variable has been defined."
        return call_variable(command, variables)

    if command_name == 'paste':
        assert variables, "Can't use 'paste' if no variable has been defined."
        return call_paste(command, variables)

    # non-first commands that take an argument
    if command_name == 'replace':
        assert values, "Can't use 'replace' without existing values."
        return call_replace(command, values)

    if command_name == 'grep':
        assert values, "Can't use 'grep' without existing values."
        return call_grep(command, values)

    if command_name == 'is_in':
        assert values, "Can't use 'is_in' without existing values."
        return call_is_in(command, values)

    if command_name == 'not_in':
        assert values, "Can't use 'not_in' without existing values."
        return call_not_in(command, values)

    if command_name == 'multiply':
        assert values, "Can't use 'multiply' without existing values."
        return call_multiply(command, values)

    if command_name == 'repeat':
        assert values, "Can't use 'repeat' without existing values."
        return call_repeat(command, values)

    # non-first commands that only need the values
    if command_name == 'unique':
        assert values, "Can't use 'unique' without existing values."
        return call_unique(values)

    if command_name == 'exists':
        assert values, "Can't use 'exists' without existing values."
        return call_exists(values)

    raise Exception("Non-existing command selected.")
            

In [118]:
command_df

Unnamed: 0,command_name,command,command_value
0,write,acc pcc,1
1,multiply,5,-1


In [116]:
super_dataset_path = Path('/misc/geminis2/ramirezd/test_bet/')
# Full example definition; it is overridden by the small test case right below.
# Raw string so the `\w` of the grep pattern is not treated as a string escape.
variable_definition_string = r"svs == <!glob>(inputs/mri-raw/sub-*/mrs/*acq-press*svs.nii.gz) ; subject == <!variable>(svs)<!grep>(sub-\w+); t1w == <!paste> (inputs/mri-raw/{subject}/anat/{subject}_T1w.nii.gz) ; t2w == <!variable>(t1w)<!replace>(T1w T2w)<!exists> ; ref == <!variable>(svs)<!replace>(svs(?=.nii.gz) ref)<!exists> "

variable_definition_string = "voi == <!write>(acc pcc)<!multiply>5"

# Create a tuple of variable-commands
variable_command = []

for index, variable_definition in enumerate(variable_definition_string.split(';')):
    # Split on the first '==' only, so a '==' inside a command is not cut off.
    definition_parts = variable_definition.split('==', 1)
    variable_name = definition_parts[0].strip()
    variable_commands_string = definition_parts[1].strip()
    command_df = create_command_df(variable_commands_string)
    variable_command.append((index, variable_name, command_df))


# Create a dictionary of variable-values
variables = {}

len_dict = {}
max_len = 0
max_len_variable = None

for index, variable_name, command_df in variable_command:
    values = []
    # Each command receives the values produced by the previous one.
    for command_row in command_df.itertuples():
        values = select_command(
            command_row.command_name,
            command_row.command,
            values,
            variables
            )

    # Drop variable if it's None and skip next steps
    if values is None:
        variables.pop(variable_name, None)
        len_dict.pop(variable_name, None)
        continue

    variables[variable_name] = values
    len_dict[variable_name] = len(values)


# Calculate the max length of each variable for broadcasting.
for variable_name, len_of_values in len_dict.items():
    if len_of_values > max_len:
        max_len = len_of_values
        max_len_variable = variable_name

# Broadcast variables: every length has to be non-zero and divide the longest one.
lengths = np.array(list(len_dict.values()))
if np.all(lengths > 0) and np.all(max_len % lengths == 0):
    for key, value_list in variables.items():
        if key == max_len_variable:
            continue
        # Integer division: the factor is exact because of the check above.
        variables[key] = value_list * (max_len // len(value_list))
else:
    raise Exception("Can't broadcast variables.")
    

array([3])

In [95]:
len_dict

{'subject': 3}

In [62]:
for value in variables.values():
    print(len(value))

863
863
863
863


In [117]:
pd.DataFrame(variables)

Unnamed: 0,voi
0,acc
1,pcc
2,acc
3,pcc
4,acc
5,pcc
6,acc
7,pcc
8,acc
9,pcc


In [108]:
# Templates for one job; the {placeholders} are filled from `variables`, row by row.
dl_cmd = "bet inputs/mri_raw/{subject}/anat/{subject}_T1w.nii.gz outputs/bet/{subject}_T1w_bet.nii.gz --radom_seed <!random>"
inputs = "inputs/mri_raw/{subject}/anat/{subject}_T1w.nii.gz"
outputs = "outputs/bet/{subject}_T1w_bet.nii.gz"
job_name = "{subject}_T1w_bet"
data_dict = {'job_name':[], 'dl_cmd':[], 'inputs':[], 'outputs':[]}

# Swap the <!random> tag for a placeholder backed by one fresh seed per row.
wants_random_seed = "<!random>" in dl_cmd and "{random_seed}" not in dl_cmd
if wants_random_seed:
    random_seeds = [get_random_seed() for _ in range(max_len)]
    dl_cmd = dl_cmd.replace("<!random>", "{random_seed}")
    variables['random_seed'] = random_seeds

# Rows holding a missing value are skipped; the rest are formatted column by column.
templates = {'job_name': job_name, 'dl_cmd': dl_cmd, 'inputs': inputs, 'outputs': outputs}
for row_dict in pd.DataFrame(variables).dropna().to_dict(orient='records'):
    for column, template in templates.items():
        data_dict[column].append(template.format(**row_dict))

In [109]:
pd.DataFrame(data_dict)

Unnamed: 0,job_name,dl_cmd,inputs,outputs
0,sub-178A_T1w_bet,bet inputs/mri_raw/sub-178A/anat/sub-178A_T1w....,inputs/mri_raw/sub-178A/anat/sub-178A_T1w.nii.gz,outputs/bet/sub-178A_T1w_bet.nii.gz
1,sub-178B_T1w_bet,bet inputs/mri_raw/sub-178B/anat/sub-178B_T1w....,inputs/mri_raw/sub-178B/anat/sub-178B_T1w.nii.gz,outputs/bet/sub-178B_T1w_bet.nii.gz
2,sub-217B_T1w_bet,bet inputs/mri_raw/sub-217B/anat/sub-217B_T1w....,inputs/mri_raw/sub-217B/anat/sub-217B_T1w.nii.gz,outputs/bet/sub-217B_T1w_bet.nii.gz


In [13]:
pd.Series(['a','b','c', 'z'])[~pd.Series(['a','b','c', 'z']).isin(['c'])]

0    a
1    b
3    z
dtype: object

In [228]:
pd.DataFrame(data_dict)['dl_cmd'][0]

'bet inputs/mri_raw/sub-031B/anat/sub-031B_T1w.nii.gz outputs/bet/sub-031B_T1w_bet.nii.gz --radom_seed 113632864'

In [200]:
command_row.command_name

'exists'

In [185]:
command_row.command

'inputs/mri-raw/sub-*/mrs/*acq-press*svs.nii.gz'

In [183]:
variable_command['subject']

Unnamed: 0,command_name,command,command_value
0,variable,svs,1
1,grep,sub-\w+,-1


In [157]:
command_df

Unnamed: 0,command_name,command,command_value
0,variable,svs,1
1,replace,svs(?=.nii.gz) ref,-1
2,exists,,-1


In [135]:
command_df

Unnamed: 0,command_name,command,command_value
0,variable,svs,1
1,replace,svs(?=.nii.gz) ref,0
2,exists,,0


In [102]:
re.search("(?<=<!command>).+(?=<!command>)", re.sub('<!\w+>', '<!command>', full_pattern))

<re.Match object; span=(10, 45), match='(svs)<!command>(svs(?=.nii.gz) ref)'>

In [100]:
re.findall("(?<=<!(variable|exists)>).+", full_pattern)

error: look-behind requires fixed-width pattern

In [83]:
pd.Series(re.findall("(?<=<!\w+>)", full_pattern))

error: look-behind requires fixed-width pattern

In [30]:
# Monolithic version of the variable-definition parser: every command
# (<!glob>, <!paste>, <!variable>, <!grep>, <!replace>, <!unique>, <!multiply>,
# <!exists>) is handled inline in one loop and the result is stored in
# `variable_dict`.
# NOTE(review): appears to be an earlier draft of the call_* helpers - confirm
# before relying on it.
# NOTE(review): `function_order_dict` and `glob_characters` are used below but
# are not defined in this cell; they must already exist in the session.
super_dataset_path = Path('/misc/geminis2/ramirezd/test_bet/')

# Character classes used to pull each command's argument out of its parentheses.
regex_characters = '[a-z,A-Z,0-9,\\\,\/,\-,_,\.,\*,\[,\],\:,\+,\?,\!,\(,\),\<,\>,\s]'
variable_characters = '[a-z,A-Z,0-9,\-,_,]'
path_characters = '[a-z,A-Z,0-9,\\,\/,\-,_,\},\{,\.,\+]'
patterns = "svs == <!glob>(inputs/mri-raw/sub-*/mrs/*acq-press*svs.nii.gz) ; subject == <!variable>(svs)<!grep>(sub-\w+); t1w == <!paste>(inputs/mri-raw/{subject}/anat/{subject}_T1w.nii.gz) ; t2w == <!variable>(t1w)<!replace>(T1w {{subject}}) ; ref == <!variable>(svs)<!replace>(svs(?=.nii.gz) ref)<!exists>"

variable_dict = {}

# Bookkeeping for the broadcasting step at the end of the cell.
max_len = 0
max_key = ''
len_list = []

# Definitions are separated by ';' and have the form "name == commands".
for key_pattern in patterns.split(';'):
    key_pattern = key_pattern.split('==')
    key = key_pattern[0].strip()
    if re.search('\s', key):
        raise Exception("Don't use spaces in variable names.")
    
    full_pattern = key_pattern[1].strip()
    values = full_pattern
    
    # <!multiply> is cut off first; its argument is applied near the end of the loop.
    if '<!multiply>' in full_pattern:
        values = full_pattern.split('<!multiply>')[0]
        multiply = full_pattern.split('<!multiply>')[1].strip()
        full_pattern = values
    else:
        multiply = False
    
    # Check that the commands come in the expected order and are not repeated.
    function_order = pd.Series(re.findall("(?<=\<!)\w+(?=\>)", full_pattern))
    if not function_order.map(function_order_dict).is_monotonic_increasing:
        raise Exception("Not using the correct order of functions: 'glob':1,'paste':2,'variable':3,'grep':4,'replace':5,'unique':6,'multiply':7,'exists':8")
    if function_order.duplicated().any():
        raise Exception("Can't use the same function more than once within the same pattern.")
    
        
    # <!glob>: sorted paths matching the pattern, relative to the super dataset.
    if '<!glob>' in full_pattern:
        if '<!paste>' in full_pattern or '<!variable>' in full_pattern:
            raise Exception("Can't use <!glob>, <!paste> and <!variable> in the same pattern")
        
        try:
            globbing = re.search(f"(?<=\<!glob\>\(){glob_characters}+(?=\))", values).group()
            values = sorted(
                [str(path.relative_to(super_dataset_path)) 
                 for path in Path(super_dataset_path).glob(globbing)]
                )
        except:
            raise Exception('Not a valid globbing pattern.')
        
         
    # <!paste>: fill the {name} placeholders from already defined variables, row by row.
    if '<!paste>' in full_pattern:
        if '<!glob>' in full_pattern or '<!variable>' in full_pattern:
            raise Exception("Can't use <!glob>, <!paste> and <!variable> in the same pattern")
        
        
        glob_vars = list(set(re.findall('(?<=\{)[\w,_,-]+(?=\})', values)))
        
        if not pd.Series(glob_vars).isin(variable_dict.keys()).all():
            raise Exception("Not all variables inside the <!path> pattern exist.")
        
        values =  re.search(f"(?<=\<!paste\>\(){path_characters}+(?=\))", values).group()
        new_values = []
        for row in pd.DataFrame(variable_dict)[glob_vars].to_dict(orient='records'):
            new_values.append(values.format(**row))
        
        values=new_values
        

    # <!variable>: start from the values of an existing variable.
    if '<!variable>' in full_pattern:
        if '<!glob>' in full_pattern or '<!paste>' in full_pattern:
            raise Exception("Can't use <!glob>, <!paste> and <!variable> in the same pattern")
            
        variable = re.search(f"(?<=\<!variable\>\(){variable_characters}+(?=\))", values).group()
        values = variable_dict[variable]


    
    # <!grep>: keep the text matched by the regex in each value (None when there is no match).
    if '<!grep>' in full_pattern:
        
        # Remove the other tags so the grep argument ends the string.
        regex = full_pattern.replace('<!unique>','').replace('<!multiply>', '').replace('<!exists>', '').strip()
        if '<!replace>' in regex:
            regex = regex.split('<!replace>')[0].strip()
        
        regex = re.search(f"(?<=\<!grep\>\(){regex_characters}+(?=\)$)", regex).group()
    
        values = [try_search(regex, value) for value in values]
        if all(value is None for value in values):
            raise Exception("Not a valid regex or no matches.")
        
        
        
    # <!replace>: "detect replacement" pair applied with re.sub to every value.
    if '<!replace>' in full_pattern:
        replace_pattern = full_pattern.replace('<!unique>','').replace('<!multiply>', '').replace('<!exists>', '').strip()
        replace_pattern = re.search(f"(?<=\<!replace\>\().+(?=\)$)", replace_pattern).group()
        
        # Only two or more spaces are rejected here; a pattern without a space
        # fails on the indexing below instead.
        n_spaces = len([character for character in replace_pattern if character == ' '])
        if n_spaces >= 2:
            raise Exception("More than one space within <!replace>. Don't use spaces for string-detection or string-replacement.")
        
        string_detect = replace_pattern.split()[0]
        string_replace = replace_pattern.split()[1]            
        
        # Doubled brackets become single ones so they can act as str.format fields.
        new_values = []
        for value in values:
            new_values.append(re.sub(string_detect, string_replace, value).replace('{{' , '{').replace('}}', '}'))
            
        values = new_values
            
        # NOTE(review): this searches the literal '{subject}' instead of
        # replace_pattern, so replacement_variables is always ['subject'].
        replacement_variables = re.findall("(?<={)\w+(?=})", '{subject}')
        if replacement_variables:
            if not pd.Series(replacement_variables).isin(variable_dict.keys()).all():
                raise Exception("Not all variables inside <!replace> exist.")
            
            # Values and variable rows are paired by position; zip stops at the shorter one.
            new_values = []
            for value, row_dict in zip(values, pd.DataFrame(variable_dict).to_dict(orient='records')):
                new_values.append(value.format(**row_dict))
                
            values = new_values
        
        
        
    # <!unique>: de-duplicate; the result is sorted, so the original order is not kept.
    if '<!unique>' in full_pattern:
        values = sorted(set(values))       
        
        
    # <!multiply>: repeat each value n times, element by element (a, a, b, b);
    # n is a number or the length of another variable.
    if multiply:
        if isinstance(values, str):
            values = values.split()
        
        if is_numeric(multiply):
            n_elements = int(multiply)
        else:
            n_elements = len(variable_dict[multiply])       
        values = [[value]*n_elements for value in values]
        values = functools.reduce(operator.iconcat, values, [])
    elif isinstance(values, str):
        values = values.split()
        
        
    # <!exists>: replace values whose path is neither a symlink nor an existing path with None.
    if '<!exists>' in full_pattern:
        new_values = []
        for value in values:
            if (super_dataset_path / str(value)).is_symlink() or (super_dataset_path / str(value)).exists():
                new_values.append(value)
            else:
                new_values.append(None)
        
        values = new_values
    
    
    # Remember the length of this variable and track the longest one.
    len_list.append(len(values))
    if len_list[-1] > max_len:
        max_len = len_list[-1]
        max_key = key

    variable_dict[key] = values
    
    
    
# Broadcast: tile every shorter variable up to the longest length, which all
# lengths have to divide.
if np.all(max_len % np.array(len_list) == 0):
    for key, value_list in variable_dict.items():
        if key == max_key:
            continue
        variable_dict[key] = value_list * int(max_len / len(value_list))
else:
    raise Exception("Can't broadcast variables.")
    

In [80]:
# Templates for one job; the {placeholders} are filled from `variable_dict`, row by row.
dl_cmd = "bet inputs/mri_raw/{subject}/anat/{subject}_T1w.nii.gz outputs/bet/{subject}_T1w_bet.nii.gz --radom_seed <!random>"
inputs = "inputs/mri_raw/{subject}/anat/{subject}_T1w.nii.gz"
outputs = "outputs/bet/{subject}_T1w_bet.nii.gz"
job_name = "{subject}_T1w_bet"
data_dict = {'job_name':[], 'dl_cmd':[], 'inputs':[], 'outputs':[]}

# Swap the <!random> tag for a placeholder backed by one fresh seed per row.
wants_random_seed = "<!random>" in dl_cmd and "{random_seed}" not in dl_cmd
if wants_random_seed:
    random_seeds = [get_random_seed() for _ in range(max_len)]
    dl_cmd = dl_cmd.replace("<!random>", "{random_seed}")
    variable_dict['random_seed'] = random_seeds

# Format every template once per row of the variable table.
templates = {'job_name': job_name, 'dl_cmd': dl_cmd, 'inputs': inputs, 'outputs': outputs}
for row_dict in pd.DataFrame(variable_dict).to_dict(orient='records'):
    for column, template in templates.items():
        data_dict[column].append(template.format(**row_dict))

In [81]:
pd.DataFrame(data_dict)['dl_cmd'].to_list()

['bet inputs/mri_raw/sub-001A/anat/sub-001A_T1w.nii.gz outputs/bet/sub-001A_T1w_bet.nii.gz --radom_seed 881027',
 'bet inputs/mri_raw/sub-001A/anat/sub-001A_T1w.nii.gz outputs/bet/sub-001A_T1w_bet.nii.gz --radom_seed 734137',
 'bet inputs/mri_raw/sub-001B/anat/sub-001B_T1w.nii.gz outputs/bet/sub-001B_T1w_bet.nii.gz --radom_seed 612471',
 'bet inputs/mri_raw/sub-001B/anat/sub-001B_T1w.nii.gz outputs/bet/sub-001B_T1w_bet.nii.gz --radom_seed 838735',
 'bet inputs/mri_raw/sub-002A/anat/sub-002A_T1w.nii.gz outputs/bet/sub-002A_T1w_bet.nii.gz --radom_seed 800281',
 'bet inputs/mri_raw/sub-002A/anat/sub-002A_T1w.nii.gz outputs/bet/sub-002A_T1w_bet.nii.gz --radom_seed 707830',
 'bet inputs/mri_raw/sub-002B/anat/sub-002B_T1w.nii.gz outputs/bet/sub-002B_T1w_bet.nii.gz --radom_seed 177309',
 'bet inputs/mri_raw/sub-002B/anat/sub-002B_T1w.nii.gz outputs/bet/sub-002B_T1w_bet.nii.gz --radom_seed 782059',
 'bet inputs/mri_raw/sub-003A/anat/sub-003A_T1w.nii.gz outputs/bet/sub-003A_T1w_bet.nii.gz --rad

In [33]:
variable_dict['t2w']

['inputs/mri-raw/sub-001A/anat/sub-001A_sub-001A.nii.gz',
 'inputs/mri-raw/sub-001A/anat/sub-001A_sub-001A.nii.gz',
 'inputs/mri-raw/sub-001B/anat/sub-001B_sub-001B.nii.gz',
 'inputs/mri-raw/sub-001B/anat/sub-001B_sub-001B.nii.gz',
 'inputs/mri-raw/sub-002A/anat/sub-002A_sub-002A.nii.gz',
 'inputs/mri-raw/sub-002A/anat/sub-002A_sub-002A.nii.gz',
 'inputs/mri-raw/sub-002B/anat/sub-002B_sub-002B.nii.gz',
 'inputs/mri-raw/sub-002B/anat/sub-002B_sub-002B.nii.gz',
 'inputs/mri-raw/sub-003A/anat/sub-003A_sub-003A.nii.gz',
 'inputs/mri-raw/sub-003A/anat/sub-003A_sub-003A.nii.gz',
 'inputs/mri-raw/sub-003B/anat/sub-003B_sub-003B.nii.gz',
 'inputs/mri-raw/sub-003B/anat/sub-003B_sub-003B.nii.gz',
 'inputs/mri-raw/sub-004A/anat/sub-004A_sub-004A.nii.gz',
 'inputs/mri-raw/sub-004A/anat/sub-004A_sub-004A.nii.gz',
 'inputs/mri-raw/sub-004B/anat/sub-004B_sub-004B.nii.gz',
 'inputs/mri-raw/sub-004B/anat/sub-004B_sub-004B.nii.gz',
 'inputs/mri-raw/sub-005A/anat/sub-005A_sub-005A.nii.gz',
 'inputs/mri-r

In [32]:
pd.DataFrame(data_dict)

Unnamed: 0,job_name,dl_cmd,inputs,outputs
0,sub-001A_T1w_bet,bet inputs/mri_raw/sub-001A/anat/sub-001A_T1w....,inputs/mri_raw/sub-001A/anat/sub-001A_T1w.nii.gz,outputs/bet/sub-001A_T1w_bet.nii.gz
1,sub-001A_T1w_bet,bet inputs/mri_raw/sub-001A/anat/sub-001A_T1w....,inputs/mri_raw/sub-001A/anat/sub-001A_T1w.nii.gz,outputs/bet/sub-001A_T1w_bet.nii.gz
2,sub-001B_T1w_bet,bet inputs/mri_raw/sub-001B/anat/sub-001B_T1w....,inputs/mri_raw/sub-001B/anat/sub-001B_T1w.nii.gz,outputs/bet/sub-001B_T1w_bet.nii.gz


In [33]:
def get_glob_var_text(string_list):
    """
    Join strings with commas and wrap the result in square brackets.

    For example ['a', 'b'] gives '[a,b]' and an empty list gives '[]'.
    """
    # str.join avoids the repeated string copies of `+=` in a loop.
    return '[' + ','.join(string_list) + ']'

In [10]:
variable_dict.keys()

dict_keys(['svs', 'subject', 't1w', 't2w', 'ref'])

In [75]:
# Templates for one job; the {placeholders} are filled from `variable_dict`, row by row.
dl_cmd = "python code/process_svg.py -a {t1w} -s {svs}"
inputs = "{t1w} {svs}"
outputs = "outputs/bet/{subject}_T1w_bet.nii.gz"
job_name = "{subject}_T1w_bet"
data_dict = {'job_name':[], 'dl_cmd':[], 'inputs':[], 'outputs':[]}

# Format every template once per row of the variable table.
for row_dict in pd.DataFrame(variable_dict).to_dict(orient='records'):
    data_dict['job_name'].append(job_name.format(**row_dict))
    data_dict['dl_cmd'].append(dl_cmd.format(**row_dict))
    data_dict['inputs'].append(inputs.format(**row_dict))
    data_dict['outputs'].append(outputs.format(**row_dict))
    
# NOTE(review): unfinished. The names below are passed positionally and none of
# them is defined in the visible code, while DataFrame.assign only accepts
# keyword arguments (column=value), so this expression cannot run as written.
# Presumably a list of per-job columns still to be added - confirm the intended
# values before completing it.
(pd.DataFrame(data_dict)
 .assign(
     container,
     is_explicit,
     output_datasets,
     prereq_get,
     message,
     super_id,
     clone_target,
     push_target,
     ephemeral_location,
     req_disk_gb,
     queue,
     slots,
     vmem,
     h_rt,
     env_vars,
     batch
 )
 )