In [1]:
import os
import yaml
import json
from collections import OrderedDict, defaultdict, Counter

def configure_json_dict(json_dict, config_dict):
    """ Usage: configured_dict, data_not_dict = configure_json_dict(json_dict, config_dict)

    Fill a template dictionary with values taken from a config dictionary.

    Template keys are dot-separated names; a config key matches every template
    key whose last dot-separated segment equals it (the grouping is done by
    get_json_keys_config_dict), so one config key may fill several template keys.

    Args:
        json_dict:       template dictionary (dot-separated key -> placeholder value);
                         it is NOT modified.
        config_dict:     dictionary of short variable name -> configured value.
    Returns:
        configured_dict: shallow copy of json_dict with every matched value replaced.
        data_not_dict:   the template entries that no config key matched, in
                         json_dict order, still holding their template values.
    """
    # Work on a shallow copy: assigning json_dict directly would alias it and
    # silently overwrite the caller's template.
    configured_dict = json_dict.copy()

    keys_d = get_json_keys_config_dict(json_dict)

    configured_names = set()
    for k, v in config_dict.items():
        # membership test first: indexing the defaultdict would insert empty lists
        if k in keys_d:
            for var_name in keys_d[k]:
                configured_dict[var_name] = v
                configured_names.add(var_name)

    # defaultdict() without a default_factory behaves like a plain dict; the type
    # is kept so existing callers see the same return type as before.
    data_not_dict = defaultdict()
    for var_name in json_dict:
        if var_name not in configured_names:
            data_not_dict[var_name] = configured_dict[var_name]

    return configured_dict, data_not_dict


def get_json_keys_config_dict(json_dict):
    """ Usage: keys_dict = get_json_keys_config_dict(json_dict)

    Group the keys of json_dict by their last dot-separated segment.

    Args:
        json_dict: dictionary whose keys are dot-separated strings.
    Returns:
        keys_dict: defaultdict(list) mapping each last segment to the list of
                   full keys that end with it, in json_dict iteration order.
    """
    suffix_to_full_keys = defaultdict(list)
    for full_key in json_dict:
        # text after the final '.', or the whole key when it contains no '.'
        suffix = full_key.rsplit('.', 1)[-1]
        suffix_to_full_keys[suffix].append(full_key)

    return suffix_to_full_keys


def get_config_file_dict(configfile_fullpath):
    """ Usage: config_file_dict = get_config_file_dict(configfile_fullpath)

    Read a plain-text "key=value" config file into an ordered dictionary.

    Parsing rules:
        - each line is split at its FIRST "=" only, so a value may itself
          contain "=" characters;
        - surrounding whitespace is stripped from both key and value;
        - a line is skipped when it has no "=", an empty key, an empty value,
          or a key that starts with "#";
        - values are kept as raw text (quotes and brackets are not interpreted);
        - when a key occurs more than once the last value wins.

    Args:
        configfile_fullpath: path of the config file to read.
    Returns:
        config_file_dict:    OrderedDict of key -> value strings in file order,
                             or an empty dict when no pair was found.
    """
    pairs_list = []

    with open(configfile_fullpath, 'r') as fh:
        for line in fh:
            # partition (not split) keeps everything after the first "=" intact
            lefty, separator, righty = line.strip().partition("=")
            lefty = lefty.strip()
            righty = righty.strip()

            if not separator or not lefty or lefty.startswith("#"):
                continue
            if not righty:
                continue

            pairs_list.append((lefty, righty))

    return OrderedDict(pairs_list) if pairs_list else {}


def get_run_file_dict(data_fullfilename):
    """ Read json, yaml or key: value text file into a dictionary.
    Adapted from     --    get_run_parameters(run_directory, run_file): 
    https://github.com/KnowEnG/KnowEnG_Pipelines_Library/tree/master/knpackage/toolbox.py
    
    Args:
        data_fullfilename: json or yaml format full path filename
                                quoted strings or not (if consistent) 
                                -but no lines start with tab characters
    Returns:
        json_dict:               python dictionary of name - value parameters
                                 (whatever the YAML parser produces for the file;
                                 an empty file yields None).
    """
    with open(data_fullfilename, 'r') as file_handle:
        # safe_load instead of yaml.load(file_handle): calling yaml.load without
        # an explicit Loader raises TypeError on PyYAML >= 6 and, on older
        # versions, can construct arbitrary Python objects from the file.
        # NOTE(review): files relying on python-specific YAML tags will now be
        # rejected; plain json / yaml mappings parse exactly as before.
        json_dict = yaml.safe_load(file_handle)

    return json_dict


def get_localized_wdl_dict(json_template, template_key_dict):
    """ Usage: FilledIn_wdl_dict = get_localized_wdl_dict(json_template, template_key_dict)

    Load a json template and a template-key file (both through
    get_run_file_dict) and substitute every template key found inside a
    string value of the template with that key's replacement value.

    Args:
        json_template:     full path of the json (or yaml) template file.
        template_key_dict: full path of the file mapping template-key strings
                           to their replacement values.
    Returns:
        FilledIn_wdl_dict: python dictionary with the template's keys; string
                           values have all template keys replaced, values of
                           any other type are passed through unchanged.
    
    """
    # a file that parses to nothing is treated as an empty mapping
    json_dict = get_run_file_dict(json_template) or {}
    keys_dict = get_run_file_dict(template_key_dict) or {}

    FilledIn_wdl_dict = {}
    
    # for each (line) key in the json template file dictionary
    for wdl_key, wdl_value in json_dict.items():
        filled_value = wdl_value

        # only non-empty strings can contain template keys; lists, numbers and
        # booleans are copied as they are
        if isinstance(wdl_value, str) and len(wdl_value) > 0:
            # look for each key in the template key dictionary
            for template_key, template_value in keys_dict.items():
                if template_key in filled_value:
                    # replace on the running result (not on the original value)
                    # so that several template keys in one value are all
                    # substituted; keys are applied in keys_dict order
                    filled_value = filled_value.replace(template_key, str(template_value))

        FilledIn_wdl_dict[wdl_key] = filled_value
    
    return FilledIn_wdl_dict

def write_json_from_templates(json_template,
                         template_key_dict,
                         json_test_filename,
                         json_file_path=None):
    """ Write a json file built from a json template with its template keys filled in.
    Args:
        json_template:      path of the json template file.
        template_key_dict:  path of the file holding the template keys and their values.
        json_test_filename: name (without directory) of the json file to write.
        json_file_path:     directory to write into; the current working directory
                            is used when this is None or is not an existing directory.

    Writes:
        json_file:          the filled-in dictionary, indented with four spaces.

    Returns:
        json_test_full_filename: full path of the file that was written.
    """
    output_dir = json_file_path
    if output_dir is None or not os.path.isdir(output_dir):
        # fall back to the current directory
        output_dir = os.getcwd()

    json_test_full_filename = os.path.join(output_dir, json_test_filename)

    # fill in the template before opening the output file
    localized_cromwell_wdl_dict = get_localized_wdl_dict(json_template, template_key_dict)

    with open(json_test_full_filename, 'w') as outfile:
        json.dump(localized_cromwell_wdl_dict, outfile, indent="    ")

    return json_test_full_filename

def get_key_righties_dict_from_json(json_fullfilename):
    """ Usage: key_righties_dict = get_key_righties_dict_from_json(json_fullfilename)

    Load the file with get_run_file_dict and map the last dot-separated segment
    of each key to its value.  List values are kept as they are, string values
    are stripped of surrounding whitespace, commas and double quotes, and
    values of any other type are left out.

    Returns:
        key_righties_dict: OrderedDict in file order, or an empty dict when
                           nothing was collected.
    """
    collected = OrderedDict()
    for full_key, raw_value in get_run_file_dict(json_fullfilename).items():
        short_key = full_key.strip().strip('"').split('.')[-1]
        if isinstance(raw_value, list):
            collected[short_key] = raw_value
        elif isinstance(raw_value, str):
            collected[short_key] = raw_value.strip().strip(',').strip('"')

    if not collected:
        return {}
    return collected

# What Should Be: Post config_parser & parsing indictment

In [2]:
%%writefile jjalltheway.json
{
    "wf0.task0.var0": "Boolean",
    "wf0.task0.var1": "Int",
    "wf0.task0.var2": "String",
    "wf0.task0.var3": "File",
    "wf0.task0.var4": "Array[File]",
    "wf0.task0.var5": "Array[Array[File]]",
    "wf0.var6": "Array[Array[Array[File]]]",
    "wf0.var7": "Array[Array[Array[File]]]"
}

Overwriting jjalltheway.json


In [3]:
%%writefile conf.txt
var0="true"
var1="true"
var2="true"
var3="fileA"
var4=["fileA","fileB"]
var5=[["fileA","fileB"],["fileC","fileD"]]
var6=[[["fileA","fileB"],["fileC","fileD"]],[["fileE","fileF"],["fileG","fileH"]]]


Overwriting conf.txt


## Definition of json WOMTOOL template 
    * Keys are dot-separated call-stack variable names.
    * Leftmost Key part is stack bottom (call-start).
    * Rightmost Key part is the variable name used in the Config file (HICCUP).
    * Values are wdl types defined in the WDL specification.
    
### Variable Name Space HICCUP(s):
    * JSON keys use multipart naming, but config files are flat key-value sets.
    * Config file keys and convention allow AMBIGUOUS json assignment.
<img src="images/HookahCaterpillerAliceInWDLand.jpg" width="200" height="300"> <br>
#### Alice in WDLand

### Python function that reads WOMTOOL generated json template.
    * wombat_dict, wombat_config_vars = 
    

In [4]:
# Demonstrate get_run_file_dict on the quoted json template written above:
# load it and list every key with its value.
TestTask_dir = os.getcwd()
TestTask_jason_file = 'jjalltheway.json'
json_fullfilename = os.path.join(TestTask_dir, TestTask_jason_file)

if not os.path.isfile(json_fullfilename):
    print(json_fullfilename, '\nNot Found')
else:
    WOMTOOL_wdl_dict = get_run_file_dict(json_fullfilename)
    print('{0} variables found\n'.format(len(WOMTOOL_wdl_dict)))
    # iterating an empty dictionary prints nothing, so no extra length check
    for k in WOMTOOL_wdl_dict:
        v = WOMTOOL_wdl_dict[k]
        print('%s\n\t%s\n'%(k,v))


8 variables found

wf0.task0.var0
	Boolean

wf0.task0.var1
	Int

wf0.task0.var2
	String

wf0.task0.var3
	File

wf0.task0.var4
	Array[File]

wf0.task0.var5
	Array[Array[File]]

wf0.var6
	Array[Array[Array[File]]]

wf0.var7
	Array[Array[Array[File]]]



## python function that reads (special.txt, yaml, json) config files.
    * config_dict_o_dicts = 
   

In [5]:
# Demonstrate get_config_file_dict on the key=value text file written above:
# load it and list every key with its raw value.
TestTask_dir = os.getcwd()
TestTask_jason_file = 'conf.txt'
json_fullfilename = os.path.join(TestTask_dir, TestTask_jason_file)

if not os.path.isfile(json_fullfilename):
    print(json_fullfilename, '\nNot Found')
else:
    CONFIG_txt_dict = get_config_file_dict(json_fullfilename)
    print('{0} variables found\n'.format(len(CONFIG_txt_dict)))
    # iterating an empty dictionary prints nothing, so no extra length check
    for k in CONFIG_txt_dict:
        v = CONFIG_txt_dict[k]
        print('%s\n\t%s\n'%(k,v))


7 variables found

var0
	"true"

var1
	"true"

var2
	"true"

var3
	"fileA"

var4
	["fileA","fileB"]

var5
	[["fileA","fileB"],["fileC","fileD"]]

var6
	[[["fileA","fileB"],["fileC","fileD"]],[["fileE","fileF"],["fileG","fileH"]]]



 
## python function that puts the config dict o dicts into the wombat variables dict.
    * wombat_data_dict = 
    

In [7]:
# Fill the WOMTOOL template with the config values, then show the filled-in
# dictionary followed by the template entries that no config key matched.
fi_dict, missedata = configure_json_dict(json_dict=WOMTOOL_wdl_dict, config_dict=CONFIG_txt_dict)
for k in fi_dict:
    v = fi_dict[k]
    print('%30s: %s'%(k,v))

print('\n\nMissing data?\n')
for k in missedata:
    v = missedata[k]
    print('%30s: %s'%(k,v))

                wf0.task0.var0: "true"
                wf0.task0.var1: "true"
                wf0.task0.var2: "true"
                wf0.task0.var3: "fileA"
                wf0.task0.var4: ["fileA","fileB"]
                wf0.task0.var5: [["fileA","fileB"],["fileC","fileD"]]
                      wf0.var6: [[["fileA","fileB"],["fileC","fileD"]],[["fileE","fileF"],["fileG","fileH"]]]
                      wf0.var7: Array[Array[Array[File]]]


Missing data?

                      wf0.var7: Array[Array[Array[File]]]


## python function that takes all the above, checks and writes the wombat_data_dict
    * status, full_filename = 