```bash
ConfigsBeingUsed = "-i Config/run_info.txt -i Config/tool_info.txt -i Config/memory_info.txt -i Config/sample_info.txt"
python MayomicsVC/src/python/config_parser.py ConfigsBeingUsed --jsonTemplate Jsons/GermlineMasterWorkflow.template.json -o Jsons/GermlineMasterWorkflow.FilledIn.json;
```

In [1]:
import os
import sys
import numpy as np
import pandas as pd
conf_dir = '../../data/Config'
json_dir = '../../data/Jsons'
mayo_repo = '../../../../ncsa_genomics/MayomicsVC'

mayo_python_code = os.path.join(mayo_repo, 'src/python')
sys.path.insert(1, mayo_python_code)
import config_parser

mayo_python_parser_code = os.path.join(mayo_python_code, 'config/parser')
sys.path.insert(1, mayo_python_parser_code)
from parsing import Parser

In [2]:
conf_dir = '../../data/Config'
json_dir = '../../data/Jsons'
mayo_repo = '../../../../ncsa_genomics/MayomicsVC'
mayo_python_code = os.path.join(mayo_repo, 'src/python')
# os.listdir(conf_dir)

In [3]:
json_template = os.path.join(json_dir, 'GermlineMasterWorkflow.template.json')
json_test_template = os.path.join(json_dir, 'GermlineMasterWorkflow.templateSAVEME.json')
json_output = os.path.join(json_dir, 'GermlineMasterWorkflow.FilledInTest.json')
config_files = ['run_info.txt', 'tool_info.txt', 'memory_info.txt', 'sample_info.txt']
config_full_files = []
for conf_file in config_files:
    config_full_files.append(os.path.join(conf_dir, conf_file))

In [4]:
jobID = '23Skidoo'
debugmode = False
k_v_parser = Parser(jobID, debugmode)

In [5]:
os.listdir(json_dir)
# config_full_files == parsed_args.i
# json_template == parsed_args.jsonTemplate
# json_output == parsed_args.o
k_v_parser.fill_in_json_template(config_full_files, json_template, json_output)



In [6]:
def template_del_keys(config_file, key_names_list):
    """ Usage: new_lines_list = template_del_keys(config_file, key_names_list) """
    
    print('Opening:\n', config_file)
    with open(config_file, 'r') as tfh:
        lines = tfh.readlines()
    print('found %i lines'%(len(lines)))
    new_lines_list = ['{\n']
    for line in lines:
        l_key = line.strip().split('=')[0]
        for key_name in key_names_list:
            if not key_name in l_key:
                new_lines_list.append('  ' + line.strip() + '\n')
            else:
                print('removeing\n', line.strip())
                
    new_lines_list.append('}')
    outie_full_file, fext = os.path.splitext(config_file)
    outie_full_file = outie_full_file + '.cleaned' + fext
    print(outie_full_file)
    
    return new_lines_list

In [7]:
nix_key_list = ['InputReads', 
                'Strelka', 
                'StrelkaThreads', 
                'Mutect', 
                'MutectThreads', 
                'DeliverySomaticVC_Script', 
                'StrelkaScript', 
                'MutectScript', 
                'MergeSomaticVcfScript', 
                'StrelkaEnvProfile', 
                'MutectEnvProfile', 
                'MergeSomaticVcfEnvProfile']
new_lines_list = template_del_keys(config_file=config_full_files[3], key_names_list=nix_key_list)

Opening:
 ../../data/Config/sample_info.txt
found 1 lines
../../data/Config/sample_info.cleaned.txt


In [8]:
line = '  "GermlineMasterWF.merge.MergeSoftMemLimit": "String",'
l_key = line.strip().strip('"').split(':')[0].strip('"')
l_key

'GermlineMasterWF.merge.MergeSoftMemLimit'

In [9]:
def get_json_keys_list(json_full_file):
    """ Usage: keys_list = get_json_keys_list(json_full_file) """
    keys_list = []
    with open(json_full_file, 'r') as tfh:
        lines = tfh.readlines()

    for line in lines:
        l_key = line.strip().strip('"').split(':')[0]
        l_key = l_key.strip('"').split('.')[-1]
        if not l_key is None and len(l_key) > 0:
            keys_list.append(str(l_key))
        
    return keys_list

def get_keys_dict(config_full_file):
    """ Usage: config_dict = get_keys_dict(config_full_file) """
    config_dict = {}
    with open(config_full_file, 'r') as cfh:
        lines = cfh.readlines()
        
    for line in lines:
        l_list = line.strip().replace('""', '').split('=')
        
        if len(l_list) >= 1:
            l_key = l_list[0]
            
            if len(l_key) >=1 and l_key[0] != '#':
                if len(l_list) <= 1 or l_list[1] is None:
                    config_dict[l_key] = ''
                elif len(l_list) == 2:
                    config_dict[l_key] = ''.join(l_list[1:][0])
                else:
                    config_dict[l_key] = ' '.join(l_list[1:])
                
    return config_dict

def config_list_to_json_dict(config_full_files_list, json_full_file):
    """ Usage: jsonic_dict, missing_keys_list = config_list_to_json_dict(config_full_files_list, json_full_file) 
    
    """
    nullkey = 'missing dakine key'
    missing_keys_list = []
    jsonic_dict = {}
    conf_files_dict_of_dicts = {}
    
    for config_full_file in config_full_files_list:
        _, conf_name = os.path.split(config_full_file)
        conf_files_dict_of_dicts[conf_name] = get_keys_dict(config_full_file)
    
    keys_list = get_json_keys_list(json_full_file)
    for k in keys_list:
        jsonic_dict[k] = nullkey
        k_found_in_conf_keys = False
        for conf_k, conf_dict in conf_files_dict_of_dicts.items():
            conf_keys_list = conf_dict.keys()
            if k in conf_keys_list:
                k_found_in_conf_keys = True
        if k_found_in_conf_keys == False:
            missing_keys_list.append(k)
            
    return jsonic_dict, missing_keys_list

In [10]:
json_test_template = os.path.join(json_dir, 'GermlineMasterWorkflow.templateSAVEME.json')
jsonic_dict, missing_keys_list = config_list_to_json_dict(config_full_files_list=config_full_files, 
                                                          json_full_file=json_test_template)
missing_keys_list

['{', 'NormalInputReads', '}']

In [11]:
config_dict = get_keys_dict(config_full_file=config_full_files[0])
print('\n\n')
for k, v in config_dict.items():
    print(type(v), '\t%s: \n\t%s\n'%(k,v))




<class 'str'> 	DebugMode: 
	

<class 'str'> 	PairedEnd: 
	"true"

<class 'str'> 	Trimming: 
	"true"

<class 'str'> 	MarkDuplicates: 
	"true"

<class 'str'> 	Bqsr: 
	"true"

<class 'str'> 	Vqsr: 
	"true"

<class 'str'> 	InputReads: 
	"/projects/bioinformatics/DEL/Inputs/WGS_chr1_5X_E0.005_L1_read1.fastq.gz,/projects/bioinformatics/DEL/Inputs/WGS_chr1_5X_E0.005_L1_read2.fastq.gz"

<class 'str'> 	NormalInputRead1: 
	"/projects/bioinformatics/DEL/Inputs/WGS_chr1_5X_E0.005_L1_read1.fastq.gz,/projects/bioinformatics/DEL/Inputs/WGS_chr1_5X_E0.005_L2_read1.fastq.gz"

<class 'str'> 	NormalInputRead2: 
	"/projects/bioinformatics/DEL/Inputs/WGS_chr1_5X_E0.005_L1_read2.fastq.gz,/projects/bioinformatics/DEL/Inputs/WGS_chr1_5X_E0.005_L2_read2.fastq.gz"

<class 'str'> 	Adapters: 
	"/projects/bioinformatics/DEL/Inputs/TruSeqAdaptors.fasta"

<class 'str'> 	Ref: 
	"/projects/mgc/Project_1/MayomicsVC_MayoTesting/Reference/Homo_sapiens_assembly38.fasta"

<class 'str'> 	RefAnn: 
	"/projects/mgc/Project_

```python
def template_del_keys(config_file, key_names_list):
    """ Usage: new_lines_list = template_del_keys(config_file, key_names_list) """
    
    print('Opening:\n', config_file)
    with open(config_file, 'r') as tfh:
        lines = tfh.readlines()
    print('found %i lines'%(len(lines)))
    new_lines_list = ['{\n']
    for line in lines:
        l_key = line.strip().split('=')[0]
        for key_name in key_names_list:
            if not key_name in l_key:
                new_lines_list.append('  ' + line.strip() + '\n')
            else:
                print('removeing\n', line.strip())
                
    new_lines_list.append('}')
    outie_full_file, fext = os.path.splitext(config_file)
    outie_full_file = outie_full_file + '.cleaned' + fext
    print(outie_full_file)
    
    return new_lines_list
```

In [12]:
json_test_template = os.path.join(json_dir, 'GermlineMasterWorkflow.templateSAVEME.json')
keys_list = get_json_keys_list(json_test_template)
print(type(keys_list))
for key in keys_list:
    print(key)

<class 'list'>
{
RealignSoftMemLimit
DebugMode
Bqsr
VqsrHardMemLimit
BashSharedFunctions
BashPreamble
MergeBamEnvProfile
BashPreamble
BqsrSoftMemLimit
BashPreamble
BashSharedFunctions
Sentieon
CenterName
VqsrSoftMemLimit
BqsrHardMemLimit
AlignEnvProfile
AnnotateText
RefPac
TrimSoftMemLimit
BashSharedFunctions
Ref
HaplotyperSoftMemLimit
BqsrKnownSites
RealignHardMemLimit
DebugMode
SentieonThreads
SentieonThreads
RefFai
HaplotyperEnvProfile
SentieonThreads
MarkDuplicates
BashPreamble
RefBwt
Platform
RealignmentScript
NormalInputReads
TrimHardMemLimit
RefFai
BashPreamble
BashSharedFunctions
DebugMode
WorkflowJson
PairedEnd
BashPreamble
TrimEnvProfile
MergeBamScript
RefSa
SampleName
DedupScript
Ref
AlignSoftMemLimit
RefFai
DebugMode
Sentieon
BashSharedFunctions
Sentieon
AlignmentScript
RefPac
BashSharedFunctions
DeliveryHaplotyperVC_Script
HaplotyperVCFSourceField
DebugMode
AlignSoftMemLimit
DBSNP
VqsrIndelResourceString
DeliveryFolder_HaplotyperVC
DeliveryFolder_Alignment
VqsrScript
Platf