# Prototype script to merge "local_env.txt" with "Testonetask.json"

In [1]:
import json
import os
import re
import sys

import numpy as np
import pandas as pd
import yaml

In [2]:
%%writefile ../../data/TestTasks/Testmutect.test_env.txt
BIOAPPS:       /usr/local/apps/bioapps
RUNDIR:        /projects/bioinformatics/DEL
RUNENV:        /projects/bioinformatics/DEL/Config
SRCDIR:        /projects/bioinformatics/DEL/MayomicsVC
GATKDATA:      /projects/bioinformatics/DataPacks/human/gatk_bundle_Oct_2017
REFERENCEDATA: /projects/bioinformatics/DataPacks/human
NORMALDATA:    /projects/mgc/Project_1/BR/MayomicsVC-Dev
TUMORDATA:     /projects/bioinformatics/MRW/MayomicsVC-Dev

Overwriting ../../data/TestTasks/Testmutect.test_env.txt


In [3]:
%%writefile ../../data/TestTasks/Testmutect.test_env_template.json
{
    "CallMutectTask.mutect.BashPreamble": "SRCDIR/src/shell/shell_preamble.sh",
    "CallMutectTask.mutect.BashSharedFunctions": "SRCDIR/src/shell/shared_functions.sh",
    "CallMutectTask.mutect.Bcftools": "BIOAPPS/bcftools/bcftools-1.5",
    "CallMutectTask.mutect.Bgzip": "BIOAPPS/bcftools/htslib-1.3.1/bin",
    "CallMutectTask.mutect.DebugMode": "",
    "CallMutectTask.mutect.FixDPScript": "SRCDIR/src/perl/fixDP.pl",
    "CallMutectTask.mutect.GatkJar": "BIOAPPS/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef",
    "CallMutectTask.mutect.Java": "BIOAPPS/java/java-1.8-64bit/bin",
    "CallMutectTask.mutect.MutectEnvProfile": "RUNDIR/Config/MutectEnvProfile.file",
    "CallMutectTask.mutect.MutectExtraOptionsString": "--dbsnp REFERENCEDATA/gatk_bundle_Oct_2017/gatk_bundle_hg38/dbsnp_138.hg38.vcf",
    "CallMutectTask.mutect.MutectHardMemLimit": "100G",
    "CallMutectTask.mutect.MutectJavaMemOption": "'-Xms2G -Xmx8G'",
    "CallMutectTask.mutect.MutectScript": "SRCDIR/src/shell/mutect.sh",
    "CallMutectTask.mutect.MutectSoftMemLimit": "100G",
    "CallMutectTask.mutect.MutectThreads": "40",
    "CallMutectTask.mutect.NormalBais": "NORMALDATA/Test_Tumor-Normal_Data/normal_80x_tumor_20x_set/hg38_chr_20_21_22_TN_normal.aligned.sorted.bam.bai",
    "CallMutectTask.mutect.NormalBams": "NORMALDATA/Test_Tumor-Normal_Data/normal_80x_tumor_20x_set/hg38_chr_20_21_22_TN_normal.aligned.sorted.bam",
    "CallMutectTask.mutect.Ref": "REFERENCEDATA/Hg38_chr20_21_22_simulated_data_Jan_2018/reference_and_index/Homo_sapiens_assembly38_chr20_21_22.fasta",
    "CallMutectTask.mutect.RefAmb": "REFERENCEDATA/Hg38_chr20_21_22_simulated_data_Jan_2018/reference_and_index/Homo_sapiens_assembly38_chr20_21_22.fasta.amb",
    "CallMutectTask.mutect.RefAnn": "REFERENCEDATA/Hg38_chr20_21_22_simulated_data_Jan_2018/reference_and_index/Homo_sapiens_assembly38_chr20_21_22.fasta.ann",
    "CallMutectTask.mutect.RefBwt": "REFERENCEDATA/Hg38_chr20_21_22_simulated_data_Jan_2018/reference_and_index/Homo_sapiens_assembly38_chr20_21_22.fasta.bwt",
    "CallMutectTask.mutect.RefDict": "REFERENCEDATA/Hg38_chr20_21_22_simulated_data_Jan_2018/reference_and_index/Homo_sapiens_assembly38_chr20_21_22.dict",
    "CallMutectTask.mutect.RefFai": "REFERENCEDATA/Hg38_chr20_21_22_simulated_data_Jan_2018/reference_and_index/Homo_sapiens_assembly38_chr20_21_22.fasta.fai",
    "CallMutectTask.mutect.RefPac": "REFERENCEDATA/Hg38_chr20_21_22_simulated_data_Jan_2018/reference_and_index/Homo_sapiens_assembly38_chr20_21_22.fasta.pac",
    "CallMutectTask.mutect.RefSa": "REFERENCEDATA/Hg38_chr20_21_22_simulated_data_Jan_2018/reference_and_index/Homo_sapiens_assembly38_chr20_21_22.fasta.sa",
    "CallMutectTask.mutect.SampleName": "WGS_chr1_5X_E0.005_chunk1",
    "CallMutectTask.mutect.Samtools": "BIOAPPS/samtools/samtools-1.5",
    "CallMutectTask.mutect.TumorBais": "TUMORDATA/SharedFunctions/ID_fix/ID_FIX.aligned.sorted.bam.bai",
    "CallMutectTask.mutect.TumorBams": "TUMORDATA/SharedFunctions/ID_fix/ID_FIX.aligned.sorted.bam"
}

Overwriting ../../data/TestTasks/Testmutect.test_env_template.json


In [4]:

def get_run_file_dict(dir_data_fullfilename):
    """ Read a yaml file (or a json file, as the other cells of this notebook do) into a dictionary.

    Adapted from     --    get_run_parameters(run_directory, run_file):
    https://github.com/KnowEnG/KnowEnG_Pipelines_Library/tree/master/knpackage/toolbox.py

    Args:
        dir_data_fullfilename: full path name of the yaml or json file to read.

    Returns:
        run_dirs_dict: python dictionary of name - value pairs parsed from the file;
                       an empty dictionary when the file has no content.
    """
    with open(dir_data_fullfilename, 'r') as file_handle:
        # safe_load builds only plain python types; a bare yaml.load(stream) without a
        # Loader is deprecated (PyYAML 5.1) and is rejected by PyYAML 6.
        run_dirs_dict = yaml.safe_load(file_handle)

    # an empty file parses to None -- hand callers a dictionary they can iterate over
    if run_dirs_dict is None:
        return {}

    return run_dirs_dict


def get_localized_wdl_dict(json_fullfilename, dir_data_fullfilename):
    """ Replace the directory aliases in a cromwell-wdl json template with local directory names.

    Usage: localized_cromwell_wdl_dict = get_localized_wdl_dict(json_fullfilename, dir_data_fullfilename)

    Args:
        json_fullfilename:     full path name of the json template whose values contain aliases.
        dir_data_fullfilename: full path name of the yaml file mapping each alias to a directory.

    Returns:
        localized_cromwell_wdl_dict: dictionary with the template keys where every alias found
                               in a string value is replaced by its directory; values that
                               are not strings are copied unchanged.
    """
    cromwell_wdl_dict = get_run_file_dict(json_fullfilename) or {}
    relative_dirs_dict = get_run_file_dict(dir_data_fullfilename) or {}

    # keep only usable aliases: an empty alias would match everywhere and an alias
    # without a value has nothing to substitute
    alias_map = {}
    for dir_key, dir_value in relative_dirs_dict.items():
        if dir_value is not None and str(dir_key) != '':
            alias_map[str(dir_key)] = str(dir_value)

    if not alias_map:
        return dict(cromwell_wdl_dict)

    # One regex pass per value replaces every alias it contains (the previous loop
    # restarted from the original value for each alias, so only the last match survived).
    # Longest alias first so an alias that is a substring of another cannot pre-empt it,
    # and text inserted by one substitution is never re-scanned for other aliases.
    aliases_longest_first = sorted(alias_map, key=len, reverse=True)
    alias_pattern = re.compile('|'.join(re.escape(alias) for alias in aliases_longest_first))

    localized_cromwell_wdl_dict = {}
    for wdl_key, wdl_value in cromwell_wdl_dict.items():
        if isinstance(wdl_value, str):
            localized_cromwell_wdl_dict[wdl_key] = alias_pattern.sub(
                lambda match: alias_map[match.group(0)], wdl_value)
        else:
            # numbers, booleans, lists ... carry no alias text
            localized_cromwell_wdl_dict[wdl_key] = wdl_value

    return localized_cromwell_wdl_dict

def write_json_test_file(test_env_template_json_fullpath,
                         test_env_yaml_fullpath,
                         json_test_filename,
                         json_file_path=None):
    """ Write a cromwell-wdl json file built from a json template and a yaml alias file.

    Args:
        test_env_template_json_fullpath: json template handed to get_localized_wdl_dict.
        test_env_yaml_fullpath:          yaml alias file handed to get_localized_wdl_dict.
        json_test_filename:              name of the json file to write.
        json_file_path:                  directory to write into; the current working
                                         directory is used when this is None or is not
                                         an existing directory.

    Writes:
        json_file:                       the localized dictionary, indented with four spaces.
    """
    # fall back to the current directory unless a usable directory was given
    target_dir = json_file_path
    if target_dir is None or not os.path.isdir(target_dir):
        target_dir = os.getcwd()
    target_fullname = os.path.join(target_dir, json_test_filename)

    wdl_inputs = get_localized_wdl_dict(test_env_template_json_fullpath,
                                        test_env_yaml_fullpath)

    with open(target_fullname, 'w') as json_handle:
        json.dump(wdl_inputs, json_handle, indent="    ")


In [5]:
# Locate the test-task inputs relative to the working directory -- the same
# '../../data/TestTasks' directory the %%writefile cells write to -- instead of a
# user-specific absolute path that only exists on one machine.
TestTask_dir = os.path.abspath(os.path.join('..', '..', 'data', 'TestTasks'))
TestTask_jason_file = 'Testmutect.test_env_template.json'
TestTask_yaml_file = 'Testmutect.test_env.txt'

json_fullfilename = os.path.join(TestTask_dir, TestTask_jason_file)
data_fullfilename = os.path.join(TestTask_dir, TestTask_yaml_file)

# the localized json is written into the current working directory
json_test_filename = 'Testmutect.json'
json_file_path = os.getcwd()

write_json_test_file(json_fullfilename, data_fullfilename, json_test_filename, json_file_path)

In [6]:
# demonstrate get_run_file_dict on json with quotes:
# (inputs are located relative to the working directory, matching the
#  '../../data/TestTasks' directory the %%writefile cells write to)
TestTask_dir = os.path.abspath(os.path.join('..', '..', 'data', 'TestTasks'))
TestTask_jason_file = 'Testmutect.test_env_template.json'
json_fullfilename = os.path.join(TestTask_dir, TestTask_jason_file)

if os.path.isfile(json_fullfilename):
    cromwell_wdl_dict = get_run_file_dict(json_fullfilename)
    # truthiness check also covers a parse result of None, where len() would raise
    if cromwell_wdl_dict:
        for k, v in cromwell_wdl_dict.items():
            print('%s\n\t%s\n'%(k,v))
else:
    print(json_fullfilename, '\nNot Found')


CallMutectTask.mutect.BashPreamble
	SRCDIR/src/shell/shell_preamble.sh

CallMutectTask.mutect.BashSharedFunctions
	SRCDIR/src/shell/shared_functions.sh

CallMutectTask.mutect.Bcftools
	BIOAPPS/bcftools/bcftools-1.5

CallMutectTask.mutect.Bgzip
	BIOAPPS/bcftools/htslib-1.3.1/bin

CallMutectTask.mutect.DebugMode
	

CallMutectTask.mutect.FixDPScript
	SRCDIR/src/perl/fixDP.pl

CallMutectTask.mutect.GatkJar
	BIOAPPS/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef

CallMutectTask.mutect.Java
	BIOAPPS/java/java-1.8-64bit/bin

CallMutectTask.mutect.MutectEnvProfile
	RUNDIR/Config/MutectEnvProfile.file

CallMutectTask.mutect.MutectExtraOptionsString
	--dbsnp REFERENCEDATA/gatk_bundle_Oct_2017/gatk_bundle_hg38/dbsnp_138.hg38.vcf

CallMutectTask.mutect.MutectHardMemLimit
	100G

CallMutectTask.mutect.MutectJavaMemOption
	'-Xms2G -Xmx8G'

CallMutectTask.mutect.MutectScript
	SRCDIR/src/shell/mutect.sh

CallMutectTask.mutect.MutectSoftMemLimit
	100G

CallMutectTask.mutect.MutectThreads
	40

CallMutectTask.m

In [7]:
# display get_run_file_dict on yaml file
# (inputs are located relative to the working directory, matching the
#  '../../data/TestTasks' directory the %%writefile cells write to)
TestTask_dir = os.path.abspath(os.path.join('..', '..', 'data', 'TestTasks'))
TestTask_yaml_file = 'Testmutect.test_env.txt'
dir_data_fullfilename = os.path.join(TestTask_dir, TestTask_yaml_file)

if os.path.isfile(dir_data_fullfilename):
    # "or {}" keeps the loop safe when the parse result is None
    dirs_dict = get_run_file_dict(dir_data_fullfilename) or {}
    for dir_alias, dir_name in dirs_dict.items():
        print('%20s: %s'%(dir_alias, dir_name))
else:
    print(dir_data_fullfilename, '\nNot found')

             BIOAPPS: /usr/local/apps/bioapps
              RUNDIR: /projects/bioinformatics/DEL
              RUNENV: /projects/bioinformatics/DEL/Config
              SRCDIR: /projects/bioinformatics/DEL/MayomicsVC
            GATKDATA: /projects/bioinformatics/DataPacks/human/gatk_bundle_Oct_2017
       REFERENCEDATA: /projects/bioinformatics/DataPacks/human
          NORMALDATA: /projects/mgc/Project_1/BR/MayomicsVC-Dev
           TUMORDATA: /projects/bioinformatics/MRW/MayomicsVC-Dev


In [8]:
# show what get_localized_wdl_dict produces for the template / alias files located
# by the cells above: each wdl key followed by its localized value
localized_cromwell_wdl_dict = get_localized_wdl_dict(
    json_fullfilename,
    dir_data_fullfilename,
)

for k in localized_cromwell_wdl_dict:
    v = localized_cromwell_wdl_dict[k]
    print('%s\n%s\n'%(k,v))

CallMutectTask.mutect.BashPreamble
/projects/bioinformatics/DEL/MayomicsVC/src/shell/shell_preamble.sh

CallMutectTask.mutect.BashSharedFunctions
/projects/bioinformatics/DEL/MayomicsVC/src/shell/shared_functions.sh

CallMutectTask.mutect.Bcftools
/usr/local/apps/bioapps/bcftools/bcftools-1.5

CallMutectTask.mutect.Bgzip
/usr/local/apps/bioapps/bcftools/htslib-1.3.1/bin

CallMutectTask.mutect.DebugMode


CallMutectTask.mutect.FixDPScript
/projects/bioinformatics/DEL/MayomicsVC/src/perl/fixDP.pl

CallMutectTask.mutect.GatkJar
/usr/local/apps/bioapps/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef

CallMutectTask.mutect.Java
/usr/local/apps/bioapps/java/java-1.8-64bit/bin

CallMutectTask.mutect.MutectEnvProfile
/projects/bioinformatics/DEL/Config/MutectEnvProfile.file

CallMutectTask.mutect.MutectExtraOptionsString
--dbsnp /projects/bioinformatics/DataPacks/human/gatk_bundle_Oct_2017/gatk_bundle_hg38/dbsnp_138.hg38.vcf

CallMutectTask.mutect.MutectHardMemLimit
100G

CallMutectTask.mutect.Mutect