In [87]:
import json
import yaml
import uuid
import os
from pathlib import Path

In [88]:
import pandas as pd

In [89]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [90]:
%autoreload 2

In [91]:
import wombat.pecgs as pecgs

#### Generate the PECGS pipeline CWL workflows

In [92]:
# Directory that receives the generated CWL workflows and job templates.
# The location can be overridden with the PECGS_CWL_DIR environment variable;
# when it is unset the original hard-coded path is used, so existing runs are
# unaffected.
cwl_dir = os.environ.get('PECGS_CWL_DIR', '/home/estorrs/pecgs-pipeline/cwl/pecgs_workflows')
# Create the directory (and any missing parents); a no-op when it already exists.
Path(cwl_dir).mkdir(parents=True, exist_ok=True)

###### pecgs_TN_wxs_fq

In [17]:
def _make_tn_wxs_fq_template():
    """Return the placeholder job object for the ``pecgs_TN_wxs_fq`` workflow.

    Scalars are placeholder strings ('a_string', 'a_int') or ``False``;
    File/Directory entries are CWL-style ``{'class': ..., 'path': ...}`` dicts.

    Every File/Directory entry comes from a helper call so that each one is a
    distinct dict object: PyYAML writes anchors/aliases (``&id001``/``*id001``)
    when the same object is reachable under several keys.
    """
    def file_at(path='a/file/path'):
        return {'class': 'File', 'path': path}

    def dir_at(path='a/file/path'):
        return {'class': 'Directory', 'path': path}

    def with_secondary(primary, *companions):
        entry = file_at(primary)
        entry['secondaryFiles'] = [file_at(companion) for companion in companions]
        return entry

    job = {'sample': 'a_string', 'cpu': 'a_int'}

    # paired-end WXS fastqs for tumor and normal
    job.update({
        'tumor_wxs_fq_1': file_at(),
        'tumor_wxs_fq_2': file_at(),
        'normal_wxs_fq_1': file_at(),
        'normal_wxs_fq_2': file_at(),
    })

    # reference fasta plus the index files listed next to it
    job['reference'] = with_secondary(
        'path/to/GRCh38.d1.vd1.fa',
        'path/to/GRCh38.d1.vd1.fa.amb',
        'path/to/GRCh38.d1.vd1.fa.ann',
        'path/to/GRCh38.d1.vd1.fa.bwt',
        'path/to/GRCh38.d1.vd1.fa.fai',
        'path/to/GRCh38.d1.vd1.fa.pac',
        'path/to/GRCh38.d1.vd1.fa.sa',
        'path/to/GRCh38.d1.vd1.dict',
    )

    ## align dna-seq
    job['known_sites'] = with_secondary('path/to/vcf.gz', 'path/to/vcf.gz.tbi')
    job.update({
        'wxs_normal_flowcell': 'a_string',
        'wxs_normal_lane': 'a_string',
        'wxs_normal_index_sequencer': 'a_string',
        'wxs_normal_library_preparation': 'a_string',
        'wxs_normal_platform': 'a_string',
        'wxs_tumor_flowcell': 'a_string',
        'wxs_tumor_lane': 'a_string',
        'wxs_tumor_index_sequencer': 'a_string',
        'wxs_tumor_library_preparation': 'a_string',
        'wxs_tumor_platform': 'a_string',
    })

    # CNV
    job.update({
        'reference_dir': dir_at(),
        'target_interval_list': file_at(),
        'common_biallelic': file_at(),
        'protein_coding_gene': file_at(),
        'pool_of_normals': file_at(),
    })

    # msisensor
    job.update({
        'microsatellite': file_at(),
        'minimal_homopolymer_size': 'a_int',
        'minimal_microsatellite_size': 'a_int',
    })

    # TinDaisy
    job.update({
        'rescue_clinvar': False,
        'rescue_cosmic': False,
        'vep_cache_version': 'a_string',
        'vep_cache_gz': file_at(),
        'clinvar_annotation': file_at(),
        'classification_config': file_at(),
        'af_config': file_at(),
        'call_regions': file_at(),
        'canonical_BED': file_at(),
        'normal_barcode': 'a_string',
        'tumor_barcode': 'a_string',
        'tindaisy_chrlist': file_at(),
        'strelka_config': file_at(),
        'centromere_bed': file_at(),
        'assembly': 'a_string',
        'varscan_config': file_at(),
        'pindel_config': file_at(),
    })

    # TinJasmine
    job.update({
        'sample_barcode': 'a_string',
        'centromere': file_at(),
        'tinjasmine_chrlist': file_at(),
        'Canonical_BED': file_at(),
        'ROI_BED': file_at(),
        'varscan_filter_config': file_at(),
        'pindel_filter_config': file_at(),
        'pindel_config_template': file_at(),
        'gatk_filter_config': file_at(),
    })
    return job


template = _make_tn_wxs_fq_template()

In [18]:
# Write the pecgs_TN_wxs_fq job template into cwl_dir. The context manager
# flushes and closes the file as soon as the dump finishes instead of leaving
# the handle to be reclaimed by garbage collection.
with open(os.path.join(cwl_dir, 'template.pecgs_TN_wxs_fq.yaml'), 'w') as handle:
    yaml.safe_dump(template, handle)

In [19]:
def _build_pecgs_TN_wxs_fq_cwl():
    """Assemble the CWL v1.0 Workflow document ``pecgs_TN_wxs_fq`` as a dict.

    The workflow declares the inputs listed below, runs six steps
    (align_tumor_wxs, align_normal_wxs, run_cnv, run_msisensor, run_tindaisy,
    run_tinjasmine) and exposes the two aligned BAMs plus the CNV, msisensor,
    TinDaisy and TinJasmine result files as outputs.
    """
    def required(ident, kind, **extra):
        # workflow input without a default
        return {'id': ident, 'type': kind, **extra}

    def optional(ident, kind, fallback):
        # workflow input carrying a default value
        return {'id': ident, 'type': kind, 'default': fallback}

    def files(*idents):
        return [required(ident, 'File') for ident in idents]

    def wire(ident, source=None):
        # step input; the source defaults to the workflow input of the same name
        return {'id': ident, 'source': ident if source is None else source}

    def same_name(*idents):
        return [wire(ident) for ident in idents]

    def result(ident, source, **extra):
        # workflow-level File output taken from a step output
        return {'id': ident, 'type': 'File', 'outputSource': source, **extra}

    def step(ident, tool, wiring, produced):
        # every step in this workflow uses its id as its label
        return {
            'id': ident,
            'label': ident,
            'run': tool,
            'in': wiring,
            'out': [{'id': name} for name in produced],
        }

    inputs = [
        required('sample', 'string'),
        optional('cpu', 'int?', 40),
        *files('tumor_wxs_fq_1', 'tumor_wxs_fq_2', 'normal_wxs_fq_1', 'normal_wxs_fq_2'),

        # align dna-seq
        # NOTE(review): the '$(inputs.sample)...' defaults in this list are stored
        # as literal strings; confirm the CWL runner in use expands parameter
        # references inside an input `default` -- TODO verify against the spec.
        optional('tumor_sample', 'string?', '$(inputs.sample).WXS.T'),
        optional('normal_sample', 'string?', '$(inputs.sample).WXS.N'),
        required('known_sites', 'File', secondaryFiles=['.tbi']),
        required('reference', 'File', secondaryFiles=[
            '.amb', '.ann', '.bwt', '.fai', '.pac', '.sa', '^.dict',
        ]),
        optional('wxs_normal_flowcell', 'string?', 'flowcellABCDE'),
        optional('wxs_normal_lane', 'string?', '1'),
        optional('wxs_normal_index_sequencer', 'string?', 'sequencerABCDE'),
        optional('wxs_normal_library_preparation', 'string?', 'lib1'),
        optional('wxs_normal_platform', 'string?', 'ILLUMINA'),
        optional('wxs_tumor_flowcell', 'string?', 'flowcellABCDE'),
        optional('wxs_tumor_lane', 'string?', '1'),
        optional('wxs_tumor_index_sequencer', 'string?', 'sequencerABCDE'),
        optional('wxs_tumor_library_preparation', 'string?', 'lib1'),
        optional('wxs_tumor_platform', 'string?', 'ILLUMINA'),

        # CNV
        required('reference_dir', 'Directory'),
        *files('target_interval_list', 'common_biallelic', 'protein_coding_gene', 'pool_of_normals'),

        # msisensor
        required('microsatellite', 'File'),
        optional('minimal_homopolymer_size', 'int?', 1),
        optional('minimal_microsatellite_size', 'int?', 1),

        # TinDaisy
        optional('rescue_clinvar', 'boolean?', False),
        optional('rescue_cosmic', 'boolean?', False),
        required('vep_cache_version', 'string'),
        *files('vep_cache_gz', 'clinvar_annotation', 'classification_config',
               'af_config', 'call_regions', 'canonical_BED'),
        optional('normal_barcode', 'string?', '$(inputs.sample).N'),
        optional('tumor_barcode', 'string?', '$(inputs.sample).T'),
        *files('tindaisy_chrlist', 'strelka_config', 'centromere_bed'),
        required('assembly', 'string'),
        *files('varscan_config', 'pindel_config'),

        # TinJasmine
        # NOTE(review): no step below reads 'sample_barcode'; run_tinjasmine's
        # sample_barcode input is fed from 'normal_barcode' instead.
        optional('sample_barcode', 'string?', '$(inputs.sample)'),
        *files('centromere', 'tinjasmine_chrlist', 'Canonical_BED', 'ROI_BED',
               'varscan_filter_config', 'pindel_filter_config',
               'pindel_config_template', 'gatk_filter_config'),
    ]

    outputs = [
        result('tumor_wxs_output_bam', 'align_tumor_wxs/output_bam', secondaryFiles=['^.bai']),
        result('normal_wxs_output_bam', 'align_normal_wxs/output_bam', secondaryFiles=['^.bai']),
        result('gene_level_cnv', 'run_cnv/gene_level_cnv'),
        result('msisensor_output_summary', 'run_msisensor/output_summary'),
        result('msisensor_output_dis', 'run_msisensor/output_dis'),
        result('msisensor_output_germline', 'run_msisensor/output_germline'),
        result('msisensor_output_somatic', 'run_msisensor/output_somatic'),
        result('tindaisy_output_maf_clean', 'run_tindaisy/output_maf_clean'),
        result('tindaisy_output_vcf_clean', 'run_tindaisy/output_vcf_clean'),
        result('tindaisy_output_vcf_all', 'run_tindaisy/output_vcf_all'),
        result('tinjasmine_output_maf_clean', 'run_tinjasmine/clean_MAF'),
        result('tinjasmine_output_vcf_clean', 'run_tinjasmine/clean_VCF'),
        result('tinjasmine_output_vcf_all', 'run_tinjasmine/allCall_VCF'),
    ]

    steps = [
        step(
            'align_tumor_wxs',
            '../../submodules/align-dnaseq/cwl/align_dnaseq.cwl',
            [
                wire('sample', 'tumor_sample'),
                wire('cpu'),
                wire('fq_1', 'tumor_wxs_fq_1'),
                wire('fq_2', 'tumor_wxs_fq_2'),
                wire('known_sites'),
                wire('reference'),
                wire('flowcell', 'wxs_tumor_flowcell'),
                wire('lane', 'wxs_tumor_lane'),
                wire('index_sequencer', 'wxs_tumor_index_sequencer'),
                wire('library_preparation', 'wxs_tumor_library_preparation'),
                wire('platform', 'wxs_tumor_platform'),
            ],
            ['output_bam'],
        ),
        step(
            'align_normal_wxs',
            '../../submodules/align-dnaseq/cwl/align_dnaseq.cwl',
            [
                wire('sample', 'normal_sample'),
                wire('cpu'),
                wire('fq_1', 'normal_wxs_fq_1'),
                wire('fq_2', 'normal_wxs_fq_2'),
                wire('known_sites'),
                wire('reference'),
                wire('flowcell', 'wxs_normal_flowcell'),
                wire('lane', 'wxs_normal_lane'),
                wire('index_sequencer', 'wxs_normal_index_sequencer'),
                wire('library_preparation', 'wxs_normal_library_preparation'),
                wire('platform', 'wxs_normal_platform'),
            ],
            ['output_bam'],
        ),
        step(
            'run_cnv',
            '../../submodules/pecgs-cnv/cwl/cnv_workflow.cwl',
            [
                *same_name('sample', 'cpu'),
                wire('tumor_bam', 'align_tumor_wxs/output_bam'),
                wire('normal_bam', 'align_normal_wxs/output_bam'),
                *same_name('reference_dir', 'target_interval_list', 'common_biallelic',
                           'protein_coding_gene', 'pool_of_normals'),
            ],
            ['gene_level_cnv'],
        ),
        step(
            'run_msisensor',
            '../msisensor/msisensor_workflow.cwl',
            [
                wire('threads', 'cpu'),
                wire('tumor_bam', 'align_tumor_wxs/output_bam'),
                wire('normal_bam', 'align_normal_wxs/output_bam'),
                *same_name('microsatellite', 'minimal_homopolymer_size',
                           'minimal_microsatellite_size'),
            ],
            ['output_summary', 'output_dis', 'output_germline', 'output_somatic'],
        ),
        step(
            'run_tindaisy',
            '../../submodules/TinDaisy/cwl/workflows/tindaisy2.cwl',
            [
                wire('tumor_bam', 'align_tumor_wxs/output_bam'),
                wire('normal_bam', 'align_normal_wxs/output_bam'),
                wire('reference_fasta', 'reference'),
                *same_name('pindel_config', 'varscan_config', 'assembly',
                           'centromere_bed', 'strelka_config'),
                wire('chrlist', 'tindaisy_chrlist'),
                *same_name('tumor_barcode', 'normal_barcode', 'canonical_BED',
                           'call_regions', 'af_config', 'classification_config',
                           'clinvar_annotation', 'vep_cache_gz', 'vep_cache_version',
                           'rescue_cosmic', 'rescue_clinvar'),
            ],
            ['output_maf_clean', 'output_vcf_clean', 'output_vcf_all'],
        ),
        step(
            'run_tinjasmine',
            '../../submodules/TinJasmine/cwl/TinJasmine.cwl',
            [
                wire('sample_barcode', 'normal_barcode'),
                wire('bam', 'align_normal_wxs/output_bam'),
                *same_name('reference', 'gatk_filter_config', 'pindel_config_template',
                           'pindel_filter_config', 'varscan_filter_config', 'ROI_BED',
                           'vep_cache_gz', 'vep_cache_version', 'assembly',
                           'Canonical_BED'),
                wire('chrlist', 'tinjasmine_chrlist'),
                wire('centromere'),
            ],
            ['clean_VCF', 'allCall_VCF', 'clean_MAF'],
        ),
    ]

    return {
        'cwlVersion': 'v1.0',
        'class': 'Workflow',
        'id': 'pecgs_TN_wxs_fq',
        'label': 'pecgs_TN_wxs_fq',
        'inputs': inputs,
        'outputs': outputs,
        'steps': steps,
        'requirements': [],
    }


cwl = _build_pecgs_TN_wxs_fq_cwl()

In [20]:
# Write the pecgs_TN_wxs_fq workflow into cwl_dir. The context manager flushes
# and closes the file as soon as the dump finishes instead of leaving the
# handle to be reclaimed by garbage collection.
with open(os.path.join(cwl_dir, 'pecgs_TN_wxs_fq.cwl'), 'w') as handle:
    yaml.safe_dump(cwl, handle)

###### pecgs_TN_wxs_bam

In [26]:
def _make_tn_wxs_bam_template():
    """Return the placeholder job object for the ``pecgs_TN_wxs_bam`` workflow.

    Scalars are placeholder strings ('a_string', 'a_int') or ``False``;
    File/Directory entries are CWL-style ``{'class': ..., 'path': ...}`` dicts.

    Every File/Directory entry comes from a helper call so that each one is a
    distinct dict object: PyYAML writes anchors/aliases (``&id001``/``*id001``)
    when the same object is reachable under several keys.
    """
    def file_at(path='a/file/path'):
        return {'class': 'File', 'path': path}

    def dir_at(path='a/file/path'):
        return {'class': 'Directory', 'path': path}

    job = {'sample': 'a_string', 'cpu': 'a_int'}

    # pre-aligned WXS BAMs for tumor and normal
    job['tumor_wxs_bam'] = file_at()
    job['normal_wxs_bam'] = file_at()

    # reference fasta plus the index files listed next to it
    reference = file_at('path/to/GRCh38.d1.vd1.fa')
    reference['secondaryFiles'] = [
        file_at('path/to/GRCh38.d1.vd1.fa.amb'),
        file_at('path/to/GRCh38.d1.vd1.fa.ann'),
        file_at('path/to/GRCh38.d1.vd1.fa.bwt'),
        file_at('path/to/GRCh38.d1.vd1.fa.fai'),
        file_at('path/to/GRCh38.d1.vd1.fa.pac'),
        file_at('path/to/GRCh38.d1.vd1.fa.sa'),
        file_at('path/to/GRCh38.d1.vd1.dict'),
    ]
    job['reference'] = reference

    # CNV
    job.update({
        'reference_dir': dir_at(),
        'target_interval_list': file_at(),
        'common_biallelic': file_at(),
        'protein_coding_gene': file_at(),
        'pool_of_normals': file_at(),
    })

    # msisensor
    job.update({
        'microsatellite': file_at(),
        'minimal_homopolymer_size': 'a_int',
        'minimal_microsatellite_size': 'a_int',
    })

    # TinDaisy
    job.update({
        'rescue_clinvar': False,
        'rescue_cosmic': False,
        'vep_cache_version': 'a_string',
        'vep_cache_gz': file_at(),
        'clinvar_annotation': file_at(),
        'classification_config': file_at(),
        'af_config': file_at(),
        'call_regions': file_at(),
        'canonical_BED': file_at(),
        'normal_barcode': 'a_string',
        'tumor_barcode': 'a_string',
        'tindaisy_chrlist': file_at(),
        'strelka_config': file_at(),
        'centromere_bed': file_at(),
        'assembly': 'a_string',
        'varscan_config': file_at(),
        'pindel_config': file_at(),
    })

    # TinJasmine
    job.update({
        'sample_barcode': 'a_string',
        'centromere': file_at(),
        'tinjasmine_chrlist': file_at(),
        'Canonical_BED': file_at(),
        'ROI_BED': file_at(),
        'varscan_filter_config': file_at(),
        'pindel_filter_config': file_at(),
        'pindel_config_template': file_at(),
        'gatk_filter_config': file_at(),
    })
    return job


template = _make_tn_wxs_bam_template()

In [27]:
# Write the pecgs_TN_wxs_bam job template into cwl_dir. The context manager
# flushes and closes the file as soon as the dump finishes instead of leaving
# the handle to be reclaimed by garbage collection.
with open(os.path.join(cwl_dir, 'template.pecgs_TN_wxs_bam.yaml'), 'w') as handle:
    yaml.safe_dump(template, handle)

In [28]:
def _build_pecgs_TN_wxs_bam_cwl():
    """Assemble the CWL v1.0 Workflow document ``pecgs_TN_wxs_bam`` as a dict.

    The workflow takes tumor and normal BAMs directly (no alignment steps),
    runs four steps (run_cnv, run_msisensor, run_tindaisy, run_tinjasmine) and
    exposes their result files as outputs.
    """
    def required(ident, kind, **extra):
        # workflow input without a default
        return {'id': ident, 'type': kind, **extra}

    def optional(ident, kind, fallback):
        # workflow input carrying a default value
        return {'id': ident, 'type': kind, 'default': fallback}

    def files(*idents):
        return [required(ident, 'File') for ident in idents]

    def wire(ident, source=None):
        # step input; the source defaults to the workflow input of the same name
        return {'id': ident, 'source': ident if source is None else source}

    def same_name(*idents):
        return [wire(ident) for ident in idents]

    def result(ident, source):
        # workflow-level File output taken from a step output
        return {'id': ident, 'type': 'File', 'outputSource': source}

    def step(ident, tool, wiring, produced):
        # every step in this workflow uses its id as its label
        return {
            'id': ident,
            'label': ident,
            'run': tool,
            'in': wiring,
            'out': [{'id': name} for name in produced],
        }

    inputs = [
        required('sample', 'string'),
        optional('cpu', 'int?', 40),
        *files('tumor_wxs_bam', 'normal_wxs_bam'),

        required('reference', 'File', secondaryFiles=[
            '.amb', '.ann', '.bwt', '.fai', '.pac', '.sa', '^.dict',
        ]),

        # CNV
        required('reference_dir', 'Directory'),
        *files('target_interval_list', 'common_biallelic', 'protein_coding_gene', 'pool_of_normals'),

        # msisensor
        required('microsatellite', 'File'),
        optional('minimal_homopolymer_size', 'int?', 1),
        optional('minimal_microsatellite_size', 'int?', 1),

        # TinDaisy
        optional('rescue_clinvar', 'boolean?', False),
        optional('rescue_cosmic', 'boolean?', False),
        required('vep_cache_version', 'string'),
        *files('vep_cache_gz', 'clinvar_annotation', 'classification_config',
               'af_config', 'call_regions', 'canonical_BED'),
        # NOTE(review): the '$(inputs.sample)...' defaults below are stored as
        # literal strings; confirm the CWL runner in use expands parameter
        # references inside an input `default` -- TODO verify against the spec.
        optional('normal_barcode', 'string?', '$(inputs.sample).N'),
        optional('tumor_barcode', 'string?', '$(inputs.sample).T'),
        *files('tindaisy_chrlist', 'strelka_config', 'centromere_bed'),
        required('assembly', 'string'),
        *files('varscan_config', 'pindel_config'),

        # TinJasmine
        # NOTE(review): no step below reads 'sample_barcode'; run_tinjasmine's
        # sample_barcode input is fed from 'normal_barcode' instead.
        optional('sample_barcode', 'string?', '$(inputs.sample)'),
        *files('centromere', 'tinjasmine_chrlist', 'Canonical_BED', 'ROI_BED',
               'varscan_filter_config', 'pindel_filter_config',
               'pindel_config_template', 'gatk_filter_config'),
    ]

    outputs = [
        result('gene_level_cnv', 'run_cnv/gene_level_cnv'),
        result('msisensor_output_summary', 'run_msisensor/output_summary'),
        result('msisensor_output_dis', 'run_msisensor/output_dis'),
        result('msisensor_output_germline', 'run_msisensor/output_germline'),
        result('msisensor_output_somatic', 'run_msisensor/output_somatic'),
        result('tindaisy_output_maf_clean', 'run_tindaisy/output_maf_clean'),
        result('tindaisy_output_vcf_clean', 'run_tindaisy/output_vcf_clean'),
        result('tindaisy_output_vcf_all', 'run_tindaisy/output_vcf_all'),
        result('tinjasmine_output_maf_clean', 'run_tinjasmine/clean_MAF'),
        result('tinjasmine_output_vcf_clean', 'run_tinjasmine/clean_VCF'),
        result('tinjasmine_output_vcf_all', 'run_tinjasmine/allCall_VCF'),
    ]

    steps = [
        step(
            'run_cnv',
            '../../submodules/pecgs-cnv/cwl/cnv_workflow.cwl',
            [
                *same_name('sample', 'cpu'),
                wire('tumor_bam', 'tumor_wxs_bam'),
                wire('normal_bam', 'normal_wxs_bam'),
                *same_name('reference_dir', 'target_interval_list', 'common_biallelic',
                           'protein_coding_gene', 'pool_of_normals'),
            ],
            ['gene_level_cnv'],
        ),
        step(
            'run_msisensor',
            '../msisensor/msisensor_workflow.cwl',
            [
                wire('threads', 'cpu'),
                wire('tumor_bam', 'tumor_wxs_bam'),
                wire('normal_bam', 'normal_wxs_bam'),
                *same_name('microsatellite', 'minimal_homopolymer_size',
                           'minimal_microsatellite_size'),
            ],
            ['output_summary', 'output_dis', 'output_germline', 'output_somatic'],
        ),
        step(
            'run_tindaisy',
            '../../submodules/TinDaisy/cwl/workflows/tindaisy2.cwl',
            [
                wire('tumor_bam', 'tumor_wxs_bam'),
                wire('normal_bam', 'normal_wxs_bam'),
                wire('reference_fasta', 'reference'),
                *same_name('pindel_config', 'varscan_config', 'assembly',
                           'centromere_bed', 'strelka_config'),
                wire('chrlist', 'tindaisy_chrlist'),
                *same_name('tumor_barcode', 'normal_barcode', 'canonical_BED',
                           'call_regions', 'af_config', 'classification_config',
                           'clinvar_annotation', 'vep_cache_gz', 'vep_cache_version',
                           'rescue_cosmic', 'rescue_clinvar'),
            ],
            ['output_maf_clean', 'output_vcf_clean', 'output_vcf_all'],
        ),
        step(
            'run_tinjasmine',
            '../../submodules/TinJasmine/cwl/TinJasmine.cwl',
            [
                wire('sample_barcode', 'normal_barcode'),
                wire('bam', 'normal_wxs_bam'),
                *same_name('reference', 'gatk_filter_config', 'pindel_config_template',
                           'pindel_filter_config', 'varscan_filter_config', 'ROI_BED',
                           'vep_cache_gz', 'vep_cache_version', 'assembly',
                           'Canonical_BED'),
                wire('chrlist', 'tinjasmine_chrlist'),
                wire('centromere'),
            ],
            ['clean_VCF', 'allCall_VCF', 'clean_MAF'],
        ),
    ]

    return {
        'cwlVersion': 'v1.0',
        'class': 'Workflow',
        'id': 'pecgs_TN_wxs_bam',
        'label': 'pecgs_TN_wxs_bam',
        'inputs': inputs,
        'outputs': outputs,
        'steps': steps,
        'requirements': [],
    }


cwl = _build_pecgs_TN_wxs_bam_cwl()

In [29]:
# Write the pecgs_TN_wxs_bam workflow into cwl_dir. The context manager flushes
# and closes the file as soon as the dump finishes instead of leaving the
# handle to be reclaimed by garbage collection.
with open(os.path.join(cwl_dir, 'pecgs_TN_wxs_bam.cwl'), 'w') as handle:
    yaml.safe_dump(cwl, handle)

###### pecgs_T_rna_fq

In [138]:
# Placeholder inputs for the pecgs_T_rna_fq workflow. Every value is a dummy
# ('a_string', 'a_int', 'a/file/path', ...) standing in for a real input; the
# keys mirror the ids of the workflow inputs.
template = {
    'sample': 'a_string',
    'cpu': 'a_int',
    'tumor_rna_fq_1': {'class': 'File', 'path': 'a/file/path'},
    'tumor_rna_fq_2': {'class': 'File', 'path': 'a/file/path'},

    # fusion
    'genome_lib_dir': {'class': 'Directory', 'path': 'a/file/path'},
    'genome_db': {'class': 'Directory', 'path': 'a/file/path'},
    'bwts': {'class': 'Directory', 'path': 'a/file/path'},
    'integrate_executable': {'class': 'File', 'path': 'a/file/path'},
    'integrate_fasta': {'class': 'File', 'path': 'a/file/path'},
    'integrate_annotations': {'class': 'File', 'path': 'a/file/path'},
    'filter_database': {'class': 'Directory', 'path': 'a/file/path'},
    'fusion_annotator_dir': {'class': 'Directory', 'path': 'a/file/path'},

    # bulk expression (the two FASTQ entries are lists of File objects)
    'bulk_expression_fq_1_list': [{'class': 'File', 'path': 'a/file/path'}],
    'bulk_expression_fq_2_list': [{'class': 'File', 'path': 'a/file/path'}],
    'star_index': {'class': 'Directory', 'path': 'a/dir/path'},
    'gtf': {'class': 'File', 'path': 'a/file/path'},
    'gene_info': {'class': 'File', 'path': 'a/file/path'},
}

In [139]:
# Write the placeholder inputs to <cwl_dir>/template.pecgs_T_rna_fq.yaml.
# The context manager guarantees the handle is flushed and closed; the previous
# inline open(...) left closing to the garbage collector.
with open(os.path.join(cwl_dir, 'template.pecgs_T_rna_fq.yaml'), 'w') as template_file:
    yaml.safe_dump(template, template_file)

In [136]:
# CWL v1.0 Workflow description for the pecgs_T_rna_fq variant.
# Two steps are declared (run_fusion and run_bulk_expression); every step input
# is wired directly to a workflow-level input, and the four workflow outputs are
# taken from the outputs of those two steps.
cwl = {
    'cwlVersion': 'v1.0',
    'class': 'Workflow',
    'id': 'pecgs_T_rna_fq',
    'label': 'pecgs_T_rna_fq',
    'inputs': [
        {'id': 'sample', 'type': 'string'},
        # 'int?' with a default: 40 is used when no cpu value is supplied
        {'id': 'cpu', 'type': 'int?', 'default': 40},
        {'id': 'tumor_rna_fq_1', 'type': 'File'},
        {'id': 'tumor_rna_fq_2', 'type': 'File'},

        # fusion
        {'id': 'genome_lib_dir', 'type': 'Directory'},
        {'id': 'genome_db', 'type': 'Directory'},
        {'id': 'bwts', 'type': 'Directory'},
        {'id': 'integrate_executable', 'type': 'File'},
        {'id': 'integrate_fasta', 'type': 'File'},
        {'id': 'integrate_annotations', 'type': 'File'},
        {'id': 'filter_database', 'type': 'Directory'},
        {'id': 'fusion_annotator_dir', 'type': 'Directory'},

        # bulk_expression
        {'id': 'bulk_expression_fq_1_list', 'type': 'File[]'},
        {'id': 'bulk_expression_fq_2_list', 'type': 'File[]'},
        {'id': 'star_index', 'type': 'Directory'},
        {'id': 'gtf', 'type': 'File'},
        {'id': 'gene_info', 'type': 'File'},
    ],
    'outputs': [
        {'id': output_id, 'type': 'File', 'outputSource': output_source}
        for output_id, output_source in [
            ('filtered_fusions', 'run_fusion/filtered_fusions'),
            ('total_fusions', 'run_fusion/total_fusions'),
            ('readcounts_and_fpkm_tsv', 'run_bulk_expression/readcounts_and_fpkm_tsv'),
            ('output_bam', 'run_bulk_expression/output_bam'),
        ]
    ],
    'steps': [
        {
            'id': 'run_fusion',
            'label': 'run_fusion',
            'run': '../../submodules/pecgs-fusion/cwl/fusion.cwl',
            # pairs are (step input id, workflow input that feeds it)
            'in': [
                {'id': step_input, 'source': workflow_input}
                for step_input, workflow_input in [
                    ('sample', 'sample'),
                    ('cpu', 'cpu'),
                    ('fq_1', 'tumor_rna_fq_1'),
                    ('fq_2', 'tumor_rna_fq_2'),
                    ('filter_database', 'filter_database'),
                    ('bwts', 'bwts'),
                    ('fusion_annotator_dir', 'fusion_annotator_dir'),
                    ('genome_db', 'genome_db'),
                    ('genome_lib_dir', 'genome_lib_dir'),
                    ('integrate_annotations', 'integrate_annotations'),
                    ('integrate_executable', 'integrate_executable'),
                    ('integrate_fasta', 'integrate_fasta'),
                ]
            ],
            'out': [{'id': step_output} for step_output in ('filtered_fusions', 'total_fusions')],
        },
        {
            'id': 'run_bulk_expression',
            'label': 'run_bulk_expression',
            'run': '../../submodules/pecgs-bulk-expression/cwl/bulk_expression.cwl',
            # pairs are (step input id, workflow input that feeds it)
            'in': [
                {'id': step_input, 'source': workflow_input}
                for step_input, workflow_input in [
                    ('cpu', 'cpu'),
                    ('fq_1', 'bulk_expression_fq_1_list'),
                    ('fq_2', 'bulk_expression_fq_2_list'),
                    ('star_index', 'star_index'),
                    ('gtf', 'gtf'),
                    ('gene_info', 'gene_info'),
                ]
            ],
            'out': [{'id': step_output} for step_output in ('readcounts_and_fpkm_tsv', 'output_bam')],
        },
    ],
    # No requirements are declared; the entry below is kept commented out.
    'requirements': [
        # {'class': 'MultipleInputFeatureRequirement'}
    ],
}

In [137]:
# Serialize the workflow dict to <cwl_dir>/pecgs_T_rna_fq.cwl.
# The context manager guarantees the handle is flushed and closed; the previous
# inline open(...) left closing to the garbage collector.
with open(os.path.join(cwl_dir, 'pecgs_T_rna_fq.cwl'), 'w') as cwl_file:
    yaml.safe_dump(cwl, cwl_file)

#### test runs

###### test run pecgs_TN_wxs_fq

In [31]:
# Preview the tab-separated test run list, indexed by its 'run_id' column.
# The frame gets its own name: `run_list` is used in this section for the
# run-list *file name* (a str) handed to pecgs.create_run_setup_scripts, so
# binding a DataFrame to that name made re-running cells out of order unsafe.
run_list_df = pd.read_csv('../tests/data/pecgs_TN_wxs_fq/run_list.txt', sep='\t', index_col='run_id')
run_list_df

Unnamed: 0_level_0,case_id,run_uuid,wxs_normal_R1.filepath,wxs_normal_R1.uuid,wxs_normal_R2.filepath,wxs_normal_R2.uuid,wxs_tumor_R1.filepath,wxs_tumor_R1.uuid,wxs_tumor_R2.filepath,wxs_tumor_R2.uuid
run_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
HT191P1-S1H1A3Y3_afe9f9e5-2921-4539-9578-fd01b71c6c1e,HT191P1-S1H1A3Y3,afe9f9e5-2921-4539-9578-fd01b71c6c1e,/scratch1/fs1/dinglab/estorrs/pecgs_resources/...,eafb3178-e459-44c0-a374-8e17a8107fce,/scratch1/fs1/dinglab/estorrs/pecgs_resources/...,9923349f-dc3c-40d4-a097-803c4828f732,/scratch1/fs1/dinglab/estorrs/pecgs_resources/...,78777884-505c-4b2f-9170-107662c1effa,/scratch1/fs1/dinglab/estorrs/pecgs_resources/...,2699158f-25fe-4f19-a7cf-2a4e1cf687b3


In [33]:
# Preview the tab-separated sequencing-info table, indexed by its 'run_id' column.
# The frame gets its own name: `sequencing_info` is used in this section for the
# sequencing-info *file name* (a str) handed to pecgs.create_run_setup_scripts,
# so binding a DataFrame to that name made re-running cells out of order unsafe.
sequencing_info_df = pd.read_csv('../tests/data/pecgs_TN_wxs_fq/sequencing_info.txt', sep='\t', index_col='run_id')
sequencing_info_df

Unnamed: 0_level_0,sample_id,run_uuid,experimental_strategy,sample_type,flowcell,lane,index_sequencer,library_preparation,platform
run_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
HT191P1-S1H1A3Y3_419b8098-e4b2-4318-883d-c233cb7e06c8,HT191P1-S1H1A3Y3,419b8098-e4b2-4318-883d-c233cb7e06c8,wxs,tumor,HFMFWDSXY,2,CCAGTAGCGT-ATGTATTGGC,TWCE-HT191P1-S1H1A3Y3D1_1-lib1,ILLUMINA
HT191P1-S1H1A3Y3_419b8098-e4b2-4318-883d-c233cb7e06c8,HT191P1-S1H1A3Y3,419b8098-e4b2-4318-883d-c233cb7e06c8,wxs,normal,HH7KNDSXY,1,CATTATCGCT-CTTGAAGGTT,TWCE-HT191P1-JM1D1_1-lib1,ILLUMINA


In [50]:
# Test-run configuration for the pecgs_TN_wxs_fq variant.
# The variant name is written once and the variant-specific directories are
# derived from it, so run_dir, out_dir and pipeline_variant cannot drift apart
# when this cell is adapted for another variant.
pipeline_variant = 'pecgs_TN_wxs_fq'
tool_root = '/storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline'
run_dir = '/scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/' + pipeline_variant
out_dir = '../tests/data/' + pipeline_variant
# Bare file names; they are embedded as-is in the generated commands.
# NOTE(review): presumably resolved relative to out_dir when the commands run - confirm.
run_list = 'run_list.txt'
sequencing_info = 'sequencing_info.txt'

In [51]:
# Generate the commands for this test run. The call yields four values: an
# iterable of launch commands plus the make-run, tidy-run and summarize-run
# command strings.
launch_cmds, make_cmd, tidy_cmd, summarize_cmd = pecgs.create_run_setup_scripts(
    tool_root,
    out_dir,
    pipeline_variant,
    run_list,
    run_dir,
    sequencing_info=sequencing_info,
)

In [52]:
# Print each launch command on its own line so it can be copied into a shell.
for launch_cmd in launch_cmds:
    print(launch_cmd)

export LSF_DOCKER_VOLUMES="/storage1/fs1/dinglab/Active:/storage1/fs1/dinglab/Active /scratch1/fs1/dinglab:/scratch1/fs1/dinglab"
export PATH="/miniconda/envs/pecgs/bin:$PATH"
bsub -q dinglab-interactive -G compute-dinglab -Is -a 'docker(estorrs/pecgs-pipeline:0.0.1)' '/bin/bash'


In [53]:
# Show the generated make-run command string.
make_cmd

'python /storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline/src/compute1/generate_run_commands.py make-run --sequencing-info sequencing_info.txt pecgs_TN_wxs_fq run_list.txt /scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs_TN_wxs_fq'

In [54]:
# Show the generated tidy-run command string.
tidy_cmd

'python /storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline/src/compute1/generate_run_commands.py tidy-run pecgs_TN_wxs_fq run_list.txt /scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs_TN_wxs_fq'

In [55]:
# Show the generated summarize-run command string.
summarize_cmd

'python /storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline/src/compute1/generate_run_commands.py summarize-run pecgs_TN_wxs_fq run_list.txt /scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs_TN_wxs_fq'

###### test run pecgs_TN_wxs_bam

In [80]:
# Preview the tab-separated test run list, indexed by its 'run_id' column.
# The frame gets its own name: `run_list` is used in this section for the
# run-list *file name* (a str) handed to pecgs.create_run_setup_scripts, so
# binding a DataFrame to that name made re-running cells out of order unsafe.
run_list_df = pd.read_csv('../tests/data/pecgs_TN_wxs_bam/run_list.txt', sep='\t', index_col='run_id')
run_list_df

Unnamed: 0_level_0,case_id,run_uuid,wxs_normal_bam.filepath,wxs_normal_bam.uuid,wxs_tumor_bam.filepath,wxs_tumor_bam.uuid
run_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
C3L-00677_2a9edf1e-76df-4595-9125-fb9b9d919621,C3L-00677,2a9edf1e-76df-4595-9125-fb9b9d919621,/storage1/fs1/dinglab/Active/Projects/estorrs/...,8919accb-6973-4ec5-b29f-9d895a1713d4,/storage1/fs1/dinglab/Active/Projects/estorrs/...,6e0bde4a-22c1-4e1f-a38a-dff275b61472


In [81]:
# Test-run configuration for the pecgs_TN_wxs_bam variant.
# The variant name is written once and the variant-specific directories are
# derived from it, so run_dir, out_dir and pipeline_variant cannot drift apart
# when this cell is adapted for another variant.
pipeline_variant = 'pecgs_TN_wxs_bam'
tool_root = '/storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline'
run_dir = '/scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/' + pipeline_variant
out_dir = '../tests/data/' + pipeline_variant
# Bare file name; it is embedded as-is in the generated commands.
# NOTE(review): presumably resolved relative to out_dir when the commands run - confirm.
run_list = 'run_list.txt'

In [82]:
# Generate the commands for this test run. The call yields four values: an
# iterable of launch commands plus the make-run, tidy-run and summarize-run
# command strings.
launch_cmds, make_cmd, tidy_cmd, summarize_cmd = pecgs.create_run_setup_scripts(
    tool_root,
    out_dir,
    pipeline_variant,
    run_list,
    run_dir,
)

In [83]:
# Print each launch command on its own line so it can be copied into a shell.
for launch_cmd in launch_cmds:
    print(launch_cmd)

export LSF_DOCKER_VOLUMES="/storage1/fs1/dinglab/Active:/storage1/fs1/dinglab/Active /scratch1/fs1/dinglab:/scratch1/fs1/dinglab"
export PATH="/miniconda/envs/pecgs/bin:$PATH"
bsub -q dinglab-interactive -G compute-dinglab -Is -a 'docker(estorrs/pecgs-pipeline:0.0.1)' '/bin/bash'


In [84]:
# Show the generated make-run command string.
make_cmd

'python /storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline/src/compute1/generate_run_commands.py make-run pecgs_TN_wxs_bam run_list.txt /scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs_TN_wxs_bam'

In [85]:
# Show the generated tidy-run command string.
tidy_cmd

'python /storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline/src/compute1/generate_run_commands.py tidy-run pecgs_TN_wxs_bam run_list.txt /scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs_TN_wxs_bam'

In [86]:
# Show the generated summarize-run command string.
summarize_cmd

'python /storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline/src/compute1/generate_run_commands.py summarize-run pecgs_TN_wxs_bam run_list.txt /scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs_TN_wxs_bam'

###### test run pecgs_T_rna_fq

In [126]:
# Preview the tab-separated test run list, indexed by its 'run_id' column.
# The frame gets its own name: `run_list` is used in this section for the
# run-list *file name* (a str) handed to pecgs.create_run_setup_scripts, so
# binding a DataFrame to that name made re-running cells out of order unsafe.
run_list_df = pd.read_csv('../tests/data/pecgs_T_rna_fq/run_list.txt', sep='\t', index_col='run_id')
run_list_df

Unnamed: 0_level_0,case_id,run_uuid,rna-seq_tumor_R1.filepath,rna-seq_tumor_R1.uuid,rna-seq_tumor_R2.filepath,rna-seq_tumor_R2.uuid
run_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
HT191P1-S1H1A3Y3_be6b68e6-365d-4451-8195-f1b3d5bb5519,HT191P1-S1H1A3Y3,be6b68e6-365d-4451-8195-f1b3d5bb5519,/scratch1/fs1/dinglab/estorrs/pecgs_resources/...,fb1f4b8a-a56b-4bc6-b059-864c2b066dc8,/scratch1/fs1/dinglab/estorrs/pecgs_resources/...,80ce8102-e8a4-4529-85b0-701a4dc022ea


In [127]:
# Test-run configuration for the pecgs_T_rna_fq variant.
# The variant name is written once and the variant-specific directories are
# derived from it, so run_dir, out_dir and pipeline_variant cannot drift apart
# when this cell is adapted for another variant.
pipeline_variant = 'pecgs_T_rna_fq'
tool_root = '/storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline'
run_dir = '/scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/' + pipeline_variant
out_dir = '../tests/data/' + pipeline_variant
# Bare file name; it is embedded as-is in the generated commands.
# NOTE(review): presumably resolved relative to out_dir when the commands run - confirm.
run_list = 'run_list.txt'
# Forwarded as queue=...; shows up as '--queue dinglab' in the make-run command.
queue = 'dinglab'

In [128]:
# Generate the commands for this test run, targeting the configured queue.
# The call yields four values: an iterable of launch commands plus the
# make-run, tidy-run and summarize-run command strings.
launch_cmds, make_cmd, tidy_cmd, summarize_cmd = pecgs.create_run_setup_scripts(
    tool_root,
    out_dir,
    pipeline_variant,
    run_list,
    run_dir,
    queue=queue,
)

In [129]:
# Show the generated make-run command string (includes the --queue option).
make_cmd

'python /storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline/src/compute1/generate_run_commands.py make-run  --queue dinglab pecgs_T_rna_fq run_list.txt /scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs_T_rna_fq'