In [2]:
import json
import yaml
import uuid
import os
from pathlib import Path

In [3]:
import pandas as pd

In [4]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
%autoreload 2

In [11]:
import wombat.pecgs as pecgs

###### Build the PECGS pipeline CWL workflow and its input template

In [26]:
# Directory that receives the generated workflow (.cwl) and its input template (.yaml).
# The default is the original hard-coded location; set PECGS_CWL_DIR to write somewhere
# else without editing the notebook.
# NOTE(review): the generated workflow refers to its tools through relative paths such as
# '../../submodules/...', so an override presumably has to sit at the same depth inside a
# pipeline checkout - confirm before pointing it elsewhere.
cwl_dir = os.environ.get('PECGS_CWL_DIR', '/home/estorrs/pecgs-pipeline/cwl/pecgs_workflows')
# Create the directory (and any missing parents); an already existing directory is fine.
Path(cwl_dir).mkdir(parents=True, exist_ok=True)

In [27]:
def _file(path='a/file/path', companions=()):
    """Return a new CWL ``File`` placeholder.

    ``companions`` is an optional sequence of paths that are attached as
    ``secondaryFiles`` (each one a ``File`` entry of its own). A fresh dict is
    built on every call, so no two template entries share an object.
    """
    entry = {'class': 'File', 'path': path}
    if companions:
        entry['secondaryFiles'] = [{'class': 'File', 'path': extra} for extra in companions]
    return entry


def _directory(path='a/file/path'):
    """Return a new CWL ``Directory`` placeholder (a fresh dict per call)."""
    return {'class': 'Directory', 'path': path}


# Fill-in-the-blanks input document for the workflow: every value is a placeholder
# ('a_string', 'a_int', 'a/file/path', ...) showing the kind of value expected.
template = {
    'sample': 'a_string',
    'cpu': 'a_int',
    'tumor_wxs_fq_1': _file(),
    'tumor_wxs_fq_2': _file(),
    'normal_wxs_fq_1': _file(),
    'normal_wxs_fq_2': _file(),
    'tumor_rna_fq_1': _file(),
    'tumor_rna_fq_2': _file(),

    ## align dna-seq
    'reference': _file(
        'path/to/GRCh38.d1.vd1.fa',
        companions=(
            'path/to/GRCh38.d1.vd1.fa.amb',
            'path/to/GRCh38.d1.vd1.fa.ann',
            'path/to/GRCh38.d1.vd1.fa.bwt',
            'path/to/GRCh38.d1.vd1.fa.fai',
            'path/to/GRCh38.d1.vd1.fa.pac',
            'path/to/GRCh38.d1.vd1.fa.sa',
            'path/to/GRCh38.d1.vd1.dict',
        ),
    ),
    'known_sites': _file('path/to/vcf.gz', companions=('path/to/vcf.gz.tbi',)),
    'wxs_normal_flowcell': 'a_string',
    'wxs_normal_lane': 'a_string',
    'wxs_normal_index_sequencer': 'a_string',
    'wxs_normal_library_preparation': 'a_string',
    'wxs_normal_platform': 'a_string',
    'wxs_tumor_flowcell': 'a_string',
    'wxs_tumor_lane': 'a_string',
    'wxs_tumor_index_sequencer': 'a_string',
    'wxs_tumor_library_preparation': 'a_string',
    'wxs_tumor_platform': 'a_string',

    # fusion
    'genome_lib_dir': _directory(),
    'genome_db': _directory(),
    'bwts': _directory(),
    'integrate_executable': _file(),
    'integrate_fasta': _file(),
    'integrate_annotations': _file(),
    'filter_database': _directory(),
    'fusion_annotator_dir': _directory(),

    # CNV
    'reference_dir': _directory(),
    'target_interval_list': _file(),
    'common_biallelic': _file(),
    'protein_coding_gene': _file(),
    'pool_of_normals': _file(),

    # msisensor
    'microsatellite': _file(),
    'minimal_homopolymer_size': 'a_int',
    'minimal_microsatellite_size': 'a_int',

    # TinDaisy
    'rescue_clinvar': False,
    'rescue_cosmic': False,
    'vep_cache_version': 'a_string',
    'vep_cache_gz': _file(),
    'clinvar_annotation': _file(),
    'classification_config': _file(),
    'af_config': _file(),
    'call_regions': _file(),
    'canonical_BED': _file(),
    'normal_barcode': 'a_string',
    'tumor_barcode': 'a_string',
    'tindaisy_chrlist': _file(),
    'strelka_config': _file(),
    'centromere_bed': _file(),
    'assembly': 'a_string',
    'varscan_config': _file(),
    'pindel_config': _file(),

    # TinJasmine
    'sample_barcode': 'a_string',
    'centromere': _file(),
    'tinjasmine_chrlist': _file(),
    'Canonical_BED': _file(),
    'ROI_BED': _file(),
    'varscan_filter_config': _file(),
    'pindel_filter_config': _file(),
    'pindel_config_template': _file(),
    'gatk_filter_config': _file(),
}

In [28]:
# Write the input template into cwl_dir. The context manager closes the file as soon as
# the dump finishes, so the YAML is flushed to disk and no handle is left open in the
# kernel (the previous bare open() was never closed).
with open(os.path.join(cwl_dir, 'template.pecgs_TN_wxs_fq_T_rna_fq.yaml'), 'w') as template_file:
    yaml.safe_dump(template, template_file)

In [53]:
def _wf_input(name, cwl_type, **extras):
    """Describe one workflow input.

    The record holds the input ``id`` and CWL ``type``; any keyword arguments
    (``default``, ``secondaryFiles``) are added to the record as given.
    """
    record = {'id': name, 'type': cwl_type}
    record.update(extras)
    return record


def _wf_output(name, source, **extras):
    """Describe one ``File`` output that is taken from the step output ``source``."""
    record = {'id': name, 'type': 'File', 'outputSource': source}
    record.update(extras)
    return record


def _wf_step(name, run, wiring, produced):
    """Describe one workflow step.

    ``name`` is used for both the step id and its label, ``run`` is the path of
    the tool/workflow file. ``wiring`` lists the step's input ports: a bare
    string connects the port to the source of the same name, a
    ``(port, source)`` pair names the source explicitly. ``produced`` lists the
    ids of the step outputs.
    """
    links = []
    for item in wiring:
        port, source = (item, item) if isinstance(item, str) else item
        links.append({'id': port, 'source': source})
    return {
        'id': name,
        'label': name,
        'run': run,
        'in': links,
        'out': [{'id': out_id} for out_id in produced],
    }


# Workflow document for tumor/normal WXS FASTQs plus tumor RNA FASTQs: two alignment
# steps, then fusion, CNV, msisensor, TinDaisy and TinJasmine steps.
cwl = {
    'cwlVersion': 'v1.0',
    'class': 'Workflow',
    'id': 'pecgs_TN_wxs_fq_T_rna_fq',
    'label': 'pecgs_TN_wxs_fq_T_rna_fq',
    'inputs': [
        _wf_input('sample', 'string'),
        _wf_input('cpu', 'int?', default=40),
        _wf_input('tumor_wxs_fq_1', 'File'),
        _wf_input('tumor_wxs_fq_2', 'File'),
        _wf_input('normal_wxs_fq_1', 'File'),
        _wf_input('normal_wxs_fq_2', 'File'),
        _wf_input('tumor_rna_fq_1', 'File'),
        _wf_input('tumor_rna_fq_2', 'File'),

        # align dna-seq
        # NOTE(review): the '$(inputs.sample)...' defaults here and below are stored as
        # plain strings; confirm the execution engine expands parameter references inside
        # input defaults, otherwise the literal text is what reaches the steps.
        _wf_input('tumor_sample', 'string?', default='$(inputs.sample).WXS.T'),
        _wf_input('normal_sample', 'string?', default='$(inputs.sample).WXS.N'),
        _wf_input('known_sites', 'File', secondaryFiles=['.tbi']),
        _wf_input(
            'reference', 'File',
            secondaryFiles=['.amb', '.ann', '.bwt', '.fai', '.pac', '.sa', '^.dict'],
        ),
        _wf_input('wxs_normal_flowcell', 'string?', default='flowcellABCDE'),
        _wf_input('wxs_normal_lane', 'string?', default='1'),
        _wf_input('wxs_normal_index_sequencer', 'string?', default='sequencerABCDE'),
        _wf_input('wxs_normal_library_preparation', 'string?', default='lib1'),
        _wf_input('wxs_normal_platform', 'string?', default='ILLUMINA'),
        _wf_input('wxs_tumor_flowcell', 'string?', default='flowcellABCDE'),
        _wf_input('wxs_tumor_lane', 'string?', default='1'),
        _wf_input('wxs_tumor_index_sequencer', 'string?', default='sequencerABCDE'),
        _wf_input('wxs_tumor_library_preparation', 'string?', default='lib1'),
        _wf_input('wxs_tumor_platform', 'string?', default='ILLUMINA'),

        # fusion
        _wf_input('genome_lib_dir', 'Directory'),
        _wf_input('genome_db', 'Directory'),
        _wf_input('bwts', 'Directory'),
        _wf_input('integrate_executable', 'File'),
        _wf_input('integrate_fasta', 'File'),
        _wf_input('integrate_annotations', 'File'),
        _wf_input('filter_database', 'Directory'),
        _wf_input('fusion_annotator_dir', 'Directory'),

        # CNV
        _wf_input('reference_dir', 'Directory'),
        _wf_input('target_interval_list', 'File'),
        _wf_input('common_biallelic', 'File'),
        _wf_input('protein_coding_gene', 'File'),
        _wf_input('pool_of_normals', 'File'),

        # msisensor
        _wf_input('microsatellite', 'File'),
        _wf_input('minimal_homopolymer_size', 'int?', default=1),
        _wf_input('minimal_microsatellite_size', 'int?', default=1),

        # TinDaisy
        _wf_input('rescue_clinvar', 'boolean?', default=False),
        _wf_input('rescue_cosmic', 'boolean?', default=False),
        _wf_input('vep_cache_version', 'string'),
        _wf_input('vep_cache_gz', 'File'),
        _wf_input('clinvar_annotation', 'File'),
        _wf_input('classification_config', 'File'),
        _wf_input('af_config', 'File'),
        _wf_input('call_regions', 'File'),
        _wf_input('canonical_BED', 'File'),
        _wf_input('normal_barcode', 'string?', default='$(inputs.sample).N'),
        _wf_input('tumor_barcode', 'string?', default='$(inputs.sample).T'),
        _wf_input('tindaisy_chrlist', 'File'),
        _wf_input('strelka_config', 'File'),
        _wf_input('centromere_bed', 'File'),
        _wf_input('assembly', 'string'),
        _wf_input('varscan_config', 'File'),
        _wf_input('pindel_config', 'File'),

        # TinJasmine
        # NOTE(review): 'sample_barcode' is declared here but no step uses it as a source;
        # run_tinjasmine takes its sample_barcode from 'normal_barcode'. Confirm intent.
        _wf_input('sample_barcode', 'string?', default='$(inputs.sample)'),
        _wf_input('centromere', 'File'),
        _wf_input('tinjasmine_chrlist', 'File'),
        _wf_input('Canonical_BED', 'File'),
        _wf_input('ROI_BED', 'File'),
        _wf_input('varscan_filter_config', 'File'),
        _wf_input('pindel_filter_config', 'File'),
        _wf_input('pindel_config_template', 'File'),
        _wf_input('gatk_filter_config', 'File'),
    ],
    'outputs': [
        _wf_output('tumor_wxs_output_bam', 'align_tumor_wxs/output_bam', secondaryFiles=['^.bai']),
        _wf_output('normal_wxs_output_bam', 'align_normal_wxs/output_bam', secondaryFiles=['^.bai']),
        _wf_output('filtered_fusions', 'run_fusion/filtered_fusions'),
        _wf_output('total_fusions', 'run_fusion/total_fusions'),
        _wf_output('gene_level_cnv', 'run_cnv/gene_level_cnv'),
        _wf_output('msisensor_output_summary', 'run_msisensor/output_summary'),
        _wf_output('msisensor_output_dis', 'run_msisensor/output_dis'),
        _wf_output('msisensor_output_germline', 'run_msisensor/output_germline'),
        _wf_output('msisensor_output_somatic', 'run_msisensor/output_somatic'),
        _wf_output('tindaisy_output_maf_clean', 'run_tindaisy/output_maf_clean'),
        _wf_output('tindaisy_output_vcf_clean', 'run_tindaisy/output_vcf_clean'),
        _wf_output('tindaisy_output_vcf_all', 'run_tindaisy/output_vcf_all'),
        _wf_output('tinjasmine_output_maf_clean', 'run_tinjasmine/clean_MAF'),
        _wf_output('tinjasmine_output_vcf_clean', 'run_tinjasmine/clean_VCF'),
        _wf_output('tinjasmine_output_vcf_all', 'run_tinjasmine/allCall_VCF'),
    ],
    'steps': [
        _wf_step(
            'align_tumor_wxs',
            '../../submodules/align-dnaseq/cwl/align_dnaseq.cwl',
            wiring=[
                ('sample', 'tumor_sample'),
                'cpu',
                ('fq_1', 'tumor_wxs_fq_1'),
                ('fq_2', 'tumor_wxs_fq_2'),
                'known_sites',
                'reference',
                ('flowcell', 'wxs_tumor_flowcell'),
                ('lane', 'wxs_tumor_lane'),
                ('index_sequencer', 'wxs_tumor_index_sequencer'),
                ('library_preparation', 'wxs_tumor_library_preparation'),
                ('platform', 'wxs_tumor_platform'),
            ],
            produced=['output_bam'],
        ),
        _wf_step(
            'align_normal_wxs',
            '../../submodules/align-dnaseq/cwl/align_dnaseq.cwl',
            wiring=[
                ('sample', 'normal_sample'),
                'cpu',
                ('fq_1', 'normal_wxs_fq_1'),
                ('fq_2', 'normal_wxs_fq_2'),
                'known_sites',
                'reference',
                ('flowcell', 'wxs_normal_flowcell'),
                ('lane', 'wxs_normal_lane'),
                ('index_sequencer', 'wxs_normal_index_sequencer'),
                ('library_preparation', 'wxs_normal_library_preparation'),
                ('platform', 'wxs_normal_platform'),
            ],
            produced=['output_bam'],
        ),
        _wf_step(
            'run_fusion',
            '../../submodules/pecgs-fusion/cwl/fusion.cwl',
            wiring=[
                'sample',
                'cpu',
                ('fq_1', 'tumor_rna_fq_1'),
                ('fq_2', 'tumor_rna_fq_2'),
                'filter_database',
                'bwts',
                'fusion_annotator_dir',
                'genome_db',
                'genome_lib_dir',
                'integrate_annotations',
                'integrate_executable',
                'integrate_fasta',
            ],
            produced=['filtered_fusions', 'total_fusions'],
        ),
        _wf_step(
            'run_cnv',
            '../../submodules/pecgs-cnv/cwl/cnv_workflow.cwl',
            wiring=[
                'sample',
                'cpu',
                ('tumor_bam', 'align_tumor_wxs/output_bam'),
                ('normal_bam', 'align_normal_wxs/output_bam'),
                'reference_dir',
                'target_interval_list',
                'common_biallelic',
                'protein_coding_gene',
                'pool_of_normals',
            ],
            produced=['gene_level_cnv'],
        ),
        _wf_step(
            'run_msisensor',
            '../msisensor/msisensor_workflow.cwl',
            wiring=[
                ('threads', 'cpu'),
                ('tumor_bam', 'align_tumor_wxs/output_bam'),
                ('normal_bam', 'align_normal_wxs/output_bam'),
                'microsatellite',
                'minimal_homopolymer_size',
                'minimal_microsatellite_size',
            ],
            produced=['output_summary', 'output_dis', 'output_germline', 'output_somatic'],
        ),
        _wf_step(
            'run_tindaisy',
            '../../submodules/TinDaisy/cwl/workflows/tindaisy2.cwl',
            wiring=[
                ('tumor_bam', 'align_tumor_wxs/output_bam'),
                ('normal_bam', 'align_normal_wxs/output_bam'),
                ('reference_fasta', 'reference'),
                'pindel_config',
                'varscan_config',
                'assembly',
                'centromere_bed',
                'strelka_config',
                ('chrlist', 'tindaisy_chrlist'),
                'tumor_barcode',
                'normal_barcode',
                'canonical_BED',
                'call_regions',
                'af_config',
                'classification_config',
                'clinvar_annotation',
                'vep_cache_gz',
                'vep_cache_version',
                'rescue_cosmic',
                'rescue_clinvar',
            ],
            produced=['output_maf_clean', 'output_vcf_clean', 'output_vcf_all'],
        ),
        _wf_step(
            'run_tinjasmine',
            '../../submodules/TinJasmine/cwl/TinJasmine.cwl',
            wiring=[
                ('sample_barcode', 'normal_barcode'),
                ('bam', 'align_normal_wxs/output_bam'),
                'reference',
                'gatk_filter_config',
                'pindel_config_template',
                'pindel_filter_config',
                'varscan_filter_config',
                'ROI_BED',
                'vep_cache_gz',
                'vep_cache_version',
                'assembly',
                'Canonical_BED',
                ('chrlist', 'tinjasmine_chrlist'),
                'centromere',
            ],
            produced=['clean_VCF', 'allCall_VCF', 'clean_MAF'],
        ),
    ],
    # NOTE(review): left empty as in the original. If any of the `run` targets above is
    # itself a Workflow, CWL expects SubworkflowFeatureRequirement here - confirm.
    'requirements': [],
}

In [54]:
# Serialise the workflow definition into cwl_dir. Using a context manager guarantees the
# file is closed (and therefore fully written) when the cell finishes; the previous bare
# open() left the handle open in the kernel.
with open(os.path.join(cwl_dir, 'pecgs_TN_wxs_fq_T_rna_fq.cwl'), 'w') as workflow_file:
    yaml.safe_dump(cwl, workflow_file)

###### Test run: generate the launch commands for the test run list

In [6]:
# Load the tab-separated test run list; the first column becomes the index.
run_list = pd.read_csv(
    '../tests/data/pecgs_pipeline/run_list.txt',
    index_col=0,
    sep='\t',
)
run_list

Unnamed: 0_level_0,rna-seq_tumor_R1.filepath,rna-seq_tumor_R1.uuid,rna-seq_tumor_R2.filepath,rna-seq_tumor_R2.uuid,wxs_normal_R1.filepath,wxs_normal_R1.uuid,wxs_normal_R2.filepath,wxs_normal_R2.uuid,wxs_tumor_R1.filepath,wxs_tumor_R1.uuid,wxs_tumor_R2.filepath,wxs_tumor_R2.uuid,sample_id,run_uuid
run_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
HT191P1-S1H1A3Y3_23402d1f-7ea6-48d0-8553-02d5f4163aed,/scratch1/fs1/dinglab/estorrs/pecgs_resources/...,ab2eb486-8926-4329-b14c-81e9605edcc4,/scratch1/fs1/dinglab/estorrs/pecgs_resources/...,530103e2-ecc2-4400-b89b-329fb16631b0,/scratch1/fs1/dinglab/estorrs/pecgs_resources/...,ee5aa566-1492-49bd-a44f-70a28a9bd6e1,/scratch1/fs1/dinglab/estorrs/pecgs_resources/...,08739a15-9bb4-4ab1-82e4-67ac523d65e4,/scratch1/fs1/dinglab/estorrs/pecgs_resources/...,53ccd273-07b5-416f-a68a-bd4953424612,/scratch1/fs1/dinglab/estorrs/pecgs_resources/...,6fb1f30a-a19a-4702-9707-7d072d13222e,HT191P1-S1H1A3Y3,23402d1f-7ea6-48d0-8553-02d5f4163aed


In [7]:
def _filepath_fields(record):
    """Keep the entries of ``record`` whose column name contains 'filepath',
    with '.filepath' removed from the key."""
    return {
        column.replace('.filepath', ''): value
        for column, value in record.items()
        if 'filepath' in column
    }


# One entry per row of run_list (keyed by its index), holding only the filepath columns.
# NOTE(review): in the recorded output the 'rna-seq_tumor_R2' entry shows the same
# '..._R1.fastq.gz' path as 'rna-seq_tumor_R1' - check the test run_list.txt.
run_map = {
    run_id: _filepath_fields(record)
    for run_id, record in run_list.T.to_dict().items()
}
run_map

{'HT191P1-S1H1A3Y3_23402d1f-7ea6-48d0-8553-02d5f4163aed': {'rna-seq_tumor_R1': '/scratch1/fs1/dinglab/estorrs/pecgs_resources/test_samples/HT191P1-S1H1A3Y3/rna-seq/ht191p1-s1h1a3y3e1_1.AAGGTGTTAG-TAAGCGCGTG/ht191p1-s1h1a3y3e1_1.AAGGTGTTAG-TAAGCGCGTG.HFWJGDSXY_AAGGTGTTAG-TAAGCGCGTG_L001_R1.fastq.gz',
  'rna-seq_tumor_R2': '/scratch1/fs1/dinglab/estorrs/pecgs_resources/test_samples/HT191P1-S1H1A3Y3/rna-seq/ht191p1-s1h1a3y3e1_1.AAGGTGTTAG-TAAGCGCGTG/ht191p1-s1h1a3y3e1_1.AAGGTGTTAG-TAAGCGCGTG.HFWJGDSXY_AAGGTGTTAG-TAAGCGCGTG_L001_R1.fastq.gz',
  'wxs_normal_R1': '/scratch1/fs1/dinglab/estorrs/pecgs_resources/test_samples/HT191P1-S1H1A3Y3/wxs/CATTATCGCT-CTTGAAGGTT_S34_L001_R1_001.fastq.gz',
  'wxs_normal_R2': '/scratch1/fs1/dinglab/estorrs/pecgs_resources/test_samples/HT191P1-S1H1A3Y3/wxs/CATTATCGCT-CTTGAAGGTT_S34_L001_R2_001.fastq.gz',
  'wxs_tumor_R1': '/scratch1/fs1/dinglab/estorrs/pecgs_resources/test_samples/HT191P1-S1H1A3Y3/wxs/CCAGTAGCGT-ATGTATTGGC_S53_L002_R1_001.fastq.gz',
  'wxs_tu

In [8]:
# Read the sequencing metadata for the test run. The recorded output shows a mapping from
# run id to 'normal' / 'tumor' sections holding flowcell, index_sequencer, lane,
# library_preparation and platform. The file is opened in a context manager so the handle
# is closed once parsing is done (the previous bare open() was never closed).
with open('../tests/data/pecgs_pipeline/sequencing_info.yaml') as sequencing_info_file:
    sequencing_info = yaml.safe_load(sequencing_info_file)
sequencing_info

{'HT191P1-S1H1A3Y3_23402d1f-7ea6-48d0-8553-02d5f4163aed': {'normal': {'flowcell': 'HH7KNDSXY',
   'index_sequencer': 'CATTATCGCT-CTTGAAGGTT',
   'lane': '1',
   'library_preparation': 'TWCE-HT191P1-JM1D1_1-lib1',
   'platform': 'ILLUMINA'},
  'tumor': {'flowcell': 'HFMFWDSXY',
   'index_sequencer': 'CCAGTAGCGT-ATGTATTGGC',
   'lane': '2',
   'library_preparation': 'TWCE-HT191P1-S1H1A3Y3D1_1-lib1',
   'platform': 'ILLUMINA'}}}

In [9]:
# Locations handed to the command generator:
#   run_dir       - cluster directory; the generated commands place inputs/, logs/ and
#                   runs/<run id>/ beneath it (see the recorded command output)
#   proxy_run_dir - passed as `proxy_run_dir`; presumably a local stand-in for run_dir -
#                   TODO confirm against wombat.pecgs
#   tool_root     - pipeline checkout; it appears among the mounted LSF docker volumes
run_dir, proxy_run_dir, tool_root = (
    '/scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs',
    '../tests/data/pecgs_pipeline/run_dir',
    '/storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline',
)

In [20]:
# Generate the three command sets for the test run list: session start-up commands,
# the Cromwell server command and the per-run job commands.
start_cmds, server_cmds, job_cmds = pecgs.from_run_list_TN_wxs_fq_T_rna_fq(
    run_map,
    run_dir,
    tool_root,
    proxy_run_dir=proxy_run_dir,
    sequencing_info_map=sequencing_info,
)

In [21]:
# Display the start-up commands. The recorded output is a list of shell commands: LSF
# environment setup, a `bgadd` job-group call and an interactive `bsub` in the
# cromwell-runner docker image.
start_cmds

['source /opt/ibm/lsfsuite/lsf/conf/lsf.conf',
 'export LSF_DOCKER_NETWORK=host',
 'export LSF_DOCKER_VOLUMES="/scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs:/scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs /storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline:/storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline /storage1/fs1/dinglab/Active:/storage1/fs1/dinglab/Active /storage1/fs1/m.wyczalkowski/Active:/storage1/fs1/m.wyczalkowski/Active /scratch1/fs1/dinglab:/scratch1/fs1/dinglab"',
 'bgadd -L 50 /estorrs/default_pecgs_pipeline',
 "bsub -n 1 -q dinglab-interactive -G compute-dinglab -a 'docker(mwyczalkowski/cromwell-runner)' -g /estorrs/default_pecgs_pipeline -J b70f2017-e818-43e2-bfa8-009a831b08a6 -Is '/bin/bash'"]

In [22]:
# Display the server command. The recorded output is a single string that starts the
# Cromwell jar in `server` mode with a config file located under run_dir/inputs.
server_cmds

'/usr/bin/java -Dconfig.file=/scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs/inputs/server-cromwell-config.compute1.dat -jar /usr/local/cromwell/cromwell-47.jar server'

In [23]:
# Display the per-run job commands. The recorded output (truncated in this export) is a
# list that begins with `mkdir -p` calls for log directories, followed by LSF environment
# setup and a `bsub` submission named cromwell_launch_<run id>.
job_cmds

['mkdir -p /scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs/logs',
 'mkdir -p /scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs/runs/HT191P1-S1H1A3Y3_23402d1f-7ea6-48d0-8553-02d5f4163aed/cromwell-workdir/logs',
 'source /opt/ibm/lsfsuite/lsf/conf/lsf.conf',
 'export LSF_DOCKER_NETWORK=host',
 'export LSF_DOCKER_VOLUMES="/scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs:/scratch1/fs1/dinglab/estorrs/cromwell-data/pecgs/testing/pecgs /storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline:/storage1/fs1/dinglab/Active/Projects/estorrs/pecgs-pipeline /storage1/fs1/dinglab/Active:/storage1/fs1/dinglab/Active /storage1/fs1/m.wyczalkowski/Active:/storage1/fs1/m.wyczalkowski/Active /scratch1/fs1/dinglab:/scratch1/fs1/dinglab"',
 "bsub -n 1 -q dinglab -G compute-dinglab -a 'docker(mwyczalkowski/cromwell-runner)' -g /estorrs/default_pecgs_pipeline -J cromwell_launch_HT191P1-S1H1A3Y3_23402d1f-7ea6-48d0-8553-02d5f4163aed -oo /scratch1/fs1/dinglab/est