In [1]:
import os
import yaml
import json
import pandas as pd
from pathlib import Path

In [5]:
# Directory that receives the generated CWL tool and its input template.
cwl_dir = '../cwl/'
# Create it (and any missing parents); a no-op when it already exists.
os.makedirs(cwl_dir, exist_ok=True)

In [6]:
# Placeholder inputs document for the bulk_expression tool. Every value is a
# dummy meant to be replaced before use.
template = {
    # fq_1 / fq_2 each take a list of paths (class "File[]").
    # Keep the two lists as separate literals: sharing one list object would
    # make the YAML dumper emit an anchor/alias pair instead of two lists.
    "fq_1": {"class": "File[]", "path": ["a/file/path"]},
    "fq_2": {"class": "File[]", "path": ["a/file/path"]},
    # single directory / single file entries
    "star_index": {"class": "Directory", "path": "a/dir/path"},
    "gtf": {"class": "File", "path": "a/file/path"},
    "gene_info": {"class": "File", "path": "a/file/path"},
    # plain scalar placeholder, no class/path wrapper
    "cpu": "a_int",
}

In [7]:
# Serialise the placeholder inputs to <cwl_dir>/template.bulk_expression.yaml.
# The context manager closes (and therefore flushes) the handle as soon as
# the dump finishes, instead of leaving that to garbage collection.
with open(os.path.join(cwl_dir, 'template.bulk_expression.yaml'), 'w') as template_file:
    yaml.safe_dump(template, template_file)

In [11]:
# CWL v1.0 CommandLineTool description for the bulk_expression script.
# The dict is dumped to YAML as-is, so every value must already have the type
# the CWL schema expects. In particular `inputBinding.position` has to be an
# int: a quoted '1' would be written to the YAML as a string.
cwl = {
    'class': 'CommandLineTool',
    'cwlVersion': 'v1.0',
    'id': 'bulk_expression',
    'label': 'bulk_expression',
    'baseCommand': ['python', '/pecgs-bulk-expression/bulk_expression/bulk_expression.py'],
    'inputs': [
        # Positional arguments: each file list is joined with "," into a
        # single command-line token (itemSeparator).
        {
            'id': 'fq_1',
            'type': 'File[]',
            'inputBinding': {
                'position': 1,
                'itemSeparator': ",",
                'separate': False
            }
        },
        {
            'id': 'fq_2',
            'type': 'File[]',
            'inputBinding': {
                'position': 2,
                'itemSeparator': ",",
                'separate': False
            }
        },
        # Prefixed options share position 0, so they sort ahead of the
        # positional arguments above.
        {
            'id': 'star_index',
            'type': 'Directory',
            'inputBinding': {
                'prefix': '--star-index',
                'position': 0
            }
        },
        {
            'id': 'gtf',
            'type': 'File',
            'inputBinding': {
                'prefix': '--gtf',
                'position': 0
            }
        },
        {
            'id': 'gene_info',
            'type': 'File',
            'inputBinding': {
                'prefix': '--gene-info',
                'position': 0
            }
        },
        {
            'id': 'cpu',
            'type': 'int?',
            'default': 16,
            'inputBinding': {
                'prefix': '--cpu',
                'position': 0
            }
        },
        # needs path to be set so it works on compute1
        # No inputBinding: this value is only consumed by the
        # EnvVarRequirement below and never appears on the command line.
        # NOTE(review): the value is passed through as a literal string, so
        # "$PATH" is presumably not expanded by the runner - confirm.
        {
            'id': 'environ_PATH',
            'type': 'string?',
            'default': '/miniconda/envs/bulk_expression/bin:$PATH'
        },
    ],
    # Fixed options that are always appended to the command line.
    'arguments': [
        {
            'position': 0,
            'prefix': '--out-dir',
            'valueFrom': 'output'
        },
        {
            'position': 0,
            'prefix': '--compress-featurecounts-script',
            'valueFrom': '/pecgs-bulk-expression/bulk_expression/shrink_featurecounts.py'
        },
        {
            'position': 0,
            'prefix': '--generate-fpkm-script',
            'valueFrom': '/pecgs-bulk-expression/bulk_expression/gen_fpkm.py'
        },
    ],
    'outputs': [
        # NOTE(review): this glob looks under "star/" while --out-dir is
        # "output" - presumably the script writes the alignment outside the
        # out-dir; verify against bulk_expression.py.
        {
            'id': 'output_bam',
            'type': 'File',
            'outputBinding': {'glob': 'star/Aligned.sortedByCoord.out.bam'},
            'secondaryFiles': ['.bai']
        },
        {
            'id': 'readcounts_and_fpkm_tsv',
            'type': 'File',
            'outputBinding': {'glob': 'output/readcount_and_fpkm.tsv.gz'}
        },
    ],
    'requirements': [
        {
            'class': 'DockerRequirement',
            'dockerPull': 'estorrs/pecgs-bulk-expression:0.0.1'
        },
        {
            'class': 'ResourceRequirement',
            'ramMin': 80000,
            # request as many cores as the tool is told to use via --cpu
            'coresMin': '$(inputs.cpu)'
        },
        {
            'class': 'EnvVarRequirement',
            'envDef': {
                'PATH': '$(inputs.environ_PATH)',
            }
        }
    ]
}

In [12]:
# Serialise the tool definition to <cwl_dir>/bulk_expression.cwl.
# The context manager closes (and therefore flushes) the handle as soon as
# the dump finishes, instead of leaving that to garbage collection.
with open(os.path.join(cwl_dir, 'bulk_expression.cwl'), 'w') as cwl_file:
    yaml.safe_dump(cwl, cwl_file)