# workflows/super-enhancer.cwl

cwlVersion: v1.0
class: Workflow


# Runner features this workflow relies on.
requirements:
# step inputs use `valueFrom` to build output file names
- class: StepInputExpressionRequirement
# those `valueFrom` expressions call JavaScript string/array methods
- class: InlineJavascriptRequirement
# `add_island_names` wires two sources into a single step input
- class: MultipleInputFeatureRequirement


# Workflow files whose results may be offered as upstream data for this
# workflow. The keys defined here (chipseq_sample, chipseq_control,
# genome_indices) are the prefixes used by the `sd:upstreamSource` fields
# of the inputs below.
sd:upstream:
  chipseq_sample:
  - chipseq-se.cwl
  - chipseq-pe.cwl
  - trim-chipseq-pe.cwl
  - trim-chipseq-se.cwl
  chipseq_control:
  - chipseq-se.cwl
  - chipseq-pe.cwl
  - trim-chipseq-pe.cwl
  - trim-chipseq-se.cwl
  genome_indices:
  - genome-indices.cwl
    
inputs:

  # Free-text short name of the experiment.
  alias:
    label: 'Experiment short name/Alias'
    type: string
    'sd:preview':
      position: 1

  # MACS2 peaks of the sample; consumed by the `make_gff` step.
  peak_file:
    label: 'ChIP-Seq experiment'
    doc: 'XLS file generated by MACS2'
    type: File
    format: 'http://edamontology.org/format_3468'
    'sd:upstreamSource': 'chipseq_sample/macs2_called_peaks'
    'sd:localLabel': true

  # Sample alignments; passed to `run_rose` and also used to derive the
  # names of the renamed output files.
  alignment_file:
    label: 'ChIP-Seq experiment'
    doc: 'Coordinate sorted BAM file and BAI index file'
    type: File
    format: 'http://edamontology.org/format_2572'
    secondaryFiles: ['.bai']
    'sd:upstreamSource': 'chipseq_sample/bambai_pair'
    'sd:localLabel': true

  # Optional MACS2 peaks of the control experiment; consumed by `make_gff`.
  peak_control_file:
    label: 'Control ChIP-Seq experiment'
    doc: 'XLS file generated by MACS2 used as control'
    type: File?
    format: 'http://edamontology.org/format_3468'
    'sd:upstreamSource': 'chipseq_control/macs2_called_peaks'
    'sd:localLabel': true

  # Genome annotation; used by both `run_rose` and `assign_genes`.
  annotation_file:
    label: 'Genome annotation'
    doc: 'TSV genome annotation file'
    type: File
    format: 'http://edamontology.org/format_3475'
    'sd:upstreamSource': 'genome_indices/annotation'

  # Chromosome sizes; consumed by the `bed_to_bigbed` step.
  chrom_length_file:
    label: 'Chromosome length file'
    doc: 'Chromosome length file'
    type: File
    format: 'http://edamontology.org/format_2330'
    'sd:upstreamSource': 'genome_indices/chrom_length'

  # Forwarded to the `stitch_distance` input of the `run_rose` step.
  stitching_distance:
    label: 'Maximum distance between two regions that will be stitched together'
    doc: |
      Maximum distance between two regions that will be stitched together.
      For ROSE default is 12.5kb, in workflow default is 20000
    type: int?
    default: 20000
    'sd:layout': {advanced: true}

  # Forwarded to the `tss_distance` input of the `run_rose` step.
  # The description used to state "Default: 0", which contradicted the
  # `default: 2500` declared here; it now names both defaults explicitly,
  # in the same way the `stitching_distance` description does.
  tss_exclusion_zone_size:
    type: int?
    default: 2500
    label: "Exclude regions contained within +/- this distance from TSS (skip TSS exclusion if 0)"
    doc: |
      Exclude regions contained within +/- this distance from TSS in order to account
      for promoter biases. For ROSE default is 0, in workflow default is 2500.
      If this value is 0, will not look for a gene file
    'sd:layout':
      advanced: true

  # Forwarded to the `promoter_bp` input of the `assign_genes` step.
  promoter_distance:
    label: 'Promoter distance, bp'
    doc: |
      Max distance from gene TSS (in both direction) overlapping which the peak will
      be assigned to the promoter region. Default: 1000 bp
    type: int?
    default: 1000
    'sd:layout': {advanced: true}

  # Forwarded to the `upstream_bp` input of the `assign_genes` step.
  upstream_distance:
    label: 'Upstream distance, bp'
    doc: |
      Max distance from the promoter (only in upstream direction) overlapping which the
      peak will be assigned to the upstream region. Default: 20,000 bp
    type: int?
    default: 20000
    'sd:layout': {advanced: true}


outputs:

  # ROSE plot after it has been copied to its final name by `rename_png`.
  ranked_super_enhancers_plot_png:
    label: 'Ranked super-enhancers plot'
    doc: 'Ranked super-enhancers plot'
    type: File
    format: 'http://edamontology.org/format_3603'
    outputSource: rename_png/target_file
    'sd:visualPlugins':
    - image:
        tab: Plots
        Caption: 'Ranked super-enhancers plot'

  # Final TSV report produced by the `add_island_names` step.
  super_enhancers_report_file:
    label: 'Super-enhancers report file with assigned genes'
    doc: 'Super-enhancers report file with assigned genes'
    type: File
    format: 'http://edamontology.org/format_3475'
    outputSource: add_island_names/output_file
    'sd:visualPlugins':
    - syncfusiongrid:
        tab: Super-enhancers
        Title: 'Super-enhancers Analysis Results'

  # bigBed track produced by the `bed_to_bigbed` step.
  # The track name was misspelled "Super-enchancers"; it now matches the
  # spelling used in `label` and everywhere else in this workflow.
  super_enhancers_bigbed_file:
    type: File
    format: "http://edamontology.org/format_3004"
    label: "Super-enhancers"
    doc: "Super-enhancers bigBed file"
    outputSource: bed_to_bigbed/bigbed_file
    'sd:visualPlugins':
    - igvbrowser:
        tab: 'IGV Genome Browser'
        id: 'igvbrowser'
        type: 'annotation'
        name: "Super-enhancers"
        displayMode: "COLLAPSE"
        height: 40

  # Table taken directly from the `run_rose` step, with no post-processing.
  super_enhancers_raw_txt:
    label: 'Super-enhancers report file (raw, from ROSE)'
    doc: 'Super-enhancers report file (raw, from ROSE)'
    type: File
    format: 'http://edamontology.org/format_2330'
    outputSource: run_rose/super_enhancers_table


steps:

  # Runs makegff.cwl on the sample peak file (plus the optional control
  # peak file); its `gff_file` output feeds `run_rose`.
  make_gff:
    run: '../tools/makegff.cwl'
    in:
      islands_file: peak_file
      islands_control_file: peak_control_file
    out:
    - gff_file

  # Runs rose.cwl on the GFF from `make_gff` and the sample BAM file.
  # The plot goes to `rename_png`, the BED file to `sort_bed`, and the table
  # is exposed as the raw workflow output.
  run_rose:
    run: '../tools/rose.cwl'
    in:
      binding_sites_file: make_gff/gff_file
      bam_file: alignment_file
      annotation_file: annotation_file
      stitch_distance: stitching_distance
      tss_distance: tss_exclusion_zone_size
    out: [plot_points_pic, gateway_super_enhancers_bed, super_enhancers_table]

  # Copies the ROSE plot to "<name>_default_s_enhcr.png", where <name> is the
  # last path component of the alignment file location with its final
  # extension removed.
  rename_png:
    in:
      source_file: run_rose/plot_points_pic
      target_filename:
        source: alignment_file
        valueFrom: >-
          $(self.location.split('/').slice(-1)[0].split('.').slice(0,-1).join('.')+"_default_s_enhcr.png")
    out:
    - target_file
    run:
      cwlVersion: v1.0
      class: CommandLineTool
      doc: 'Tool renames (copy) `source_file` to `target_filename`'
      requirements:
      - class: DockerRequirement
        dockerPull: 'biowardrobe2/scidap:v0.0.3'
      # resulting command line: cp <source_file> <target_filename>
      baseCommand: [cp]
      inputs:
        source_file:
          doc: 'source file to rename'
          type: File
          inputBinding:
            position: 5
        target_filename:
          doc: 'filename to rename to'
          type: string
          inputBinding:
            position: 6
      outputs:
        target_file:
          type: File
          outputBinding:
            glob: '*'

  # Sorts the ROSE BED file with linux-sort.cwl using three sort keys:
  # column 1, then columns 2 and 3 numerically.
  sort_bed:
    run: '../tools/linux-sort.cwl'
    in:
      unsorted_file: run_rose/gateway_super_enhancers_bed
      key:
        default:
        - '1,1'
        - '2,2n'
        - '3,3n'
    out:
    - sorted_file

  # Keeps only the first four columns of the sorted BED file; the result
  # feeds `bed_to_bigbed`.
  reduce_bed:
    in:
      input_file: sort_bed/sorted_file
    out: [output_file]
    run:
      cwlVersion: v1.0
      class: CommandLineTool
      requirements:
      - class: DockerRequirement
        dockerPull: biowardrobe2/scidap:v0.0.3
      inputs:
        input_file:
          type: File
          inputBinding:
            position: 5
          doc: Input BED6 file to be reduced to BED4
      outputs:
        output_file:
          type: File
          outputBinding:
            glob: "*"
      baseCommand: [bash, '-c']
      arguments:
      # With `bash -c <script> <input_file>` the input path is available as $0.
      # Every expansion is double-quoted so a path containing whitespace or
      # glob characters is not word-split; `cut` reads the file directly
      # instead of through `cat`. The output is written to the working
      # directory under the same base name as the input.
      - 'cut -f 1-4 "$0" > "$(basename "$0")"'
      doc: Tool converts BED6 to BED4 by reducing column numbers

  # Converts the BED4 file from `reduce_bed` to bigBed. The output is named
  # "<name>_default_s_enhcr.bb", where <name> is the last path component of
  # the alignment file location with its final extension removed.
  bed_to_bigbed:
    in:
      input_bed: reduce_bed/output_file
      chrom_length_file: chrom_length_file
      bed_type:
        default: 'bed4'
      output_filename:
        source: alignment_file
        valueFrom: >-
          $(self.location.split('/').slice(-1)[0].split('.').slice(0,-1).join('.')+"_default_s_enhcr.bb")
    out:
    - bigbed_file
    run:
      cwlVersion: v1.0
      class: CommandLineTool
      doc: 'Tool converts bed to bigBed'
      requirements:
      - class: DockerRequirement
        dockerPull: 'biowardrobe2/ucscuserapps:v358'
      # resulting command line:
      #   bedToBigBed -type=<bed_type> <input_bed> <chrom_length_file> <output_filename>
      baseCommand: [bedToBigBed]
      inputs:
        bed_type:
          doc: 'Type of BED file in a form of bedN[+[P]]. By default bed3 to three required BED fields'
          type: string
          inputBinding:
            position: 5
            prefix: '-type='
            separate: false
        input_bed:
          doc: 'Input BED file'
          type: File
          inputBinding:
            position: 6
        chrom_length_file:
          doc: 'Chromosome length files'
          type: File
          inputBinding:
            position: 7
        output_filename:
          doc: 'Output filename'
          type: string
          inputBinding:
            position: 8
      outputs:
        bigbed_file:
          type: File
          outputBinding:
            glob: '*'

  # Rewrites the sorted BED file as a MACS2-like table so that it can be
  # passed to `assign_genes`.
  bed_to_macs:
    in:
      input_file: sort_bed/sorted_file
    out: [output_file]
    run:
      cwlVersion: v1.0
      class: CommandLineTool
      requirements:
      - class: DockerRequirement
        dockerPull: biowardrobe2/scidap:v0.0.3
      inputs:
        input_file:
          type: File
          inputBinding:
            position: 5
          doc: Input file to be converted to MACS2 output format
      outputs:
        output_file:
          type: File
          outputBinding:
            glob: "*"
      baseCommand: [bash, '-c']
      arguments:
      # With `bash -c <script> <input_file>` the input path is available as $0.
      # Lines containing "#" are dropped; a fixed header is printed first and
      # each remaining row becomes: col1, col2, col3, col3-col2+1, five zero
      # placeholders, col4.
      # Shell expansions are double-quoted so a path containing whitespace or
      # glob characters is not word-split. The script is a folded block
      # scalar (lines are joined with single spaces), which keeps "#" and the
      # backslashes literal for YAML.
      - >-
        grep -v "#" "$0" | awk
        'BEGIN {print "chr\tstart\tend\tlength\tabs_summit\tpileup\t-log10(pvalue)\tfold_enrichment\t-log10(qvalue)\tname"}
        {print $1"\t"$2"\t"$3"\t"$3-$2+1"\t0\t0\t0\t0\t0\t"$4}' > "$(basename "$0")"
      doc: Tool converts `input_file` to the format compatible with the input of iaintersect from `assign_genes` step

  # Runs iaintersect.cwl on the MACS2-like table from `bed_to_macs` together
  # with the genome annotation; `result_file` feeds `add_island_names`.
  assign_genes:
    run: '../tools/iaintersect.cwl'
    in:
      input_filename: bed_to_macs/output_file
      annotation_filename: annotation_file
      promoter_bp: promoter_distance
      upstream_bp: upstream_distance
    out:
    - result_file

  # Builds the final report: pastes the gene-assignment table from
  # `assign_genes` side by side with the sorted BED file and keeps a fixed
  # set of columns under a new header. The output is named
  # "<name>_default_s_enhcr.tsv", where <name> is the last path component of
  # the alignment file location with its final extension removed.
  add_island_names:
    in:
      # order matters: the script reads these as $0 and $1
      input_file: [assign_genes/result_file, sort_bed/sorted_file]
      param:
        source: alignment_file
        valueFrom: $(self.location.split('/').slice(-1)[0].split('.').slice(0,-1).join('.')+"_default_s_enhcr.tsv")
    out: [output_file]
    run:
      cwlVersion: v1.0
      class: CommandLineTool
      requirements:
      - class: DockerRequirement
        dockerPull: biowardrobe2/scidap:v0.0.3
      inputs:
        input_file:
          type: File[]
          inputBinding:
            position: 5
          doc: "Two files: the TSV file to add extra columns to, then the file the extra columns are pasted from"
        param:
          type: string
          inputBinding:
            position: 6
          doc: Param to set output filename
      outputs:
        output_file:
          type: File
          outputBinding:
            glob: "*"
      baseCommand: [bash, '-c']
      arguments:
      # With `bash -c <script> <file0> <file1> <param>` the script sees
      # $0 = first input file, $1 = second input file, $2 = output file name.
      # The header is written first; then rows of $0 (minus any line
      # containing "refseq_id") are pasted with $1 and columns
      # 1-9,15,19,20 are appended.
      # NOTE(review): columns 19 and 20 presumably come from the pasted
      # second file (name and score) - confirm against the iaintersect
      # output layout.
      # Shell expansions are double-quoted so paths or file names containing
      # whitespace or glob characters are not word-split.
      - >-
        echo -e "refseq_id\tgene_id\ttxStart\ttxEnd\tstrand\tchrom\tstart\tend\tlength\tregion\tname\tscore" > "$(basename "$2")";
        grep -v refseq_id "$0" | paste - "$1" | cut -f 1-9,15,19,20 >> "$(basename "$2")"
      doc: Tool pastes the two input files side by side and writes the selected columns, under a fixed header, to the file named by `param`


# Prefix `s:` used by the metadata fields below.
$namespaces:
  s: 'http://schema.org/'

# Schema document that defines the `s:` vocabulary.
$schemas:
- 'https://github.com/schemaorg/schemaorg/raw/main/data/releases/11.01/schemaorg-current-http.rdf'

# Human-readable workflow names (quoted because the text contains ": ").
label: 'ROSE: rank ordering of super-enhancers'
s:name: 'ROSE: rank ordering of super-enhancers'
s:alternateName: 'ROSE: rank ordering of super-enhancers'

# Where this workflow is published, and its license.
s:downloadUrl: 'https://raw.githubusercontent.com/datirium/workflows/master/workflows/super-enhancer.cwl'
s:codeRepository: 'https://github.com/datirium/workflows'
s:license: 'http://www.apache.org/licenses/LICENSE-2.0'

s:isPartOf:
  class: 's:CreativeWork'
  s:name: 'Common Workflow Language'
  s:url: 'http://commonwl.org/'

# Authorship: organization -> department -> lab -> person.
s:creator:
- class: 's:Organization'
  s:legalName: "Cincinnati Children's Hospital Medical Center"
  s:logo: 'https://www.cincinnatichildrens.org/-/media/cincinnati%20childrens/global%20shared/childrens-logo-new.png'
  s:location:
  - class: 's:PostalAddress'
    s:streetAddress: '3333 Burnet Ave'
    s:addressLocality: 'Cincinnati'
    s:addressRegion: 'OH'
    s:postalCode: '45229'
    s:addressCountry: 'USA'
    s:telephone: '+1(513)636-4200'
  s:department:
  - class: 's:Organization'
    s:legalName: 'Allergy and Immunology'
    s:department:
    - class: 's:Organization'
      s:legalName: 'Barski Research Lab'
      s:member:
      - class: 's:Person'
        s:name: 'Michael Kotliar'
        s:email: 'mailto:misha.kotliar@gmail.com'
        s:sameAs:
        - id: 'http://orcid.org/0000-0002-6486-3898'


# doc:
#   $include: ../descriptions/super-enhancer.md


doc: |
  Super-enhancers consist of clusters of enhancers that are densely occupied by
  the master regulators and Mediator. Super-enhancers differ from typical enhancers
  in size, transcription factor density and content, ability to activate transcription,
  and sensitivity to perturbation. Use this workflow to create stitched enhancers, and
  to separate super-enhancers from typical enhancers using sequencing data (.bam) given
  a file of previously identified constituent enhancers (.gff).