In [1]:
# Create the directory layout the workflow cells below rely on.
!mkdir -p input workflows
# Write one tiny placeholder FASTQ per sample; each file holds only its sample letter.
!echo "A" > input/sample_A.fastq
!echo "B" > input/sample_B.fastq
!echo "C" > input/sample_C.fastq

In [12]:
%%file workflows/Snakefile
# Sample identifiers that the expand() calls in this Snakefile iterate over.
SAMPLES = [
    "A",
    "B",
    "C",
]

# Default target (first rule in the file): asking for the joint VCF pulls in
# every upstream rule needed to build it.
rule all:
    input: "genotype_calls/all_samples.vcf"

# Stand-in for read mapping: no aligner is run, the FASTQ is simply copied
# into a ".bam" file for the matching {sample} wildcard.
rule bwa_map:
    input: "input/sample_{sample}.fastq"
    output: "mapped_reads/sample_{sample}.bam"
    # The trailing sleep only delays job completion by 10 seconds.
    shell: "cat {input}  > {output}; sleep 10"
# Stand-in for sorting: copies the mapped ".bam" to the sorted_bams/ directory
# unchanged (no real sorting happens).
rule samtools_sort:
    input: "mapped_reads/sample_{sample}.bam"
    output: "sorted_bams/sample_sorted_{sample}.bam"
    # The trailing sleep only delays job completion by 10 seconds.
    shell: "cat {input}  > {output}; sleep 10"
# Stand-in for indexing: the ".bai" is just a copy of the sorted ".bam".
rule samtools_index:
    input: "sorted_bams/sample_sorted_{sample}.bam"
    output: "sorted_bams/sample_sorted_{sample}.bam.bai"
    # The trailing sleep only delays job completion by 10 seconds.
    shell: "cat {input}  > {output};sleep 10"
# Stand-in for joint genotyping: concatenates the sorted BAMs of all samples
# into one ".vcf" file.
rule genotype_calling:
    input:
        bams=expand(
            "sorted_bams/sample_sorted_{sample}.bam",
            sample=SAMPLES,
        ),
        # The index files are declared as inputs so they get built first;
        # the shell command itself only reads {input.bams}.
        bais=expand(
            "sorted_bams/sample_sorted_{sample}.bam.bai",
            sample=SAMPLES,
        )
    output: "genotype_calls/all_samples.vcf"
    # The trailing sleep only delays job completion by 10 seconds.
    shell: "cat {input.bams} > {output}; sleep 10"

Overwriting workflows/Snakefile


## Follow these steps:
* Open terminal from 'New Launcher' menu
* Load conda env (e.g., `conda activate notebook-env`)
* Change to present directory
* Run `ls -l workflows` and check that the `Snakefile` is listed
* Check the **Snakemake** pipeline with the following command
  `snakemake -c1 -n -s workflows/Snakefile`
  where:
  * -c number: Number of cores to use
  * -n: Dry run: don't execute anything, just print a summary of the jobs that would run
* Check **dag** using the following command
  `snakemake -s workflows/Snakefile --dag|dot -Tsvg > dot.svg`
* **Optional**: Check the DAG using the GUI
  `snakemake -s workflows/Snakefile -c1 --gui`
  Then open a new browser tab, using the current URL with `/lab/` replaced by `/proxy/8000/`

In [10]:
%%file workflows/Snakefile
# Sample identifiers that the expand() call in the target rule iterates over.
SAMPLES = [
    "A",
    "B",
    "C",
]

# Default target (first rule in the file): one notebook log file per sample.
rule all:
    input: expand("log/notebook/notebook_{sample}.ipynb", sample=SAMPLES)
        
# Stand-in for read mapping: no aligner is run, the FASTQ is simply copied
# into a ".bam" file for the matching {sample} wildcard.
rule bwa_map:
    input: "input/sample_{sample}.fastq"
    output: "mapped_reads/sample_{sample}.bam"
    # The trailing sleep only delays job completion by 5 seconds.
    shell: "cat {input}  > {output}; sleep 5"


# Runs a Jupyter notebook once per sample via the `notebook:` directive.
# The rule declares no `output:`; its only declared product is the per-sample
# notebook log, which is what `rule all` requests.
rule test_notebook:
    input: "mapped_reads/sample_{sample}.bam"
    log: notebook="log/notebook/notebook_{sample}.ipynb"
    # NOTE(review): absolute, machine-specific path; this only resolves on a
    # host that has /home/vmuser/notebook/. Consider a path relative to the Snakefile.
    notebook: "/home/vmuser/notebook/test.py.ipynb"

Overwriting workflows/Snakefile


In [2]:
%%file workflows/Snakefile

# Minimal wrapper demo: passes the pipeline name "hello" to the pinned
# Nextflow wrapper. The rule declares no inputs or outputs.
rule test_nextflow:
    params: pipeline="hello"
    wrapper: "v1.14.1/utils/nextflow"

Overwriting workflows/Snakefile


In [12]:
%%file workflows/Snakefile
# Produces a MultiQC HTML report through the pinned snakemake-wrappers release.
rule multiqc_dir:
    # NOTE(review): absolute, machine-specific path; this only resolves on a
    # host that has /home/vmuser/se.settings.
    input:
        "/home/vmuser/se.settings"
    output:
        "qc/multiqc.html"
    log:
        "logs/multiqc.log"
    params:
        # Optional: extra parameters for multiqc.
        extra="",
        # Presumably makes the wrapper pass the listed input files directly
        # instead of their parent directories; confirm against the wrapper docs.
        use_input_files_only=True
    wrapper:
        "v1.14.1/bio/multiqc"

Overwriting workflows/Snakefile
