Permalink
Browse files

Added check

  • Loading branch information...
gkarthik committed Nov 22, 2016
1 parent 033ac98 commit f2d1673f1e1e2beded74f7f44280da3e69cbb146
Showing with 32 additions and 25 deletions.
  1. +32 −25 align_reads/Snakefile
View
@@ -1,81 +1,88 @@
import os
from datetime import datetime
import time
import re
in_dir="/gpfs/group/andersen/raw_data/161025_M01244_0088_000000000-ANRN2/Unaligned" # Input directory
ext="fastq.gz" # The extension supplied using .fastq.gz or .fastq
in_dir="/gpfs/group/andersen/raw_data/2016.11.18" # Input directory
ext="fastq.gz"
gz = True if ".gz" in ext else False
zika_reference="/gpfs/group/andersen/gkarthik/db/viruses/zika_dc_2016.nix"
primer_path="/gpfs/group/andersen/gkarthik/db/viruses/amplicons_prefix.fa"
s = datetime.now().strftime("%Y.%m.%d."+str(int(time.time())))
out_dir="/gpfs/group/andersen/gkarthik/analysis/"+s
READS1 = []
READS2 = []
SAMPLES = []
for dirname, dirnames, filenames in os.walk(in_dir):
filenames.sort()
for f in range(0, len(filenames)-1, 2):
if ext in filenames[f] and ext in filenames[f+1]:
READS1.append(filenames[f].replace("."+ext, ""))
READS2.append(filenames[f+1].replace("."+ext, ""))
new_name1 = re.sub('_S\d{1}_L\d{3}_R', '_R', filenames[f])
new_name1 = re.sub('_\d{3}', '', new_name1)
os.rename(os.path.join(dirname,filenames[f]), os.path.join(dirname, new_name1))
new_name2 = re.sub('_S\d{1}_L\d{3}_R', '_R', filenames[f+1])
new_name2 = re.sub('_\d{3}', '', new_name2)
os.rename(os.path.join(dirname,filenames[f+1]), os.path.join(dirname, new_name2))
SAMPLES.append(new_name1.replace("."+ext, "").replace("_R1", ""))
def get_read2(wildcards):
print(wildcards)
rule all:
input:
expand("{out_dir}/_aligned_bams/{read1}.trimmed.aligned.sorted.bam", out_dir = out_dir, read1 = READS1, read2 = READS2)
expand("{out_dir}/_aligned_bams/{sample}.trimmed.aligned.sorted.bam", out_dir = out_dir, sample = SAMPLES)
rule align_reads:
input:
"{out_dir}/_trimmed/{read1}.trimmed.fastq",
"{out_dir}/_trimmed/{read2}.trimmed.fastq",
"{out_dir}/_trimmed/{sample}_R1.trimmed.fastq",
"{out_dir}/_trimmed/{sample}_R2.trimmed.fastq",
"{ref}".format(ref = zika_reference)
output:
"{out_dir}/_aligned_bams/{read1}.trimmed.aligned.bam"
"{out_dir}/_aligned_bams/{sample}.trimmed.aligned.bam"
shell:
"mkdir -p $(dirname {output}) &&"
"module load samtools &&"
"/gpfs/home/gkarthik/bin/novoalign/novocraft3/novoalign -f {input[0]} {input[1]} -c 16 -r Random -l 40 -g 40 -x 20 -t 502 -d {input[2]} -o SAM | samtools view -F 4 -Sb -o {output}"
rule sort_aligned_bam:
input:
"{out_dir}/_aligned_bams/{read1}.trimmed.aligned.bam"
"{out_dir}/_aligned_bams/{sample}.trimmed.aligned.bam"
output:
"{out_dir}/_aligned_bams/{read1}.trimmed.aligned.sorted.bam"
"{out_dir}/_aligned_bams/{sample}.trimmed.aligned.sorted.bam"
shell:
"mkdir -p $(dirname {output[0]}) &&"
"module load samtools &&"
"samtools sort -T $PBSTMPDIR -o {output} {input}"
rule trim_reads:
input:
read1="{out_dir}/_reads/{read1}.fastq",
read2="{out_dir}/_reads/{read2}.fastq",
read1="{out_dir}/_reads/{sample}_R1.fastq",
read2="{out_dir}/_reads/{sample}_R2.fastq",
primer="{primer}".format(primer=primer_path)
output:
trim1="{out_dir}/_trimmed/{read1}.trimmed.fastq",
trim2="{out_dir}/_trimmed/{read2}.trimmed.fastq"
trim1="{out_dir}/_trimmed/{sample}_R1.trimmed.fastq",
trim2="{out_dir}/_trimmed/{sample}_R2.trimmed.fastq",
trim_unpaired1="{out_dir}/_trimmed/{sample}_R1.trimmed_unpaired.fastq",
trim_unpaired2="{out_dir}/_trimmed/{sample}_R2.trimmed_unpaired.fastq"
shell:
"mkdir -p $(dirname {output.trim1})/ && "
"unpaired1=${{output.trim1}/.trimmed.fastq/} && "
"unpaired2=${{output.trim2}/.trimmed.fastq/} && "
"java -Xmx2g -classpath /gpfs/home/gkarthik/bin/trimmomatic/trimmomatic-0.35.jar org.usadellab.trimmomatic.TrimmomaticPE -threads 16 {input.read1} {input.read2} {output.trim1} $unpaired1.trimmed_unpaired.fastq {output.trim2} $unpaired2.trimmed_unpaired.fastq ILLUMINACLIP:{input.primer}:2:30:12 LEADING:20 TRAILING:20 SLIDINGWINDOW:4:25 MINLEN:30 HEADCROP:22"
"java -Xmx2g -classpath /gpfs/home/gkarthik/bin/trimmomatic/trimmomatic-0.35.jar org.usadellab.trimmomatic.TrimmomaticPE -threads 16 {input.read1} {input.read2} {output.trim1} {output.trim_unpaired1} {output.trim2} {output.trim_unpaired2} ILLUMINACLIP:{input.primer}:2:30:12 LEADING:20 TRAILING:20 SLIDINGWINDOW:4:25 MINLEN:30 HEADCROP:22"
rule extract_read_1:
input:
"{in_dir}".format(in_dir = in_dir)+"/{read1}.fastq.gz"
"{in_dir}".format(in_dir = in_dir)+"/{sample}_R1.fastq.gz"
output:
"{out_dir}/_reads/{read1}.fastq"
"{out_dir}/_reads/{sample}_R1.fastq"
shell:
"mkdir -p $(dirname {output})/ &&"
'if [ "${gz}" == true ]; then gunzip -c {input} > {output}; else cp {input} {output};'
'if [[ {input} == *.gz ]]; then gunzip -c {input} > {output}; else cp {input} {output};fi;'
rule extract_read_2:
input:
"{in_dir}/{read2}.fastq.gz"
"{in_dir}".format(in_dir = in_dir)+"/{sample}_R2.fastq.gz"
output:
"{out_dir}/_reads/{read2}.fastq"
"{out_dir}/_reads/{sample}_R2.fastq"
shell:
"mkdir -p $(dirname {output})/ &&"
'if [ "${gz}" == true ]; then gunzip -c {input} > {output}; else cp {input} {output};'
'if [[ {input} == *.gz ]]; then gunzip -c {input} > {output}; else cp {input} {output}; fi'

0 comments on commit f2d1673

Please sign in to comment.