-
Notifications
You must be signed in to change notification settings - Fork 0
/
pb_gen.smk
110 lines (86 loc) · 2.9 KB
/
pb_gen.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import glob
from collections import defaultdict
import os
import itertools
import sys
from pprint import pprint
minRQccs="0.9"
minRQpostCcs="0.99"
minPasses="1"
RUNIDS,= glob_wildcards( "raw/{runId}.subreads.bam")
shell.prefix("source ~/.bashrc; set +eu; conda deactivate; set -euo pipefail; ")
pprint(RUNIDS)
rule all:
input:
expand("ccs/fastq/{runId}.min-rq" + minRQpostCcs + ".min-passes" + minPasses + ".fastq.gz", runId=RUNIDS)
rule makeCCS:
input:
bam="raw/{runId}.subreads.bam",
output:
"ccs/{runId}.min-rq" + minRQccs + ".min-passes" + minPasses +".ccs.bam"
threads: 8
conda: "envs/pacBio_env.yml"
shell:
'''
uuid=$(uuidgen)
ccs --minLength=10 --min-rq={minRQccs} --min-passes={minPasses} --max-length 1000000 -j {threads} --report-file ccs/{wildcards.runId}.min-rq{minRQccs}.min-passes{minPasses}.ccs_report.txt {input.bam} {config[TMPDIR]}/$uuid.bam
mv {config[TMPDIR]}/$uuid.bam {output}
'''
rule rqFilterCcs:
input: "ccs/{runId}.min-rq" + minRQccs + ".min-passes" + minPasses + ".ccs.bam"
output: temp("ccs/rqFilter/{runId}.min-rq" + minRQpostCcs + ".min-passes" + minPasses + ".ccs.bam")
conda: "envs/pacBio_env.yml"
shell:
'''
uuid=$(uuidgen)
bamtools filter -in {input} -out {config[TMPDIR]}/$uuid -tag "rq":">={minRQpostCcs}"
mv {config[TMPDIR]}/$uuid {output}
'''
rule removePrimers:
input:
bam="ccs/rqFilter/{runId}.min-rq" + minRQpostCcs + ".min-passes" + minPasses + ".ccs.bam",
pb_adapters=config["PB_ADAPT"]
output:
temp("ccs/clipped/{runId}.min-rq" + minRQpostCcs + ".min-passes" + minPasses + ".clipped.primer_5p--primer_3p.bam")
conda: "envs/pacBio_env.yml"
threads: 8
shell:
'''
uuid=$(uuidgen)
wd=$PWD
mkdir {config[TMPDIR]}/$uuid
lima --isoseq --min-score-lead 0 -j {threads} {input.bam} {input.pb_adapters} {config[TMPDIR]}/$uuid/$(basename {output[0]} .primer_5p--primer_3p.bam).bam
cd {config[TMPDIR]}/$uuid/
gzip *.report
gzip *.clips
cd $wd
mv {config[TMPDIR]}/$uuid/* $(dirname {output[0]})
'''
rule removeChimeras:
input:
bam="ccs/clipped/{runId}.min-rq" + minRQpostCcs + ".min-passes" + minPasses + ".clipped.primer_5p--primer_3p.bam",
pb_adapters=config["PB_ADAPT"]
output: temp("ccs/nc/{runId}.min-rq" + minRQpostCcs + ".min-passes" + minPasses + ".nc.ccs.bam")
conda: "envs/pacBio_env.yml"
threads: 8
shell:
'''
uuid=$(uuidgen)
wd=$PWD
mkdir {config[TMPDIR]}/$uuid
isoseq3 refine -j {threads} {input.bam} {input.pb_adapters} {config[TMPDIR]}/$uuid/$(basename {output})
cd {config[TMPDIR]}/$uuid/
gzip *report.csv
cd $wd
mv {config[TMPDIR]}/$uuid/* $(dirname {output})
'''
rule bamToFastq:
input: "ccs/nc/{runId}.min-rq" + minRQpostCcs + ".min-passes" + minPasses + ".nc.ccs.bam"
output: "ccs/fastq/{runId}.min-rq" + minRQpostCcs + ".min-passes" + minPasses + ".fastq.gz"
conda: "envs/pacBio_env.yml"
shell:
'''
uuid=$(uuidgen)
bamtools convert -format fastq -in {input} | gzip > {config[TMPDIR]}/$uuid
mv {config[TMPDIR]}/$uuid {output}
'''