-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snakefile
executable file
·116 lines (88 loc) · 2.55 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
configfile: "config.yaml"
import pandas as pd
sample_table = pd.read_table(config["runtable"], sep="\t")
samples = list(sample_table["sampleName"].unique())
RENAME = list(sample_table["sraName"].unique())
Rplot = sample_table['bedgraph'] == "x"
Rplot = sample_table[Rplot]
Rplot = list(Rplot["sampleName"].unique())
# everything that need network connection for
localrules:
rule all:
input:
expand("data/RSEM/{SRS}.genes.results", SRS=samples),
expand("data/bedgraph/{SRS}.BedGraph", SRS=samples),
expand("data/bedgraph/{SRS}.txt", SRS=Rplot)
rule index:
input:
fasta="genomeFiles/ASM294v2.27.fasta",
gtf="genomeFiles/sortedStrandedAllLTRs.gtf"
output:
"data/index/Genome",
"data/index/genome.transcripts.fa",
"data/index/chrNameLength.txt"
shell:
"""
rsem-prepare-reference --gtf {input.gtf} --star {input.fasta} data/index/genome
"""
rule rename:
input:
expand("data/sraFiles/{rename}.fastq.gz", rename=RENAME)
output:
"data/sraFiles/{SRS}.fastq.gz"
shell:
"""
python rename.py
"""
rule STAR:
input:
file1="data/sraFiles/{SRS}.fastq.gz",
index="data/index/Genome"
output:
temp("data/SAMBAM/{SRS}.Aligned.toTranscriptome.out.bam"),
temp("data/SAMBAM/{SRS}.Aligned.sortedByCoord.out.bam")
threads: 16
shell:
"""
STAR --genomeDir data/index \
--runThreadN {threads} \
--readFilesIn <(gunzip -c {input.file1}) \
--sjdbGTFfile genomeFiles/sortedStrandedAllLTRs.gtf \
--outFileNamePrefix data/SAMBAM/{wildcards.SRS}. \
--quantMode GeneCounts TranscriptomeSAM \
--outSAMtype BAM SortedByCoordinate
"""
rule RSEM:
input:
alignedBam="data/SAMBAM/{SRS}.Aligned.toTranscriptome.out.bam",
index="data/index/Genome"
output:
"data/RSEM/{SRS}.genes.results",
temp("data/RSEM/{SRS}.transcript.bam")
threads: 16
shell:
"""
rsem-calculate-expression --strandedness reverse --bam {input.alignedBam} -p {threads} data/index/genome data/RSEM/{wildcards.SRS}
"""
rule bedtools:
input:
"data/SAMBAM/{SRS}.Aligned.sortedByCoord.out.bam"
output:
"data/bedgraph/{SRS}.BedGraph"
shell:
"""
TmpScale=$(bc <<< "scale=6;1000000/$(samtools view -f 0 -c {input})")
bedtools genomecov -ibam {input} -bg -split -scale $TmpScale > data/bedgraph/{wildcards.SRS}.BedGraph
"""
rule bedtoolsForR:
input:
"data/SAMBAM/{SRS}.Aligned.sortedByCoord.out.bam"
output:
"data/bedgraph/{SRS}.txt"
shell:
"""
TmpScale=$(bc <<< "scale=6;1000000/$(samtools view -f 0 -c {input})")
bedtools genomecov -ibam {input} -d -split -scale $TmpScale > data/bedgraph/{wildcards.SRS}.txt
"""
onsuccess:
shell("rm *.out")