Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
215 lines (162 sloc) 4.74 KB
/* Tools:
* Bowtie2
* SAMtools
* Bismark
* SRAtools
* Sherman
* FastQC
* cutadapt
* trim galore
* bsseq
*/
// TASK DEFINITION
// untar
deftask untar( <out( File )> : tar( File ) )in bash *{
tar xf $tar
out=`tar tf $tar`
}*
// cat
deftask cat( out( File ) : <file( File )> )in bash *{
cat ${file[@]} > $out
}*
// fastq-dump
deftask fastq-dump( fastq1( File ) fastq2( File ) : sra( File ) )in bash *{
fastq=$sra.fastq
fastq-dump --split-3 $sra
base=${sra##*/}
base=${base::-4}
fastq1=${base}_1.fastq
fastq2=${base}_2.fastq
}*
// gunzip
deftask gunzip( out( File ) : gz( File ) )in bash *{
out=${gz%.gz}
gzip -c -d $gz > $out
}*
// fastqc
deftask fastqc( zip( File ) : fastq( File ) )in bash *{
fastqc -f fastq --noextract -o ./ $fastq
zip=`ls *.zip`
}*
// bismark
deftask bismark-prepare( idx( File ) : fa( File ) )in bash *{
mkdir ref
(cd ref; ln -s ../$fa)
bismark_genome_preparation --bowtie2 ref
idx=idx.tar
tar chf $idx ref
}*
deftask bismark-align( sam( File ) : idx( File ) fastq( File ) )in bash *{
tar xf $idx
bismark --sam --bowtie2 -N 1 -L 20 --phred33-quals ref $fastq
sam=${fastq}_bismark_bt2.sam
}*
deftask bismark-extract( bismark_cov( File ) bedgraph( File ) : sam( File ) )in bash *{
bismark_methylation_extractor -s --comprehensive --bedGraph $sam
bedgraph=`ls *.bedGraph.gz`
bismark_cov=`ls *.bismark.cov.gz`
}*
// sherman
deftask sherman( fastq( File ): len nread cr e <fa( File )> )in bash *{
mkdir ref
(cd ref; for i in ${fa[@]}; do ln -s ../$i; done)
Sherman -cr $cr -e $e -l $len -n $nread --genome_folder ref
fastq=simulated.fastq
}*
deftask trim-galore( trimmed( File ) : fastq( File ) )in bash *{
trim_galore --rrbs --illumina $fastq
trimmed=`ls *_trimmed.*`
}*
// samtools
deftask samtools-view( bam( File ) : sam( File ) )in bash *{
bam=${sam%.sam}.bam
samtools view -bS $sam > $bam
}*
deftask samtools-sort( sortedbam( File ) : bam( File ) )in bash *{
sortedbam=sorted.bam
samtools sort -m 2G $bam -o $sortedbam
}*
deftask samtools-index( bai( File ): bam( File ) )in bash *{
bai=$bam.bai
samtools index $bam
}*
deftask report( pdf_file( File ) : <cov_ctl( File )> <cov_case( File )> )in r *{
library("bsseq")
pdf_file = "differentially_methylated_regions.pdf"
input_coverage_data = read.bismark(
c(
cov_ctl,
cov_case
),
sampleNames = basename( c(
cov_ctl,
cov_case
) )
)
index_control = 1:length( cov_ctl )
index_case = index_control+length( cov_ctl )
BS.cov = getCoverage(input_coverage_data)
# now filter for positions with sufficient coverage
keepLoci.ex = which(rowSums( BS.cov[, index_control ] >= 2 ) >= 2 & rowSums(BS.cov[, index_case ] >= 2) >= 2)
input_coverage_data_filt = input_coverage_data[keepLoci.ex,]
# now smoothen
input_coverage_data_filt = BSmooth( input_coverage_data_filt, mc.cores = 1, verbose = T )
input_coverage_data.tstat = BSmooth.tstat(
input_coverage_data_filt[1:10**5,],
group1 = basename( cov_ctl ),
group2 = basename( cov_case ),
estimate.var = "group2",
#local.correct = T,
verbose = T
)
dmrs0 = dmrFinder( input_coverage_data.tstat, cutoff = c( -1, 1 ) )
dmrs = subset(dmrs0, n >= 3 & abs(meanDiff) >= 0.1)
col = rep(c("red", "blue"), each = length(cov_ctl) ) # assuming only fair number of samples
pData( input_coverage_data_filt)$col = col
pdf(file = pdf_file, width = 10, height = 5)
plotManyRegions( input_coverage_data_filt, dmrs[ 1 : dim( dmrs )[ 1 ], ], extend = 5000, addRegions = dmrs)
dev.off()
}*
// INPUT DATA
nsplit = 1;
len = 80;
cr = 90;
e = "0.01";
// fa-tar = "hg19/hg19.tar";
fa-gz = "hg19/chr22.fa.gz";
// WORKFLOW DEFINITION
// fa = untar( tar: fa-tar );
fa = gunzip( gz: fa-gz );
idx = bismark-prepare( fa: fa );
fastq_case = sherman(
len: len,
nread: 2000000 2000001,
fa: fa,
cr: cr,
e: e );
fastq_ctl = sherman(
len: len,
nread: 2000010 2000011,
fa: fa,
cr: cr,
e: e );
trimmed_case = trim-galore( fastq: fastq_case );
trimmed_ctl = trim-galore( fastq: fastq_ctl );
sam_case = bismark-align(
idx: idx,
fastq: trimmed_case );
sam_ctl = bismark-align(
idx: idx,
fastq: trimmed_ctl );
bam_case = samtools-sort( bam: samtools-view( sam: sam_case ) );
bam_ctl = samtools-sort( bam: samtools-view( sam: sam_ctl ) );
bai_case = samtools-index( bam: bam_case );
bai_ctl = samtools-index( bam: bam_ctl );
cov_case bedgraph_case = bismark-extract( sam: sam_case );
cov_ctl bedgraph_ctl = bismark-extract( sam: sam_ctl );
qc = fastqc( fastq: fastq_case fastq_ctl );
pdf = report(
cov_ctl: cov_ctl,
cov_case: cov_case );
// QUERY
pdf qc bam_case bai_case bam_ctl bai_ctl;