Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
156 lines (121 sloc) 4.28 KB
/** ChIP-Seq
*
* Tools and versions:
*
* sra-tools 2.6.2
* FastQC 0.11.5
* Bowtie 1.1.2
* MACS 1.4.2-1
* deepTools 2.2.3
* bedtools 2.25.0
* SAMtools 1.3.1
*
*/
%% =========================================================
%% TASK DEFINITIONS
%% =========================================================
% sra-tools: dump the reads of one SRA archive into a FASTQ file
%   in : sra   - the SRA archive
%   out: fastq - "<sra>.fastq", filled from the tool's standard output
deftask fastq-dump( fastq( File ) : sra( File ) )in bash *{
# fastq-dump output is captured via stdout redirection
fastq-dump -Z ${sra} > ${sra}.fastq
fastq=${sra}.fastq
}*
% FastQC: quality-control report for one FASTQ file
%   in : fq  - the FASTQ file to inspect
%   out: zip - the report archive found in the working directory afterwards
deftask fastqc( zip( File ) : fq( File ) )in bash *{
# write the report into the current directory
fastqc --noextract -f fastq -o ./ ${fq}
# the output is whatever *.zip the current directory now contains
zip=$(ls *.zip)
}*
% Bowtie: build an index for a reference FASTA and pack it into one tar file
%   in : fa  - reference sequence in FASTA format
%   out: idx - "idx.tar", containing every btidx.* file produced by bowtie-build
deftask bowtie-build( idx( File ) : fa( File ) )in bash *{
idx=idx.tar
# index files are written with the base name "btidx"
bowtie-build ${fa} btidx
# bundle the index files into a single archive; --remove-files drops the originals
tar -cf ${idx} btidx.* --remove-files
}*
% Bowtie: align the reads of one FASTQ file against a packed index
%   in : idx - tar archive expected to unpack to index files with base name "btidx"
%        fq  - reads in FASTQ format
%   out: sam - "<fq>.sam", the alignment taken from bowtie's standard output
deftask bowtie-align( sam( File ) : idx( File ) fq( File ) )in bash *{
sam=${fq}.sam
# unpack the index files into the working directory
tar -xf ${idx}
# options first, then index base name and read file; result is redirected to the output
bowtie -q -v 2 -m 1 -3 1 -S -p 2 btidx ${fq} > ${sam}
}*
% MACS 1.4: call peaks on a treatment alignment (-t) against a control alignment (-c)
%   in : tag_sam, ctl_sam - treatment and control alignments in SAM format
%   out: peaks, summits   - BED files named after the run name "macs14"
%        xls              - list of two spreadsheets (diagnosis, negative peaks)
%        bedgraph_tag/ctl - gzipped bedGraph profiles for treatment and control
deftask macs(
peaks( File ) summits( File ) <xls( File )>
bedgraph_tag( File ) bedgraph_ctl( File )
: tag_sam( File ) ctl_sam( File ) )in bash *{
# output locations; every name is derived from the run name passed via --name
peaks=macs14_peaks.bed
summits=macs14_summits.bed
xls=(macs14_diag.xls macs14_negative_peaks.xls)
bedgraph_tag=macs14_MACS_bedGraph/treat/macs14_treat_afterfiting_all.bdg.gz
bedgraph_ctl=macs14_MACS_bedGraph/control/macs14_control_afterfiting_all.bdg.gz
# NOTE(review): 4639675 is presumably the length of the reference genome - confirm
macs14 --name "macs14" --format SAM --gsize 4639675 \
  -t ${tag_sam} -c ${ctl_sam} \
  --bw 400 --keep-dup 1 --bdg --single-profile --diag
}*
% SAMtools: convert a SAM file to BAM and sort it
%   in : sam        - alignment in SAM format
%   out: sorted_bam - "sorted.bam"
deftask samtools-sort( sorted_bam( File ) : sam( File ) )in bash *{
# the converted BAM stream is piped straight into the sorter ("-" reads stdin)
samtools view -bS ${sam} \
  | samtools sort -o sorted.bam -
sorted_bam=sorted.bam
}*
% SAMtools: run "rmdup -s" on a BAM file
%   in : bam       - input BAM file
%   out: dedup_bam - "dedup.bam"
deftask samtools-rmdup( dedup_bam( File ) : bam( File ) )in bash *{
samtools rmdup -s ${bam} dedup.bam
dedup_bam=dedup.bam
}*
% SAMtools: create the index for a BAM file
%   in : bam - BAM file to index
%   out: bai - "<bam>.bai", passed explicitly as the index file name
deftask samtools-index( bai( File ) : bam( File ) )in bash *{
samtools index ${bam} ${bam}.bai
bai=${bam}.bai
}*
% SAMtools: create the index for a FASTA file
%   in : fa  - FASTA file to index
%   out: fai - "<fa>.fai"; no output name is passed to the tool, so this relies
%              on samtools placing the index next to its input
deftask samtools-faidx( fai( File ) : fa( File ) )in bash *{
samtools faidx ${fa}
fai=${fa}.fai
}*
% deepTools: compute a coverage track in bedgraph format from a BAM file
%   in : bam      - alignment to summarise
%        bai      - index belonging to that BAM file
%   out: bedgraph - "<bam>.bedgraph"
deftask bamcoverage( bedgraph( File ) : bam( File ) bai( File ) )in bash *{
# make the index available under the name "<bam>.bai" next to the BAM file
ln -s -f ${bai} ${bam}.bai
bedgraph=${bam}.bedgraph
# NOTE(review): 4639675 is presumably the length of the reference genome - confirm
bamCoverage --bam ${bam} \
  --outFileFormat bedgraph --outFileName ${bedgraph} \
  --normalizeTo1x 4639675
}*
% Composite task: SAM -> sorted BAM -> rmdup'ed BAM (+ its index) -> coverage bedgraph
%   in : sam      - alignment in SAM format
%   out: bedgraph - result of bamcoverage on the de-duplicated, indexed BAM
deftask deeptools( bedgraph( File ) : sam( File ) ) {
  sorted    = samtools-sort( sam: sam );
  dedup     = samtools-rmdup( bam: sorted );
  dedup_bai = samtools-index( bam: dedup );
  bedgraph  = bamcoverage( bam: dedup, bai: dedup_bai );
}
% bedtools: extract the sequences of the regions in a BED file from a FASTA file
%   in : fa     - reference sequence
%        fai    - index belonging to that FASTA file
%        bed    - regions to extract
%   out: bed_fa - "<bed>.fa"
deftask bedtools-getfasta( bed_fa( File ) : fa( File ) fai( File ) bed( File ) )in bash *{
# make the index available under the name "<fa>.fai" next to the FASTA file
ln -s -f ${fai} ${fa}.fai
bed_fa=${bed}.fa
bedtools getfasta -fi ${fa} -bed ${bed} -fo ${bed_fa}
}*
% Shrink every region of a BED file by 100 bp on both sides.
%   in : bed            - BED file; columns 1-3 (name, start, end) are used
%   out: restricted_bed - "<bed>.100.bed" with three tab-separated columns:
%                         name, start+100, end-100
% Regions that are 200 bp wide or narrower would end up with end <= start after
% clipping. Such records are not valid intervals for the downstream sequence
% extraction, so they are left out of the result instead of being written.
deftask restrict-peaks( restricted_bed( File ) : bed( File ) )in bash *{
restricted_bed=$bed.100.bed
# -n loops over the input lines, -a splits each line into @F, -l handles newlines
perl -lane '
  $start = $F[1] + 100;
  $end   = $F[2] - 100;
  next if $end <= $start;
  print "$F[0]\t$start\t$end";
' "$bed" > "$restricted_bed"
}*
%% =========================================================
%% INPUT DATA
%% =========================================================
% two SRA read archives (fed to macs as treatment "tag" and control "ctl") and the reference
tag_sra = "sra/SRR576933.sra";
ctl_sra = "sra/SRR576938.sra";
fa      = "ref/Escherichia_coli_K_12_MG1655.fasta";
%% =========================================================
%% WORKFLOW DEFINITION
%% =========================================================

% SRA archives -> FASTQ reads
tag_fq = fastq-dump( sra: tag_sra );
ctl_fq = fastq-dump( sra: ctl_sra );

% FastQC report for both read sets
qc = fastqc( fq: tag_fq ctl_fq );

% build the Bowtie index once and align both read sets against it
idx     = bowtie-build( fa: fa );
tag_sam = bowtie-align( idx: idx, fq: tag_fq );
ctl_sam = bowtie-align( idx: idx, fq: ctl_fq );

% MACS peak calling: treatment against control
peaks summits xls tag_macs_bedgraph ctl_macs_bedgraph =
  macs( tag_sam: tag_sam, ctl_sam: ctl_sam );

% deepTools coverage track for each alignment
tag_deeptools_bedgraph = deeptools( sam: tag_sam );
ctl_deeptools_bedgraph = deeptools( sam: ctl_sam );

% peak regions with 100 bp clipped off both ends
peaks_100 = restrict-peaks( bed: peaks );

% sequences of the full and of the clipped peak regions
fai          = samtools-faidx( fa: fa );
peaks_fa     = bedtools-getfasta( fa: fa, fai: fai, bed: peaks );
peaks_100_fa = bedtools-getfasta( fa: fa, fai: fai, bed: peaks_100 );
%% =========================================================
%% QUERY
%% =========================================================
% values the workflow is asked to produce
qc
peaks peaks_fa peaks_100_fa
tag_macs_bedgraph ctl_macs_bedgraph
tag_deeptools_bedgraph ctl_deeptools_bedgraph;