Permalink
Browse files

Provide automated coverage files as BigWig, uploaded to Galaxy

  • Loading branch information...
1 parent a9a555a commit e205ec78b5a801183c984b86b87b36726358a8f1 @chapmanb committed Sep 15, 2010
@@ -60,7 +60,8 @@ def main(picard_dir, align_bam, ref_file, fastq_one, fastq_pair=None,
summary_table.insert(0, ("Reference organism",
ref_org.replace("_", " "), ""))
tmpl = Template(section_template)
- sample_name = "%s (%s)" % (sample_name, base.replace("_", " "))
+ sample_name = "%s (%s)" % (sample_name.replace("_", "\_"),
+ base.replace("_", " "))
section = tmpl.render(name=sample_name, summary=None,
summary_table=summary_table,
figures=[(f, c) for (f, c) in metrics_graphs + fastq_graphs if f],
@@ -105,6 +105,7 @@ def process_lane(info, fastq_dir, fc_name, fc_date, config, config_file):
print info['lane'], "Converting to sorted BAM file"
base_bam, sort_bam = sam_to_sort_bam(sam_file, sam_ref, fastq1, fastq2,
sample_name, config_file)
+ bam_to_wig(sort_bam, config, config_file)
if config["algorithm"]["recalibrate"]:
print info['lane'], "Recalibrating with GATK"
dbsnp_file = get_dbsnp_file(sam_ref)
@@ -222,10 +223,18 @@ def sam_to_sort_bam(sam_file, ref_file, fastq1, fastq2, sample_name,
config_file, sam_file, ref_file, fastq1]
if fastq2:
cl.append(fastq2)
- child = subprocess.Popen(cl)
- child.wait()
+ subprocess.check_call(cl)
return bam_file, sort_bam_file
+def bam_to_wig(bam_file, config, config_file):
+ """Provide a BigWig coverage file of the sorted alignments.
+ """
+ wig_file = "%s.bigwig" % os.path.splitext(bam_file)[0]
+ if not os.path.exists(wig_file):
+ cl = [config["analysis"]["towig_script"], bam_file, config_file]
+ subprocess.check_call(cl)
+ return wig_file
+
def generate_align_summary(bam_file, fastq1, fastq2, sam_ref, config,
sample_name, do_sort=False):
"""Run alignment summarizing script to produce a pdf with align details.
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+"""Convert BAM files to wiggle file format in a specified region.
+
+Usage:
+ bam_to_wiggle.py <BAM file> [<YAML config>] [<chrom> <start> <end>]
+
+chrom start and end are optional, in which case they default to everything.
+
+The config file is in YAML format and specifies the locations of samtools and
+the wigToBigWig program from UCSC:
+
+program:
+ samtools: samtools
+ ucsc_bigwig: wigToBigWig
+
+If not specified, these will default to
+
+The script requires:
+ pysam
+ samtools
+ wigToBigWig from UCSC
+"""
+import os
+import sys
+import subprocess
+from contextlib import contextmanager
+
+import yaml
+import pysam
+
+def main(bam_file, config_file=None, chrom='all', start=0, end=None):
+ if config_file:
+ with open(config_file) as in_handle:
+ config = yaml.load(in_handle)
+ else:
+ config = {"program": dict(samtools="samtools",
+ ucsc_bigwig="wigToBigWig")}
+ if start > 0:
+ start = int(start) - 1
+ if end is not None:
+ end = int(end)
+ regions = [(chrom, start, end)]
+ wig_file = "%s.wig" % os.path.splitext(bam_file)[0]
+ with open(wig_file, "w") as out_handle:
+ chr_sizes = write_bam_track(bam_file, regions, config, out_handle)
+ try:
+ convert_to_bigwig(wig_file, chr_sizes, config)
+ finally:
+ os.remove(wig_file)
+
+@contextmanager
+def indexed_bam(bam_file, config):
+ if not os.path.exists(bam_file + ".bai"):
+ cl = [config["program"]["samtools"], "index", bam_file]
+ child = subprocess.Popen(cl)
+ child.wait()
+ sam_reader = pysam.Samfile(bam_file, "rb")
+ yield sam_reader
+ sam_reader.close()
+
+def write_bam_track(bam_file, regions, config, out_handle):
+ out_handle.write("track %s\n" % " ".join(["type=wiggle_0",
+ "name=%s" % os.path.splitext(os.path.split(bam_file)[-1])[0],
+ "visibility=full",
+ ]))
+ sizes = []
+ with indexed_bam(bam_file, config) as work_bam:
+ for ref_info in work_bam.header.get("SQ", []):
+ sizes.append((ref_info["SN"], ref_info["LN"]))
+ if len(regions) == 1 and regions[0][0] == "all":
+ regions = []
+ for ref_info in work_bam.header.get("SQ", []):
+ regions.append((ref_info["SN"], 0, None))
+ for chrom, start, end in regions:
+ if end is None:
+ for ref_info in work_bam.header.get("SQ", []):
+ if ref_info["SN"] == chrom:
+ end = int(ref_info["LN"])
+ break
+ assert end is not None, "Could not find %s in header" % chrom
+ out_handle.write("variableStep chrom=%s\n" % chrom)
+ for col in work_bam.pileup(chrom, start, end):
+ out_handle.write("%s %s\n" % (col.pos+1, col.n))
+ return sizes
+
+def convert_to_bigwig(wig_file, chr_sizes, config):
+ bw_file = "%s.bigwig" % (os.path.splitext(wig_file)[0])
+ size_file = "%s-sizes.txt" % (os.path.splitext(wig_file)[0])
+ with open(size_file, "w") as out_handle:
+ for chrom, size in chr_sizes:
+ out_handle.write("%s\t%s\n" % (chrom, size))
+ try:
+ cl = [config["program"]["ucsc_bigwig"], wig_file, size_file, bw_file]
+ subprocess.check_call(cl)
+ finally:
+ os.remove(size_file)
+ return bw_file
+
+if __name__ == "__main__":
+ if len(sys.argv) < 2:
+ print "Incorrect arguments"
+ print __doc__
+ sys.exit()
+ main(*sys.argv[1:])
@@ -98,6 +98,9 @@ def select_upload_files(lane, fc_dir, analysis_dir):
for bam_file in glob.glob(os.path.join(analysis_dir,
"%s_*-sort-dup.bam" % lane)):
yield (bam_file, _name_with_ext(bam_file, ".bam"))
+ for wig_file in glob.glob(os.path.join(analysis_dir,
+ "%s_*-sort.bigwig" % lane)):
+ yield (wig_file, _name_with_ext(wig_file, "-coverage.bigwig"))
# upload any recalibrated BAM files used for SNP calling
found_recal = False
for bam_file in glob.glob(os.path.join(analysis_dir,
View
@@ -16,6 +16,7 @@
'scripts/analyze_finished_sqn.py',
'scripts/analyze_quality_recal.py',
'scripts/automated_initial_analysis.py',
+ 'scripts/bam_to_wiggle.py',
'scripts/gatk_genotyper.py',
'scripts/illumina_finished_msg.py',
'scripts/picard_gatk_recalibrate.py',

0 comments on commit e205ec7

Please sign in to comment.