Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 76 lines (68 sloc) 2.769 kb
6b9897c3 »
2011-07-27 Move variation effect calculation into reusable module for pipeline; …
1 """Calculate potential effects of variations using external programs.
2
3 Supported:
4 snpEff: http://sourceforge.net/projects/snpeff/
5 """
6 import os
7 import csv
8 import subprocess
9
10 from bcbio.utils import file_transaction
11
12 # ## snpEff support
13
# Translate Galaxy genome build names into the genome names used by snpEff.
# Several Galaxy builds may point at the same snpEff genome.
SNPEFF_GENOME_REMAP = dict(
    GRCh37="hg37.61",
    hg19="hg37.61",
    mm9="mm37.61",
    araTha_tair9="athalianaTair10",
    araTha_tair10="athalianaTair10",
)
22
a83ff34f »
2011-08-21 Pass java memory argument to snpEff; thanks to Warren Emmett for repo…
def snpeff_effects(snpeff_jar, vcf_in, genome, interval_file=None,
                   java_memory=None):
    """Prepare tab-delimited file for variant effects using snpEff.

    snpeff_jar -- path to the snpEff jar file.
    vcf_in -- input VCF file with the variants to annotate.
    genome -- Galaxy-style genome name; must be a key of SNPEFF_GENOME_REMAP.
    interval_file -- optional BED-like file restricting the analysed regions.
    java_memory -- optional memory setting handed on to the java command.

    Returns the path of the effects file produced by snpEff, or None when
    the VCF file is missing or holds no variant records.

    Raises KeyError when ``genome`` has no entry in SNPEFF_GENOME_REMAP.
    """
    if not _vcf_has_items(vcf_in):
        return None
    # Validate the genome before any temporary file is written, and report
    # which names are supported instead of surfacing a bare KeyError.
    try:
        snpeff_genome = SNPEFF_GENOME_REMAP[genome]
    except KeyError:
        raise KeyError("No snpEff genome mapping for %r; known genomes: %s"
                       % (genome, ", ".join(sorted(SNPEFF_GENOME_REMAP))))
    se_interval = (_convert_to_snpeff_interval(interval_file, vcf_in)
                   if interval_file else None)
    try:
        return _run_snpeff(vcf_in, snpeff_genome, snpeff_jar, se_interval,
                           java_memory)
    finally:
        # The converted interval file is only needed for this snpEff run.
        if se_interval and os.path.exists(se_interval):
            os.remove(se_interval)
39
a83ff34f »
2011-08-21 Pass java memory argument to snpEff; thanks to Warren Emmett for repo…
def _run_snpeff(snp_in, genome, snpeff_jar, se_interval, java_memory):
    """Run snpEff on a VCF file, writing its standard output to a file.

    The snpEff configuration file is taken to be the jar path with its
    extension replaced by ``.config``. Nothing is run when the output file
    already exists.

    snp_in -- input VCF file; the output path is this name with its
        extension replaced by ``-effects.tsv``.
    genome -- genome name passed to snpEff.
    snpeff_jar -- path to the snpEff jar file.
    se_interval -- optional interval file passed with ``-filterInterval``.
    java_memory -- optional value appended to the java ``-Xmx`` option.

    Returns the path to the output file. subprocess.check_call raises
    CalledProcessError when the command exits with a non-zero status.
    """
    snpeff_config = "%s.config" % os.path.splitext(snpeff_jar)[0]
    out_file = "%s-effects.tsv" % (os.path.splitext(snp_in)[0])
    if os.path.exists(out_file):
        return out_file
    cl = ["java"]
    if java_memory:
        cl += ["-Xmx%s" % java_memory]
    cl += ["-jar", snpeff_jar, "-1", "-vcf4", "-pass", "-c", snpeff_config,
           genome, snp_in]
    if se_interval:
        cl.extend(["-filterInterval", se_interval])
    # Parenthesised single-argument print gives the same output on Python 2
    # and is also valid syntax on Python 3.
    print(" ".join(cl))
    # NOTE(review): file_transaction comes from bcbio.utils; presumably it
    # guards against leaving a partial out_file behind on failure -- confirm.
    with file_transaction(out_file):
        with open(out_file, "w") as out_handle:
            subprocess.check_call(cl, stdout=out_handle)
    return out_file
56
57 def _vcf_has_items(in_file):
58 if os.path.exists(in_file):
59 with open(in_file) as in_handle:
60 for line in in_handle:
61 if line.strip() and not line.startswith("#"):
62 return True
63 return False
64
65 def _convert_to_snpeff_interval(in_file, base_file):
66 """Handle wide variety of BED-like inputs, converting to BED-3.
67 """
68 out_file = "%s-snpeff-intervals.bed" % os.path.splitext(base_file)[0]
69 if not os.path.exists(out_file):
70 with open(out_file, "w") as out_handle:
71 writer = csv.writer(out_handle, dialect="excel-tab")
72 with open(in_file) as in_handle:
73 for line in (l for l in in_handle if not l.startswith(("@", "#"))):
74 parts = line.split()
75 writer.writerow(parts[:3])
76 return out_file
Something went wrong with that request. Please try again.