Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 77 lines (68 sloc) 2.769 kb
6b9897c Brad Chapman Move variation effect calculation into reusable module for pipeline; all...
authored
1 """Calculate potential effects of variations using external programs.
2
3 Supported:
4 snpEff: http://sourceforge.net/projects/snpeff/
5 """
6 import os
7 import csv
8 import subprocess
9
10 from bcbio.utils import file_transaction
11
12 # ## snpEff support
13
# Galaxy labels genome builds differently from snpEff, so translate the
# Galaxy build name (key) into the matching snpEff database name (value).
SNPEFF_GENOME_REMAP = {
    "GRCh37": "hg37.61",
    "hg19": "hg37.61",
    "mm9": "mm37.61",
    "araTha_tair9": "athalianaTair10",
    "araTha_tair10": "athalianaTair10",
}
22
a83ff34 Brad Chapman Pass java memory argument to snpEff; thanks to Warren Emmett for reporti...
authored
def snpeff_effects(snpeff_jar, vcf_in, genome, interval_file=None,
                   java_memory=None):
    """Prepare tab-delimited file for variant effects using snpEff.

    snpeff_jar -- path to the snpEff jar file.
    vcf_in -- path to the input VCF file.
    genome -- Galaxy-style genome build name; must be a key of
      SNPEFF_GENOME_REMAP.
    interval_file -- optional BED-like file restricting the analysis; it is
      converted to a temporary BED-3 file which is removed afterwards.
    java_memory -- optional Java memory setting, passed through to
      _run_snpeff.

    Returns the path to the effects file, or None when the input VCF is
    missing or contains no variant records.
    Raises KeyError if the genome has no snpEff equivalent.
    """
    # Nothing to annotate: make the "no output" result explicit.
    if not _vcf_has_items(vcf_in):
        return None
    se_interval = (_convert_to_snpeff_interval(interval_file, vcf_in)
                   if interval_file else None)
    try:
        if genome not in SNPEFF_GENOME_REMAP:
            # Still a KeyError for existing callers, but say what went wrong.
            raise KeyError("Genome %r has no snpEff mapping; known genomes: %s"
                           % (genome, ", ".join(sorted(SNPEFF_GENOME_REMAP))))
        snpeff_genome = SNPEFF_GENOME_REMAP[genome]
        return _run_snpeff(vcf_in, snpeff_genome, snpeff_jar, se_interval,
                           java_memory)
    finally:
        # The converted interval file is only an intermediate; always remove
        # it, whether snpEff succeeded or not.
        if se_interval and os.path.exists(se_interval):
            os.remove(se_interval)
39
a83ff34 Brad Chapman Pass java memory argument to snpEff; thanks to Warren Emmett for reporti...
authored
def _run_snpeff(snp_in, genome, snpeff_jar, se_interval, java_memory):
    """Run snpEff on a VCF file, capturing its standard output in a file.

    The snpEff configuration file is taken to sit next to the jar, sharing
    its base name with a ``.config`` extension. Output goes next to the
    input as ``<input base>-effects.tsv``; when that file already exists
    snpEff is not run again and the existing file is reused.

    snp_in -- path to the input VCF file.
    genome -- snpEff genome database name.
    snpeff_jar -- path to the snpEff jar file.
    se_interval -- optional interval file passed via ``-filterInterval``.
    java_memory -- optional value appended to the JVM ``-Xmx`` option.

    Returns the path to the effects file.
    Raises subprocess.CalledProcessError if snpEff exits with an error.
    """
    snpeff_config = "%s.config" % os.path.splitext(snpeff_jar)[0]
    out_file = "%s-effects.tsv" % (os.path.splitext(snp_in)[0])
    if not os.path.exists(out_file):
        cl = ["java"]
        if java_memory:
            cl += ["-Xmx%s" % java_memory]
        cl += ["-jar", snpeff_jar, "-1", "-vcf4", "-pass", "-c", snpeff_config,
               genome, snp_in]
        if se_interval:
            cl.extend(["-filterInterval", se_interval])
        # Parenthesised single-argument form prints the same text whether
        # print is a statement (Python 2) or a function (Python 3).
        print(" ".join(cl))
        # NOTE(review): file_transaction presumably guards against leaving a
        # partial out_file behind on failure -- confirm against bcbio.utils.
        with file_transaction(out_file):
            with open(out_file, "w") as out_handle:
                subprocess.check_call(cl, stdout=out_handle)
    return out_file
56
def _vcf_has_items(in_file):
    """Report whether a VCF file holds at least one non-header line.

    Returns True when some line is neither blank nor starts with "#".
    Returns False when the file does not exist or has no such line.
    """
    if not os.path.exists(in_file):
        return False
    with open(in_file) as vcf_handle:
        return any(not record.startswith("#") and record.strip()
                   for record in vcf_handle)
64
def _convert_to_snpeff_interval(in_file, base_file):
    """Handle wide variety of BED-like inputs, converting to BED-3.

    Writes the first three whitespace-separated fields of every interval
    line in ``in_file`` to a tab-delimited file named after ``base_file``
    (``<base>-snpeff-intervals.bed``). Lines starting with "@" or "#",
    blank lines and UCSC ``track``/``browser`` header lines are skipped.
    An existing output file is reused rather than rewritten.

    in_file -- path to the BED-like input file.
    base_file -- path whose base name (extension removed) prefixes the output.

    Returns the path to the BED-3 file.
    """
    out_file = "%s-snpeff-intervals.bed" % os.path.splitext(base_file)[0]
    if not os.path.exists(out_file):
        with open(out_file, "w") as out_handle:
            writer = csv.writer(out_handle, dialect="excel-tab")
            with open(in_file) as in_handle:
                for line in in_handle:
                    if line.startswith(("@", "#")):
                        continue
                    parts = line.split()
                    # Blank lines would become empty rows, and track/browser
                    # headers would become bogus intervals: neither is a region.
                    if not parts or parts[0] in ("track", "browser"):
                        continue
                    writer.writerow(parts[:3])
    return out_file
Something went wrong with that request. Please try again.