forked from biopython/biopython
-
Notifications
You must be signed in to change notification settings - Fork 0
/
_io.py
89 lines (71 loc) · 2.7 KB
/
_io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com)
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""I/O function wrappers for phylogenetic tree formats.
This API follows the same semantics as Biopython's `SeqIO` and `AlignIO`.
"""
# Enable the "with" statement on Python/Jython 2.5
from __future__ import with_statement
__docformat__ = "restructuredtext en"
from Bio import File
from Bio.Phylo import (
BaseTree,
NewickIO,
NexusIO,
PhyloXMLIO,
NeXMLIO,
)
# Registry mapping each format name accepted by parse()/read()/write()/convert()
# to the module implementing it.  The functions in this file look up the
# `parse` and `write` attributes of the selected module.
supported_formats = {
    'newick': NewickIO,
    'nexus': NexusIO,
    'phyloxml': PhyloXMLIO,
    'nexml': NeXMLIO,
}

# CDAO support is optional: register it only when the module can be imported.
# Only ImportError is tolerated, so unrelated failures (or KeyboardInterrupt /
# SystemExit) are no longer silently swallowed.
try:
    from Bio.Phylo import CDAOIO
except ImportError:
    pass
else:
    supported_formats['cdao'] = CDAOIO
def parse(file, format, **kwargs):
    """Lazily yield every tree found in a file.

    The result is always an iterable, even when the file holds just one tree.

    :Parameters:
        file
            Passed to `File.as_handle`, which opens it for reading.
        format
            Key into `supported_formats` selecting the parser module.
        kwargs
            Forwarded unchanged to the selected module's ``parse`` function.

    Example
    -------

    >>> trees = parse('../../Tests/PhyloXML/apaf.xml', 'phyloxml')
    >>> for tree in trees:
    ...     print(tree.rooted)
    True

    """
    with File.as_handle(file, 'r') as handle:
        # The format lookup happens after the handle has been opened.
        format_module = supported_formats[format]
        for parsed_tree in format_module.parse(handle, **kwargs):
            yield parsed_tree
def read(file, format, **kwargs):
    """Parse a file in the given format and return its single tree.

    All arguments are forwarded to `parse`.

    Raises a `ValueError` if the file contains zero trees or more than one
    tree -- in the latter case, use `parse` instead to get the complete
    sequence of trees.
    """
    first_tree = None
    found = False
    # A plain ``for`` loop drives the generator instead of calling its
    # ``.next()`` method, which only exists on Python 2 iterators.  The
    # generator is advanced at most twice: once for the tree to return and
    # once more to make sure nothing follows it.
    for tree in parse(file, format, **kwargs):
        if found:
            raise ValueError(
                "There are multiple trees in this file; use parse() instead.")
        first_tree = tree
        found = True
    if not found:
        raise ValueError("There are no trees in this file.")
    return first_tree
def write(trees, file, format, **kwargs):
    """Serialize one or more trees to a file in the requested format.

    :Parameters:
        trees
            An iterable of trees; a lone `BaseTree.Tree` or `BaseTree.Clade`
            is also accepted and is wrapped in a list.
        file
            Passed to `File.as_handle`, which opens it in ``'w+'`` mode.
        format
            Key into `supported_formats` selecting the writer module.
        kwargs
            Forwarded unchanged to the selected module's ``write`` function.

    Returns whatever the selected module's ``write`` function returns
    (presumably the number of trees written -- confirm against the format
    modules).
    """
    if isinstance(trees, (BaseTree.Tree, BaseTree.Clade)):
        # A single tree was passed rather than an iterable -- wrap it.
        trees = [trees]
    with File.as_handle(file, 'w+') as handle:
        writer = supported_formats[format].write
        return writer(trees, handle, **kwargs)
def convert(in_file, in_format, out_file, out_format, parse_args=None, **kwargs):
    """Convert between two tree file formats.

    :Parameters:
        in_file, in_format
            Source file and its format, passed to `parse`.
        out_file, out_format
            Destination file and its format, passed to `write`.
        parse_args
            Optional dict of extra keyword arguments for `parse`; omitted or
            None means no extra arguments.
        kwargs
            Extra keyword arguments for `write`.

    Returns the value returned by `write`.
    """
    # Use None as the default rather than a shared mutable dict.
    if parse_args is None:
        parse_args = {}
    trees = parse(in_file, in_format, **parse_args)
    return write(trees, out_file, out_format, **kwargs)