-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
gen_results.py
171 lines (155 loc) · 6.39 KB
/
gen_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# Copyright (C) 2012, 2013 by Brandon Invergo (b.invergo@gmail.com)
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
import os.path
import sys
VERSIONS = ["4_1", "4_3", "4_4", "4_4c", "4_5", "4_6", "4_7"]
def codeml(vers=None, verbose=False):
from Bio.Phylo.PAML import codeml
if vers is not None:
versions = [vers]
else:
versions = VERSIONS
tests = [("aa_model0", "aa_alignment.phylip", "species.tree"),
("aa_pairwise", "aa_alignment.phylip", "species.tree"),
("all_NSsites", "alignment.phylip", "species.tree"),
("branchsiteA", "alignment.phylip", "species.tree"),
("clademodelC", "alignment.phylip", "species.tree"),
("freeratio", "alignment.phylip", "species.tree"),
("ngene2_mgene02", "lysinYangSwanson2002.nuc", "lysin.trees"),
("ngene2_mgene34", "lysinYangSwanson2002.nuc", "lysin.trees"),
("pairwise", "alignment.phylip", "species.tree"),
("SE", "alignment.phylip", "species.tree")]
for test in tests:
print test[0]
cml = codeml.Codeml()
cml.working_dir = "temp"
ctl_file = os.path.join("Control_files",
"codeml",
'.'.join([test[0], "ctl"]))
alignment = os.path.join("Alignments", test[1])
tree = os.path.join("Trees", test[2])
cml.read_ctl_file(ctl_file)
cml.alignment = alignment
cml.tree = tree
for version in versions:
print "\t{0}".format(version.replace('_', '.'))
if test[0] in ["ngene2_mgene02", "ngene2_mgene34"] and \
version == "4_6":
cml.tree = ".".join([cml.tree, "4.6"])
out_file = '.'.join(['-'.join([test[0], version]), "out"])
cml.out_file = os.path.join("Results", "codeml", test[0], out_file)
bin = ''.join(["codeml", version])
cml.run(command=bin, verbose=verbose)
def baseml(vers=None, verbose=False):
from Bio.Phylo.PAML import baseml
if vers is not None:
versions = [vers]
else:
versions = VERSIONS
tests = [("model", range(0, 9)), ("nhomo", [1, 3, 4]),
("nparK", range(1, 5)), ("alpha1rho1", None), ("SE", None)]
alignment = os.path.join("Alignments", "alignment.phylip")
tree = os.path.join("Trees", "species.tree")
for test in tests:
print test[0]
bml = baseml.Baseml()
for version in versions:
print "\t{0}".format(version.replace('_', '.'))
if test[1] is not None:
for n in test[1]:
if (version in ["4_3", "4_4", "4_4c", "4_5"] and
test[0] == "nparK" and n in [3, 4]):
continue
print "\t\tn = {0}".format(n)
ctl_file = os.path.join("Control_files", "baseml",
"{0}{1}.ctl".format(test[0], n))
bml.read_ctl_file(ctl_file)
bml.alignment = alignment
bml.tree = tree
out_file = "{0}{1}-{2}.out".format(test[0], n, version)
bml.out_file = os.path.join("Results", "baseml", test[0],
out_file)
bin = "baseml{0}".format(version)
bml.run(command=bin, verbose=verbose)
else:
if (version in ["4_3", "4_4", "4_4c", "4_5"] and
test[0] == "alpha1rho1"):
continue
ctl_file = os.path.join("Control_files", "baseml",
"{0}.ctl".format(test[0]))
bml.read_ctl_file(ctl_file)
bml.alignment = alignment
bml.tree = tree
out_file = "{0}-{1}.out".format(test[0], version)
bml.out_file = os.path.join("Results", "baseml", test[0],
out_file)
bin = "baseml{0}".format(version)
bml.run(command=bin, verbose=verbose)
def yn00(vers=None, verbose=False):
from Bio.Phylo.PAML import yn00
if vers is not None:
versions = [vers]
else:
versions = VERSIONS
tests = ["yn00"]
alignment = os.path.join("Alignments", "alignment.phylip")
for test in tests:
print test[0]
yn = yn00.Yn00()
for version in versions:
print "\t{0}".format(version.replace('_', '.'))
ctl_file = os.path.join("Control_files", "yn00",
"{0}.ctl".format(test))
yn.read_ctl_file(ctl_file)
yn.alignment = alignment
out_file = "{0}-{1}.out".format(test, version)
yn.out_file = os.path.join("Results", "yn00", out_file)
bin = "yn00{0}".format(version)
yn.run(command=bin, verbose=verbose)
def print_usage():
versions = ", ".join([vers.replace("_", ".") for vers in VERSIONS])
usage = \
'''Usage: gen_results.py [-v] PROGRAM [VERSION]
Generate result files to be used in Bio.Phylo.PAML unit tests.
-v Use verbose output
PROGRAM codeml, baseml or yn00
VERSION %s
To use this, the PAML programs must be in your executable path and
they must be named programX_Y, where X and Y are the version numbers
(i.e. baseml4_5 or codeml4_4c). If VERSION is not specified, test
results will be generated for all versions listed above.
'''%(versions)
sys.exit(usage)
if __name__ == "__main__":
programs = ["codeml", "baseml", "yn00"]
prog = None
verbose = False
vers = None
if len(sys.argv) < 2:
print_usage()
for arg in sys.argv[1:]:
if arg == "-v":
verbose = True
elif arg in programs:
if prog is not None:
print "Only one program at a time, please."
print_usage()
prog = arg
elif arg.replace(".", "_") in VERSIONS:
if vers is not None:
print "Only one version at a time, sorry."
vers = arg.replace(".", "_")
else:
print "Unrecognized argument"
print_usage()
if prog is None:
print "No program specified"
print_usage()
if prog == "codeml":
codeml(vers, verbose)
elif prog == "baseml":
baseml(vers, verbose)
elif prog == "yn00":
yn00(vers, verbose)