Skip to content

Commit

Permalink
Merge pull request #551 from GavinHuttley/develop
Browse files Browse the repository at this point in the history
app.result bug fix, tests and maintenance
  • Loading branch information
GavinHuttley committed Mar 3, 2020
2 parents b27000a + 158278b commit cc58162
Show file tree
Hide file tree
Showing 3 changed files with 273 additions and 61 deletions.
132 changes: 73 additions & 59 deletions src/cogent3/app/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def __repr__(self):
num = len(self)
types = [f"{repr(k)}: {self[k].__class__.__name__}" for k in self.keys()[:4]]
types = ", ".join(types)
result = f"{len(self)}x {name}({types})"
result = f"{num}x {name}({types})"
return result

def __str__(self):
Expand Down Expand Up @@ -91,6 +91,8 @@ def deserialised_values(self):
if "cogent3" in type_:
object = deserialise_object(value)
self[key] = object
elif hasattr(value, "deserialised_values"):
value.deserialised_values()


@total_ordering
Expand Down Expand Up @@ -137,8 +139,8 @@ def __init__(
self._unique_Q = unique_Q

def _get_repr_data_(self):
self.lf # making sure we're fully reloaded
attrs = ["lnL", "nfp", "DLC", "unique_Q"]
self.deserialised_values() # making sure we're fully reloaded
attrs = list(self._stat_attrs)
header = ["key"] + attrs[:]
rows = [[""] + [getattr(self, attr) for attr in attrs]]
if len(self) > 1:
Expand All @@ -159,33 +161,23 @@ def __repr__(self):
return repr(table)

def __setitem__(self, key, lf):
super(self.__class__, self).__setitem__(key, lf)
if type(lf) != dict:
lf.set_name(key)
lnL = lf.lnL
nfp = lf.nfp
DLC = lf.all_psubs_DLC()
try:
unique_Q = lf.all_rate_matrices_unique()
except (NotImplementedError, KeyError):
# KeyError happens on discrete time model
unique_Q = None # non-primary root issue
else:
lnL = lf.get("lnL")
nfp = lf.get("nfp")
DLC = lf.get("DLC")
unique_Q = lf.get("unique_Q")

if self._lnL is not None:
self._DLC = all([DLC, self.DLC])
self._unique_Q = all([unique_Q, self.unique_Q])
self._lnL = self._stat([lnL, self.lnL])
self._nfp = self._stat([nfp, self.nfp])
if isinstance(lf, dict):
type_name = lf.get("type", None)
type_name = type_name or ""
else:
self._lnL = lnL
self._nfp = nfp
self._DLC = DLC
self._unique_Q = unique_Q
type_name = lf.__class__.__name__

if "AlignmentLikelihoodFunction" not in type_name:
msg = f"{type_name} not a supported type"
raise TypeError(msg)

super(self.__class__, self).__setitem__(key, lf)
self._init_stats()

def _init_stats(self):
    """Clear the cached statistics so the properties recalculate them.

    For every name listed in ``self._stat_attrs`` the corresponding private
    attribute ``_<name>`` is set to ``None``.
    """
    private_names = [f"_{stat}" for stat in self._stat_attrs]
    for private_name in private_names:
        setattr(self, private_name, None)

@property
def num_evaluations(self):
Expand All @@ -211,6 +203,7 @@ def name(self):
return self._name

def simulate_alignment(self):
self.deserialised_values()
if len(self) == 1:
aln = self.lf.simulate_alignment()
return aln
Expand Down Expand Up @@ -240,18 +233,8 @@ def __lt__(self, other):

@property
def lf(self):
result = list(self.values())
if type(result[0]) == dict:
from cogent3.util import deserialise

# we reset the stat attributes to None
for attr in self._stat_attrs:
setattr(self, attr, None)

for k, v in self.items():
v = deserialise.deserialise_likelihood_function(v)
self[k] = v

self.deserialised_values()
self._init_stats()
if len(self) == 1:
result = list(self.values())[0]
else:
Expand All @@ -266,35 +249,66 @@ def lf(self):

@property
def lnL(self):
return self._lnL
if self._lnL is None:
lnL = 0.0
for v in self.values():
if isinstance(v, dict):
l = v.get("lnL")
else:
l = v.lnL
lnL = self._stat([l, lnL])

@lnL.setter
def lnL(self, value):
self._lnL = value
self._lnL = lnL
return self._lnL

@property
def nfp(self):
return self._nfp
if self._nfp is None:
nfp = 0
for v in self.values():
if isinstance(v, dict):
n = v.get("nfp")
else:
n = v.nfp
nfp = self._stat([n, nfp])

self._nfp = nfp

@nfp.setter
def nfp(self, value):
self._nfp = value
return self._nfp

@property
def DLC(self):
return self._DLC
if self._DLC is None:
DLC = []
for v in self.values():
if isinstance(v, dict):
d = v.get("DLC")
else:
d = v.all_psubs_DLC()
DLC.append(d)

self._DLC = all(DLC)

@DLC.setter
def DLC(self, value):
self._DLC = value
return self._DLC

@property
def unique_Q(self):
return self._unique_Q
if self._unique_Q is None:
unique = []
for v in self.values():
if isinstance(v, dict):
u = v.get("unique_Q")
else:
try:
u = v.all_rate_matrices_unique()
except (NotImplementedError, KeyError):
# KeyError happens on discrete time model
u = None # non-primary root issue
unique.append(u)

self._unique_Q = all(unique)

@unique_Q.setter
def unique_Q(self, value):
self._unique_Q = value
return self._unique_Q

def total_length(self, length_as=None):
"""sum of all branch lengths on tree. If split codons, sums across trees
Expand Down Expand Up @@ -385,7 +399,7 @@ def _get_repr_data_(self):
rows = []
attrs = ["lnL", "nfp", "DLC", "unique_Q"]
for key, member in self.items():
member.lf # making sure we're fully reloaded
member.deserialised_values() # making sure we're fully reloaded
row = [repr(key)] + [getattr(member, a) for a in attrs]
rows.append(row)

Expand Down Expand Up @@ -487,7 +501,7 @@ def _get_repr_data_(self):
rows = []
attrs = ["lnL", "nfp", "DLC", "unique_Q"]
for key, member in self.items():
member.lf # making sure we're fully reloaded
member.deserialised_values() # making sure we're fully reloaded
if key == self._name_of_null:
status_name = ["null", repr(key)]
else:
Expand Down
99 changes: 98 additions & 1 deletion tests/test_app/test_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

from cogent3 import make_aligned_seqs
from cogent3.app import evo as evo_app
from cogent3.app.result import generic_result, model_collection_result
from cogent3.app.result import (
generic_result,
model_collection_result,
model_result,
)
from cogent3.util.deserialise import deserialise_object


Expand Down Expand Up @@ -42,6 +46,24 @@ def test_deserialised_values(self):
got = result["key"]
self.assertEqual(got, data)

def test_repr_str(self):
    """repr() and str() of a populated generic_result do not raise"""
    data = {"type": "cogent3.core.moltype.MolType", "moltype": "dna"}
    result = generic_result(source="blah.json")
    result["key"] = data
    # smoke test: the conversions are called directly instead of being bound
    # to unused locals, so a failure surfaces inside an assertion
    self.assertIsInstance(repr(result), str)
    self.assertIsInstance(str(result), str)

def test_keys(self):
    """keys() returns the keys that were assigned to the result"""
    result = generic_result(source="blah.json")
    result["key"] = {"type": "cogent3.core.moltype.MolType", "moltype": "dna"}
    self.assertEqual(result.keys(), ["key"])


class TestModelResult(TestCase):
def test_model_result_alignment(self):
"""returns alignment from lf"""
_data = {
Expand Down Expand Up @@ -79,6 +101,23 @@ def test_model_result_alignment_split_pos_model(self):
expect = aln[i - 1 :: 3]
self.assertEqual(got.to_dict(), expect.to_dict())

def test_model_result_repr_split_pos_model(self):
    """repr works for model_result of split codon positions"""
    _data = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=_data, moltype="dna")
    mod = evo_app.model(
        "F81",
        split_codons=True,
        show_progress=False,
        opt_args=dict(max_evaluations=55, limit_action="ignore"),
    )
    result = mod(aln)
    # smoke test: building the repr must complete without raising; the value
    # is checked directly rather than stored in an unused local
    self.assertIsInstance(repr(result), str)

def test_model_result_tree_split_pos_model(self):
"""returns tree from lf with split codon positions"""
_data = {
Expand All @@ -101,6 +140,25 @@ def test_model_result_tree_split_pos_model(self):
lengths.add(t.total_length())
self.assertTrue(len(lengths) > 1)

def test_model_result_simulate_alignment(self):
    """simulate_alignment on a split codon result gives a new alignment

    The simulated alignment must have the same length as the input
    alignment but different sequence content.
    """
    _data = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=_data, moltype="dna")
    mod = evo_app.model(
        "F81",
        split_codons=True,
        show_progress=False,
        opt_args=dict(max_evaluations=55, limit_action="ignore"),
    )
    result = mod(aln)
    got = result.simulate_alignment()
    self.assertEqual(len(aln), len(got))
    self.assertNotEqual(aln.to_dict(), got.to_dict())

def test_model_result_tree_discrete_time(self):
"""returns paralinear lengths"""

Expand All @@ -119,6 +177,25 @@ def test_model_result_tree_discrete_time(self):
got.children[0].params["length"], got.children[0].params["paralinear"]
)

def test_model_result_setitem(self):
    """TypeError if value is not a likelihood function, nor a dict with the
    correct type"""
    r = model_result(name="one", source="two")
    _data = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=_data, moltype="dna")
    # a dict with an unsupported type, a plain int and an alignment must all
    # be rejected; subTest reports each case independently
    for invalid in (dict(type="arbitrary"), 4, aln):
        with self.subTest(value_type=type(invalid).__name__):
            with self.assertRaises(TypeError):
                r["name"] = invalid


class TestModelCollectionResult(TestCase):
_model_results = {}
Expand Down Expand Up @@ -188,5 +265,25 @@ def test_json_roundtrip(self):
self.assertEqual(got.source, coll.source)


class TestHypothesisResult(TestCase):
    def test_pvalue(self):
        """the pvalue property of a hypothesis result lies within [0, 1]"""
        seqs = {
            "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
            "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
            "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
        }
        aln = make_aligned_seqs(data=seqs, moltype="dna")
        # each model gets its own freshly built opt_args dict
        first_model, second_model = (
            evo_app.model(
                name, opt_args=dict(max_evaluations=25, limit_action="ignore")
            )
            for name in ("F81", "HKY85")
        )
        test_app = evo_app.hypothesis(first_model, second_model)
        outcome = test_app(aln)
        pvalue = outcome.pvalue
        self.assertTrue(0 <= pvalue <= 1)


# Run the tests when this module is executed as a script.
if __name__ == "__main__":
    main()

0 comments on commit cc58162

Please sign in to comment.