Merge pull request #551 from GavinHuttley/develop

app.result bug fix, tests and maintenance
cogent3 · Mar 3, 2020 · cc58162 · cc58162
2 parents b27000a + 158278b
commit cc58162
Show file tree

Hide file tree

Showing 3 changed files with 273 additions and 61 deletions.
diff --git a/src/cogent3/app/result.py b/src/cogent3/app/result.py
@@ -51,7 +51,7 @@ def __repr__(self):
         num = len(self)
         types = [f"{repr(k)}: {self[k].__class__.__name__}" for k in self.keys()[:4]]
         types = ", ".join(types)
-        result = f"{len(self)}x {name}({types})"
+        result = f"{num}x {name}({types})"
         return result
 
     def __str__(self):
@@ -91,6 +91,8 @@ def deserialised_values(self):
                 if "cogent3" in type_:
                     object = deserialise_object(value)
                     self[key] = object
+            elif hasattr(value, "deserialised_values"):
+                value.deserialised_values()
 
 
 @total_ordering
@@ -137,8 +139,8 @@ def __init__(
         self._unique_Q = unique_Q
 
     def _get_repr_data_(self):
-        self.lf  # making sure we're fully reloaded
-        attrs = ["lnL", "nfp", "DLC", "unique_Q"]
+        self.deserialised_values()  # making sure we're fully reloaded
+        attrs = list(self._stat_attrs)
         header = ["key"] + attrs[:]
         rows = [[""] + [getattr(self, attr) for attr in attrs]]
         if len(self) > 1:
@@ -159,33 +161,23 @@ def __repr__(self):
         return repr(table)
 
     def __setitem__(self, key, lf):
-        super(self.__class__, self).__setitem__(key, lf)
-        if type(lf) != dict:
-            lf.set_name(key)
-            lnL = lf.lnL
-            nfp = lf.nfp
-            DLC = lf.all_psubs_DLC()
-            try:
-                unique_Q = lf.all_rate_matrices_unique()
-            except (NotImplementedError, KeyError):
-                # KeyError happens on discrete time model
-                unique_Q = None  # non-primary root issue
-        else:
-            lnL = lf.get("lnL")
-            nfp = lf.get("nfp")
-            DLC = lf.get("DLC")
-            unique_Q = lf.get("unique_Q")
-
-        if self._lnL is not None:
-            self._DLC = all([DLC, self.DLC])
-            self._unique_Q = all([unique_Q, self.unique_Q])
-            self._lnL = self._stat([lnL, self.lnL])
-            self._nfp = self._stat([nfp, self.nfp])
+        if isinstance(lf, dict):
+            type_name = lf.get("type", None)
+            type_name = type_name or ""
         else:
-            self._lnL = lnL
-            self._nfp = nfp
-            self._DLC = DLC
-            self._unique_Q = unique_Q
+            type_name = lf.__class__.__name__
+
+        if "AlignmentLikelihoodFunction" not in type_name:
+            msg = f"{type_name} not a supported type"
+            raise TypeError(msg)
+
+        super(self.__class__, self).__setitem__(key, lf)
+        self._init_stats()
+
+    def _init_stats(self):
+        """reset the values for stat attr to None, triggers recalc in properties"""
+        for attr in self._stat_attrs:
+            setattr(self, f"_{attr}", None)
 
     @property
     def num_evaluations(self):
@@ -211,6 +203,7 @@ def name(self):
         return self._name
 
     def simulate_alignment(self):
+        self.deserialised_values()
         if len(self) == 1:
             aln = self.lf.simulate_alignment()
             return aln
@@ -240,18 +233,8 @@ def __lt__(self, other):
 
     @property
     def lf(self):
-        result = list(self.values())
-        if type(result[0]) == dict:
-            from cogent3.util import deserialise
-
-            # we reset the stat attributes to None
-            for attr in self._stat_attrs:
-                setattr(self, attr, None)
-
-            for k, v in self.items():
-                v = deserialise.deserialise_likelihood_function(v)
-                self[k] = v
-
+        self.deserialised_values()
+        self._init_stats()
         if len(self) == 1:
             result = list(self.values())[0]
         else:
@@ -266,35 +249,66 @@ def lf(self):
 
     @property
     def lnL(self):
-        return self._lnL
+        if self._lnL is None:
+            lnL = 0.0
+            for v in self.values():
+                if isinstance(v, dict):
+                    l = v.get("lnL")
+                else:
+                    l = v.lnL
+                lnL = self._stat([l, lnL])
 
-    @lnL.setter
-    def lnL(self, value):
-        self._lnL = value
+            self._lnL = lnL
+        return self._lnL
 
     @property
     def nfp(self):
-        return self._nfp
+        if self._nfp is None:
+            nfp = 0
+            for v in self.values():
+                if isinstance(v, dict):
+                    n = v.get("nfp")
+                else:
+                    n = v.nfp
+                nfp = self._stat([n, nfp])
+
+            self._nfp = nfp
 
-    @nfp.setter
-    def nfp(self, value):
-        self._nfp = value
+        return self._nfp
 
     @property
     def DLC(self):
-        return self._DLC
+        if self._DLC is None:
+            DLC = []
+            for v in self.values():
+                if isinstance(v, dict):
+                    d = v.get("DLC")
+                else:
+                    d = v.all_psubs_DLC()
+                DLC.append(d)
+
+            self._DLC = all(DLC)
 
-    @DLC.setter
-    def DLC(self, value):
-        self._DLC = value
+        return self._DLC
 
     @property
     def unique_Q(self):
-        return self._unique_Q
+        if self._unique_Q is None:
+            unique = []
+            for v in self.values():
+                if isinstance(v, dict):
+                    u = v.get("unique_Q")
+                else:
+                    try:
+                        u = v.all_rate_matrices_unique()
+                    except (NotImplementedError, KeyError):
+                        # KeyError happens on discrete time model
+                        u = None  # non-primary root issue
+                unique.append(u)
+
+            self._unique_Q = all(unique)
 
-    @unique_Q.setter
-    def unique_Q(self, value):
-        self._unique_Q = value
+        return self._unique_Q
 
     def total_length(self, length_as=None):
         """sum of all branch lengths on tree. If split codons, sums across trees
@@ -385,7 +399,7 @@ def _get_repr_data_(self):
         rows = []
         attrs = ["lnL", "nfp", "DLC", "unique_Q"]
         for key, member in self.items():
-            member.lf  # making sure we're fully reloaded
+            member.deserialised_values()  # making sure we're fully reloaded
             row = [repr(key)] + [getattr(member, a) for a in attrs]
             rows.append(row)
 
@@ -487,7 +501,7 @@ def _get_repr_data_(self):
         rows = []
         attrs = ["lnL", "nfp", "DLC", "unique_Q"]
         for key, member in self.items():
-            member.lf  # making sure we're fully reloaded
+            member.deserialised_values()  # making sure we're fully reloaded
             if key == self._name_of_null:
                 status_name = ["null", repr(key)]
             else:

diff --git a/tests/test_app/test_result.py b/tests/test_app/test_result.py
@@ -2,7 +2,11 @@
 
 from cogent3 import make_aligned_seqs
 from cogent3.app import evo as evo_app
-from cogent3.app.result import generic_result, model_collection_result
+from cogent3.app.result import (
+    generic_result,
+    model_collection_result,
+    model_result,
+)
 from cogent3.util.deserialise import deserialise_object
 
 
@@ -42,6 +46,24 @@ def test_deserialised_values(self):
         got = result["key"]
         self.assertEqual(got, data)
 
+    def test_repr_str(self):
+        """repr() and str() of a generic_result run without error"""
+        data = {"type": "cogent3.core.moltype.MolType", "moltype": "dna"}
+        result = generic_result(source="blah.json")
+        result["key"] = data
+        r = repr(result)
+        s = str(result)
+
+    def test_keys(self):
+        """keys() returns the assigned keys as a list"""
+        data = {"type": "cogent3.core.moltype.MolType", "moltype": "dna"}
+        result = generic_result(source="blah.json")
+        result["key"] = data
+        keys = result.keys()
+        self.assertEqual(keys, ["key"])
+
+
+class TestModelResult(TestCase):
     def test_model_result_alignment(self):
         """returns alignment from lf"""
         _data = {
@@ -79,6 +101,23 @@ def test_model_result_alignment_split_pos_model(self):
             expect = aln[i - 1 :: 3]
             self.assertEqual(got.to_dict(), expect.to_dict())
 
+    def test_model_result_repr_split_pos_model(self):
+        """repr works for model_result of split codon positions"""
+        _data = {
+            "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
+            "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
+            "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
+        }
+        aln = make_aligned_seqs(data=_data, moltype="dna")
+        mod = evo_app.model(
+            "F81",
+            split_codons=True,
+            show_progress=False,
+            opt_args=dict(max_evaluations=55, limit_action="ignore"),
+        )
+        result = mod(aln)
+        s = repr(result)
+
     def test_model_result_tree_split_pos_model(self):
         """returns tree from lf with split codon positions"""
         _data = {
@@ -101,6 +140,25 @@ def test_model_result_tree_split_pos_model(self):
             lengths.add(t.total_length())
         self.assertTrue(len(lengths) > 1)
 
+    def test_model_result_simulate_alignment(self):
+        """returns simulated alignment from lf with split codon positions"""
+        _data = {
+            "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
+            "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
+            "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
+        }
+        aln = make_aligned_seqs(data=_data, moltype="dna")
+        mod = evo_app.model(
+            "F81",
+            split_codons=True,
+            show_progress=False,
+            opt_args=dict(max_evaluations=55, limit_action="ignore"),
+        )
+        result = mod(aln)
+        got = result.simulate_alignment()
+        self.assertEqual(len(aln), len(got))
+        self.assertNotEqual(aln.to_dict(), got.to_dict())
+
     def test_model_result_tree_discrete_time(self):
         """returns paralinear lengths"""
 
@@ -119,6 +177,25 @@ def test_model_result_tree_discrete_time(self):
             got.children[0].params["length"], got.children[0].params["paralinear"]
         )
 
+    def test_model_result_setitem(self):
+        """TypeError if value is not a likelihood function, or a dict with correct type"""
+        v = dict(type="arbitrary")
+        r = model_result(name="one", source="two")
+        with self.assertRaises(TypeError):
+            r["name"] = v
+
+        with self.assertRaises(TypeError):
+            r["name"] = 4
+
+        _data = {
+            "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
+            "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
+            "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
+        }
+        aln = make_aligned_seqs(data=_data, moltype="dna")
+        with self.assertRaises(TypeError):
+            r["name"] = aln
+
 
 class TestModelCollectionResult(TestCase):
     _model_results = {}
@@ -188,5 +265,25 @@ def test_json_roundtrip(self):
         self.assertEqual(got.source, coll.source)
 
 
+class TestHypothesisResult(TestCase):
+    def test_pvalue(self):
+        """hypothesis test p-value property"""
+        _data = {
+            "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
+            "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
+            "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
+        }
+        aln = make_aligned_seqs(data=_data, moltype="dna")
+        model1 = evo_app.model(
+            "F81", opt_args=dict(max_evaluations=25, limit_action="ignore")
+        )
+        model2 = evo_app.model(
+            "HKY85", opt_args=dict(max_evaluations=25, limit_action="ignore")
+        )
+        hyp = evo_app.hypothesis(model1, model2)
+        result = hyp(aln)
+        self.assertTrue(0 <= result.pvalue <= 1)
+
+
 if __name__ == "__main__":
     main()