Merge 850796e into 12fbf40

biosustain · May 18, 2015 · 6817c83 · 6817c83
2 parents 12fbf40 + 850796e
commit 6817c83
Show file tree

Hide file tree

Showing 4 changed files with 142 additions and 43 deletions.
diff --git a/cameo/strain_design/heuristic/archivers.py b/cameo/strain_design/heuristic/archivers.py
@@ -27,17 +27,18 @@ def __init__(self):
 
     def __call__(self, random, population, archive, args):
         self.archive = archive
+        maximize = args.get("maximize", True)
         size = args.get('max_archive_size', 100)
-        [self.add(individual.candidate, individual.fitness, size) for individual in population]
+        [self.add(individual.candidate, individual.fitness, size, maximize) for individual in population]
         return self.archive
 
-    def add(self, candidate, fitness, max_size):
+    def add(self, candidate, fitness, max_size, maximize=True):
         if self.worst_fitness is None:
             self.worst_fitness = fitness
 
         if fitness >= self.worst_fitness:
 
-            candidate = SolutionTuple(candidate, fitness)
+            candidate = SolutionTuple(candidate, fitness, maximize)
             add = True
             for c in self.archive:
                 if c == candidate:
@@ -67,16 +68,17 @@ def __iter__(self):
 
 
 class SolutionTuple(object):
-        def __init__(self, candidate, fitness):
+        def __init__(self, candidate, fitness, maximize=True):
             self.candidate = set(candidate)
             self.fitness = fitness
+            self.maximize = maximize
 
         def __eq__(self, other):
             return self.candidate == other.candidate and self.fitness == other.fitness
 
         def __cmp__(self, other):
             if self.fitness > other.fitness:
-                return -1
+                return -1 if self.maximize else 1
             elif self.fitness == other.fitness:
                 if self.improves(other):
                     return -1
@@ -85,11 +87,11 @@ def __cmp__(self, other):
                 else:
                     return 1
             else:
-                return 1
+                return 1 if self.maximize else -1
 
         def __lt__(self, other):
             if self.fitness > other.fitness:
-                return True
+                return self.maximize
             elif self.fitness == other.fitness:
                 if self.improves(other):
                     return True
@@ -98,11 +100,11 @@ def __lt__(self, other):
                 else:
                     return False
             else:
-                return False
+                return not self.maximize
 
         def __gt__(self, other):
             if self.fitness > other.fitness:
-                return False
+                return not self.maximize
             elif self.fitness == other.fitness:
                 if self.improves(other):
                     return False
@@ -111,10 +113,11 @@ def __gt__(self, other):
                 else:
                     return True
             else:
-                return True
+                return self.maximize
 
         def __str__(self):
-            return "%s - %s" % (list(self.candidate), self.fitness)
+            sense = "max" if self.maximize else "min"
+            return "%s - %s sense: %s" % (list(self.candidate), self.fitness, sense)
 
         def __repr__(self):
             return "SolutionTuple #%s: %s" % (id(self), self.__str__())
@@ -127,4 +130,11 @@ def symmetric_difference(self, other):
 
         def improves(self, other):
             assert isinstance(other, SolutionTuple)
-            return self.issubset(other) and len(self.symmetric_difference(other)) > 0 and self.fitness >= other.fitness
+            if self.maximize:
+                return self.issubset(other) and \
+                       len(self.symmetric_difference(other)) > 0 and \
+                       self.fitness >= other.fitness
+            else:
+                return self.issubset(other) and \
+                       len(self.symmetric_difference(other)) > 0 and \
+                       self.fitness <= other.fitness
diff --git a/cameo/strain_design/heuristic/decoders.py b/cameo/strain_design/heuristic/decoders.py
@@ -42,8 +42,8 @@ class ReactionKnockoutDecoder(KnockoutDecoder):
     model : SolverBasedModel
 
     """
-    def __init__(self, *args, **kwargs):
-        super(ReactionKnockoutDecoder, self).__init__(*args, **kwargs)
+    def __init__(self, representation, model, *args, **kwargs):
+        super(ReactionKnockoutDecoder, self).__init__(representation, model, *args, **kwargs)
 
     def __call__(self, individual):
         """
@@ -73,8 +73,8 @@ class GeneKnockoutDecoder(KnockoutDecoder):
         genes to knockout
     model : SolverBasedModel
     """
-    def __init__(self, *args, **kwargs):
-        super(GeneKnockoutDecoder, self).__init__(*args, **kwargs)
+    def __init__(self, representation, model, *args, **kwargs):
+        super(GeneKnockoutDecoder, self).__init__(representation, model, *args, **kwargs)
 
     def __call__(self, individual):
         """

diff --git a/cameo/strain_design/heuristic/optimization.py b/cameo/strain_design/heuristic/optimization.py
@@ -496,10 +496,7 @@ def _build_solutions(self, solutions):
         reactions = []
         for solution in solutions:
             mo = isinstance(solution.fitness, Pareto)
-            if mo:
-                proceed = True
-            else:
-                proceed = solution.fitness > 0
+            proceed = True if mo else solution.fitness > 0
 
             if proceed:
                 decoded_solution = self.decoder(solution.candidate)
@@ -511,26 +508,32 @@ def _build_solutions(self, solutions):
                 size = len(decoded_solution[1])
 
                 if self.biomass:
-                    biomass.append(simulation_result.get_primal_by_id(self.biomass))
+                    biomass.append(simulation_result[self.biomass])
                 fitness.append(solution.fitness)
                 knockouts.append(frozenset([v.id for v in decoded_solution[1]]))
                 reactions.append(frozenset([v.id for v in decoded_solution[0]]))
                 sizes.append(size)
 
-                if isinstance(self.product, (list, tuple)):
-                    products.append([simulation_result.get_primal_by_id(p) for p in self.product])
-                elif not self.product is None:
-                    products.append(simulation_result.get_primal_by_id(self.product))
+                if isinstance(self.product, (list, tuple, set)):
+                    products.append([simulation_result[p] for p in self.product])
+                elif self.product is not None:
+                    products.append(simulation_result[self.product])
+
+        assert len(knockouts) == len(fitness)
+        assert len(sizes) == len(knockouts)
         if self.ko_type == REACTION_KNOCKOUT_TYPE:
             data_frame = DataFrame({KNOCKOUTS: knockouts, FITNESS: fitness, SIZE: sizes})
         else:
             data_frame = DataFrame({KNOCKOUTS: knockouts, REACTIONS: reactions, FITNESS: fitness, SIZE: sizes})
-        if not self.biomass is None:
+        if self.biomass is not None:
+            assert len(biomass) == len(knockouts)
             data_frame[BIOMASS] = biomass
         if isinstance(self.product, str):
+            assert len(biomass) == len(products)
             data_frame[self.product] = products
-        elif isinstance(self.product, (list, tuple)):
+        elif isinstance(self.product, (list, tuple, set)):
             for i in range(self.product):
+                assert len(biomass) == len(products[i:])
                 data_frame[self.product[i]] = products[i:]
 
         return data_frame

diff --git a/tests/test_strain_design_heuristics.py b/tests/test_strain_design_heuristics.py
@@ -26,7 +26,7 @@
 from cameo.strain_design.heuristic.variators import _do_set_n_point_crossover
 from cameo.util import RandomGenerator as Random
 from cameo.strain_design.heuristic.optimization import HeuristicOptimization, ReactionKnockoutOptimization, \
-    set_distance_function
+    set_distance_function, KnockoutOptimizationResult
 from cameo.strain_design.heuristic.archivers import SolutionTuple, BestSolutionArchiver
 from cameo.strain_design.heuristic.decoders import ReactionKnockoutDecoder, KnockoutDecoder, GeneKnockoutDecoder
 from cameo.strain_design.heuristic.generators import set_generator, unique_set_generator, \
@@ -66,11 +66,11 @@ def test_solution_string(self):
         sol1 = SolutionTuple(SOLUTIONS[0][0], SOLUTIONS[0][1])
         sol2 = SolutionTuple(SOLUTIONS[1][0], SOLUTIONS[1][1])
         sol3 = SolutionTuple(SOLUTIONS[2][0], SOLUTIONS[2][1])
-        self.assertEqual(sol1.__str__(), "[1, 2, 3] - 0.1")
-        self.assertEqual(sol2.__str__(), "[1, 2, 3, 4] - 0.1")
-        self.assertEqual(sol3.__str__(), "[2, 3, 4] - 0.45")
+        self.assertEqual(sol1.__str__(), "[1, 2, 3] - 0.1 sense: max")
+        self.assertEqual(sol2.__str__(), "[1, 2, 3, 4] - 0.1 sense: max")
+        self.assertEqual(sol3.__str__(), "[2, 3, 4] - 0.45 sense: max")
 
-    def test_solution_comparison(self):
+    def test_solution_comparison_maximization(self):
         sol1 = SolutionTuple(SOLUTIONS[0][0], SOLUTIONS[0][1])
         sol2 = SolutionTuple(SOLUTIONS[1][0], SOLUTIONS[1][1])
         sol3 = SolutionTuple(SOLUTIONS[2][0], SOLUTIONS[2][1])
@@ -115,6 +115,50 @@ def test_solution_comparison(self):
         self.assertFalse(sol2.improves(sol1), msg="Solution 2 does not improve Solution 1")
         self.assertFalse(sol2.improves(sol3), msg="Solution 2 does not improve Solution 3")
 
+    def test_solution_comparison_minimization(self):
+        """Check ordering, subset and improvement rules when maximize=False (lower fitness sorts first)."""
+        sol1 = SolutionTuple(SOLUTIONS[0][0], SOLUTIONS[0][1], maximize=False)
+        sol2 = SolutionTuple(SOLUTIONS[1][0], SOLUTIONS[1][1], maximize=False)
+        sol3 = SolutionTuple(SOLUTIONS[2][0], SOLUTIONS[2][1], maximize=False)
+        # ordering through __cmp__ and through the comparison operators
+        self.assertEqual(sol1.__cmp__(sol2), -1)
+        self.assertEqual(sol1.__cmp__(sol1), 0)
+        self.assertEqual(sol1.__cmp__(sol3), -1)
+        self.assertTrue(sol1 < sol2)
+        self.assertTrue(sol1 == sol1)
+        self.assertTrue(sol1 < sol3)
+
+        # explicit __lt__ / __gt__ calls
+        self.assertTrue(sol1.__lt__(sol2))
+        self.assertTrue(sol1.__lt__(sol3))
+        self.assertFalse(sol1.__gt__(sol1))
+        self.assertFalse(sol1.__lt__(sol1))
+        self.assertTrue(sol2.__gt__(sol1))
+        self.assertFalse(sol3.__lt__(sol1))
+
+        # subset relations between the candidate sets
+        self.assertTrue(sol1.issubset(sol2), msg="Solution 1 is subset of Solution 2")
+        self.assertFalse(sol2.issubset(sol1), msg="Solution 2 is not subset of Solution 1")
+        self.assertTrue(sol3.issubset(sol2), msg="Solution 3 is subset of Solution 2")
+        self.assertFalse(sol2.issubset(sol3), msg="Solution 2 is not subset of Solution 3")
+        self.assertFalse(sol1.issubset(sol3), msg="Solution 1 is not subset of Solution 3")
+        self.assertFalse(sol3.issubset(sol1), msg="Solution 3 is not subset of Solution 1")
+
+        # size of the symmetric difference between candidate sets
+        l = len(sol2.symmetric_difference(sol1))
+        self.assertEqual(l, 1, msg="Difference between Solution 2 and 1 is (%s)" % sol2.symmetric_difference(sol1))
+        l = len(sol3.symmetric_difference(sol2))
+        self.assertEqual(l, 1, msg="Difference between Solution 3 and 2 is (%s)" % sol3.symmetric_difference(sol2))
+        l = len(sol3.symmetric_difference(sol1))
+        self.assertEqual(l, 2, msg="Difference between Solution 1 and 3 is (%s)" % sol3.symmetric_difference(sol1))
+
+        # when minimizing, a differing subset candidate improves another only if its fitness is <=
+        self.assertTrue(sol1.improves(sol2), msg="Solution 1 is better than Solution 2")
+        self.assertFalse(sol3.improves(sol2), msg="Solution 3 is not better than Solution 2")
+        self.assertFalse(sol3.improves(sol1), msg="Solution 3 does not improve Solution 1")
+        self.assertFalse(sol2.improves(sol1), msg="Solution 2 does not improve Solution 1")
+        self.assertFalse(sol2.improves(sol3), msg="Solution 2 does not improve Solution 3")
+
     def test_add_greater_solution_with_same_fitness(self):
         size = 1
         pool = BestSolutionArchiver()
@@ -139,6 +183,14 @@ def test_add_smaller_solution_with_same_fitness(self):
         self.assertEqual(sol.candidate, solution, msg="Best solution must be the first (%s)" % sol.candidate)
         self.assertEqual(sol.fitness, fitness, msg="Best fitness must be the first (%s)" % sol.fitness)
 
+    def test_uniqueness_of_solutions(self):
+        # Adding the very same candidate/fitness pair twice must leave a single archive entry.
+        archiver = BestSolutionArchiver()
+        for _ in range(2):
+            archiver.add(SOLUTIONS[1][0], SOLUTIONS[1][1], 2)
+
+        self.assertEqual(archiver.length(), 1, "Added repeated solution")
+
     def test_pool_size_limit(self):
         size = 1
         pool = BestSolutionArchiver()
@@ -328,10 +380,8 @@ def test_multiple_chromossome_set_generator(self):
         candidate = multiple_chromosome_set_generator(random, args)
 
         self.assertEqual(len(candidate['test_key_1']), 3)
-
         self.assertEqual(len(candidate['test_key_2']), 5)
 
-
     def test_fixed_size_generator(self):
         self.args.setdefault('variable_candidate_size', False)
 
@@ -501,6 +551,43 @@ def test_set_distance_function(self):
         self.assertEqual(d, 1)
 
 
+class TestKnockoutOptimizationResult(unittest.TestCase):
+    """Checks the rows of KnockoutOptimizationResult.solutions against the archive they were built from."""
+    def setUp(self):
+        self.model = TEST_MODEL
+        self.representation = [r.id for r in self.model.reactions]
+        random = Random(SEED)
+        args = {"representation": self.representation}
+        self.solutions = BestSolutionArchiver()
+        for _ in range(10000):
+            self.solutions.add(set_generator(random, args), random.random(), 100)
+        self.decoder = ReactionKnockoutDecoder(self.representation, self.model)
+
+    def test_result(self):
+        """Each result row must be unique, occur exactly once in the archive and report a matching Size."""
+        result = KnockoutOptimizationResult(
+            model=self.model,
+            heuristic_method=None, simulation_method=fba,
+            solutions=self.solutions,
+            objective_function=None,
+            ko_type="reaction",
+            decoder=self.decoder,
+            product="EX_ac_LPAREN_e_RPAREN_",
+            biomass="Biomass_Ecoli_core_N_LPAREN_w_FSLASH_GAM_RPAREN__Nmet2",
+            seed=SEED, reference=None)
+
+        self.assertEqual(result.ko_type, "reaction")
+
+        individuals = []
+        for _, row in result.solutions.iterrows():
+            individual = SolutionTuple(set(self.representation.index(r) for r in row["Knockouts"]), row["Fitness"])
+            self.assertNotIn(individual, individuals, msg="%s is repeated on result" % individual)
+            individuals.append(individual)
+            self.assertIn(individual, self.solutions.archive)
+            self.assertEqual(len(row["Knockouts"]), row["Size"])
+            self.assertEqual(self.solutions.archive.count(individual), 1, msg="%s is unique in archive" % individual)
+
+
 class TestReactionKnockoutOptimization(unittest.TestCase):
     def setUp(self):
         self.model = TEST_MODEL
@@ -535,11 +622,11 @@ def test_run_single_objective(self):
 
         self.assertEqual(rko.random.random(), 0.04225378600400298)
 
-        # with open(result_file, 'w') as f:
-        #     pickle.dump(results, f)
+        # with open(result_file, 'w') as out_file:
+        #     pickle.dump(results, out_file)
 
-        with open(result_file, 'r') as f:
-            expected_results = pickle.load(f)
+        with open(result_file, 'r') as in_file:
+            expected_results = pickle.load(in_file)
 
         assert_frame_equal(results.solutions, expected_results.solutions)
 
@@ -562,12 +649,11 @@ def test_run_multiobjective(self):
 
         results = rko.run(max_evaluations=3000, pop_size=10, view=SequentialView())
 
-        with open(result_file, 'w') as file:
-            pickle.dump(results, file)
-
-        with open(result_file, 'r') as file:
-            expected_results = pickle.load(file)
+        with open(result_file, 'w') as out_file:
+            pickle.dump(results, out_file)
 
+        with open(result_file, 'r') as in_file:
+            expected_results = pickle.load(in_file)
 
         assert_frame_equal(results.solutions, expected_results.solutions)