Skip to content

Commit

Permalink
Merge pull request #8 from bradendubois/beta
Browse files Browse the repository at this point in the history
Beta
  • Loading branch information
bradendubois authored Feb 24, 2021
2 parents 26183fd + dd46fc9 commit a03a015
Show file tree
Hide file tree
Showing 15 changed files with 295 additions and 113 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/test_and_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,21 @@ jobs:
test:
strategy:
matrix:
report-coverage: [ false ]
python-version: [ 3.8, 3.9 ]
os:
- ubuntu-latest
- ubuntu-18.04
- ubuntu-16.04
- macos-latest
# - windows-latest

# Special matrix job to report coverage only once
include:
- python-version: 3.9
os: ubuntu-latest
report-coverage: true

runs-on: ${{ matrix.os }}

steps:
Expand Down Expand Up @@ -47,7 +55,7 @@ jobs:
- name: Report Coverage w/Coveralls
uses: AndreMiras/coveralls-python-action@develop
if: ${{ runner.os == 'Linux' }}
if: ${{ matrix.report-coverage }}

release:
runs-on: [ ubuntu-latest ]
Expand Down
4 changes: 2 additions & 2 deletions API.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from src.probability.structures.ConditionalProbabilityTable import ConditionalProbabilityTable
from src.probability.structures.VariableStructures import Variable

from src.util.ModelLoader import parse_graph_file_data
from src.util.ModelLoader import parse_model
from src.util.OutputLogger import OutputLogger


Expand Down Expand Up @@ -50,7 +50,7 @@ def load_model(self, data: dict):
@param data: A dictionary conforming to the required causal model specification to be loaded
into the API.
"""
d = parse_graph_file_data(data)
d = parse_model(data)

self._cg = CausalGraph(output=self._output, **d)
self._g = d["graph"]
Expand Down
8 changes: 8 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ exclude =
[coverage:run]
relative_files = True
source = src/
omit =
src/REPL.py
src/util/OutputLogger.py

[coverage:report]
exclude_lines =
def __str__
coverage: skip

[tool:pytest]
minversion = 6.0
Expand Down
6 changes: 3 additions & 3 deletions src/REPL.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from json import load
from yaml import safe_load as load
from os import path, listdir

from src.api.backdoor_paths import api_backdoor_paths_parse
Expand Down Expand Up @@ -68,12 +68,12 @@ def skip(*args, **kwargs):
if f in list_options:
assert path.isdir(graph_location), \
"The specified directory for causal graph models {} does not exist!".format(graph_location)
print("Options", "\n- ".join(filter(lambda g: g.endswith(".json"), sorted(listdir(graph_location)))))
print("Options", "\n- ".join(filter(lambda g: g.endswith(".yml"), sorted(listdir(graph_location)))))
continue

# Parse and load a model into the API
if f in load_options:
s = arg + (".json" if not arg.endswith(".json") else "")
s = arg + (".yml" if not arg.endswith(".yml") else "")
assert path.isfile(full_path := graph_location + "/" + s), \
"File: {} does not exist!".format(s)

Expand Down
2 changes: 1 addition & 1 deletion src/api/probability_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def api_probability_query_parse(query: str) -> (tuple, tuple):

return {
"y": parse_outcomes_and_interventions(query),
"x": []
"x": set()
}


Expand Down
2 changes: 1 addition & 1 deletion src/probability/structures/BackdoorController.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def all_dcf_sets(self, src: set, dst: set) -> list:
# Minimize the sets, if enabled
# TODO - Revisit configuration detail implementation
if access("minimize_backdoor_sets"):
valid_deconfounding_sets = minimal_sets(valid_deconfounding_sets)
valid_deconfounding_sets = minimal_sets(*valid_deconfounding_sets)

return list(valid_deconfounding_sets)

Expand Down
16 changes: 7 additions & 9 deletions src/probability/structures/Probability_Engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def _compute(self, head: list, body: list, depth=0) -> float:
self.output.detail(rep, "=", result, x=depth)
self._store_computation(rep, result)
return result
except ProbabilityException:
except ProbabilityException: # coverage: skip
self.output.detail("Failed to resolve by reverse product rule.", x=depth)

###############################################
Expand All @@ -135,9 +135,7 @@ def _compute(self, head: list, body: list, depth=0) -> float:
##################################################################

if set(head).issubset(set(body)):
self.output.detail(f"Identity rule: {rep} = 1.0", x=depth)
if len(head) > len(body):
self.output.detail(f"Therefore, {rep} = 1.0", x=depth)
self.output.detail(f"Identity rule: X|X, therefore {rep} = 1.0", x=depth)
return 1.0

#################################################
Expand Down Expand Up @@ -177,7 +175,7 @@ def _compute(self, head: list, body: list, depth=0) -> float:
self._store_computation(rep, result)
return result

except ProbabilityException:
except ProbabilityException: # coverage: skip
self.output.detail("Failed to resolve by Bayes", x=depth)

#######################################################################################################
Expand Down Expand Up @@ -217,7 +215,7 @@ def _compute(self, head: list, body: list, depth=0) -> float:
self._store_computation(rep, total)
return total

except ProbabilityException:
except ProbabilityException: # coverage: skip
self.output.detail("Failed to resolve by Jeffrey's Rule", x=depth)

###############################################
Expand Down Expand Up @@ -247,14 +245,14 @@ def _compute(self, head: list, body: list, depth=0) -> float:
self._store_computation(rep, result)
return result

except ProbabilityException:
except ProbabilityException: # coverage: skip
pass

###############################################
# Cannot compute #
###############################################

raise ProbabilityIndeterminableException
raise ProbabilityIndeterminableException # coverage: skip

def _store_computation(self, string_representation: str, result: float):
"""
Expand All @@ -270,7 +268,7 @@ def _store_computation(self, string_representation: str, result: float):
self._stored_computations[string_representation] = result

# Stored already but with a different value - something fishy is going on...
elif self._stored_computations[string_representation] != result:
elif self._stored_computations[string_representation] != result: # coverage: skip
print("Uh-oh:", string_representation, "has already been cached, but with a different value...")


Expand Down
6 changes: 6 additions & 0 deletions src/probability/structures/VariableStructures.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ def __str__(self) -> str:
def __hash__(self) -> int:
return hash(self.name + self.outcome)

def __copy__(self):
return Outcome(self.name, self.outcome)

def copy(self):
return self.__copy__()

def __eq__(self, other) -> bool:
if isinstance(other, str):
return self.name == other
Expand Down
2 changes: 1 addition & 1 deletion src/util/ModelLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def parse_model(file: dict or str):

else:
print(f"Unknown extension for file: {file}, needs to end with .yml, .yaml, or .json")
raise Exception
raise FileNotFoundError

with open(file) as f:
data = loader(f)
Expand Down
6 changes: 3 additions & 3 deletions src/util/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ def power_set(variable_list: list or set, allow_empty_set=True) -> Iterator[any]
return chain.from_iterable(combinations(p_set, r) for r in range(base, len(p_set)+1))


def minimal_sets(set_of_sets: list) -> list:
def minimal_sets(*sets) -> list:
"""
Take a set of sets, and return only the minimal sets
@param set_of_sets: A set of sets, each set containing strings
@param sets: An arbitrary number of sets, each set containing strings
@return: A list of minimal sets; that is, all sets such that there is no superset
"""
sorted_sets = sorted(map(set, set_of_sets), key=len)
sorted_sets = sorted(map(set, list(sets)), key=len)
minimal_subsets = []
for s in sorted_sets:
if not any(minimal_subset.issubset(s) for minimal_subset in minimal_subsets):
Expand Down
35 changes: 24 additions & 11 deletions src/validation/backdoors/backdoor_path_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,34 @@ def model_backdoor_validation(bc: BackdoorController, test_data: dict) -> (bool,

for test in test_data["tests"]:

expected_paths = list(map(sorted, test["expect"]))
if test["type"] == "backdoor-paths":

paths = []
for s, t in itertools.product(test["src"], test["dst"]):
paths.extend(bc.backdoor_paths_pair(s, t, test["dcf"] if "dcf" in test else {}))
expected_paths = list(map(sorted, test["expect"]))

# Sort each path so it can be compared order-insensitively against the sorted expected paths
paths = list(map(sorted, paths))
paths = []
for s, t in itertools.product(test["src"], test["dst"]):
paths.extend(bc.backdoor_paths_pair(s, t, test["dcf"] if "dcf" in test else {}))

if test["exhaustive"] and len(paths) != len(expected_paths):
return False, f"{len(paths)} found, but expected {len(expected_paths)}: {paths} vs. Exp: {expected_paths}"
# Sort each path so it can be compared order-insensitively against the sorted expected paths
paths = list(map(sorted, paths))

if not all(map(lambda p: p in paths, expected_paths)):
missing = list(filter(lambda p: p not in paths, expected_paths))
return False, f"Missing {len(missing)} paths: {missing}"
if test["exhaustive"] and len(paths) != len(expected_paths): # coverage: skip
return False, f"{len(paths)} found, expected {len(expected_paths)}: {paths} vs. Exp: {expected_paths}"

if not all(map(lambda p: p in paths, expected_paths)): # coverage: skip
missing = list(filter(lambda p: p not in paths, expected_paths))
return False, f"Missing {len(missing)} paths: {missing}"

elif test["type"] == "independence":

expected = test["expect"]
src = test["src"]
dst = test["dst"]
dcf = test["dcf"] if "dcf" in test else set()
independent = bc.independent(src, dst, dcf)

if independent != expected: # coverage: skip
return False, f"{src} -> {dst} | {dcf}: {independent}, expected {expected}"

return True, "Backdoor tests passed."

Expand Down
23 changes: 22 additions & 1 deletion src/validation/backdoors/test_files/xi_xj.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,31 @@
graph_filename: pearl-3.4.yml
tests:
- src: [ Xi ]
- type: backdoor-paths
src: [ Xi ]
dst: [ Xj ]
expect: [
[ Xi, X2, X4, X5, Xj ],
[ Xi, X4, X2, X5, Xj ],
[ Xi, X3, X1, X4, Xj ]
]
exhaustive: true

- type: independence
src: [ Xi ]
dst: [ Xj ]
expect: false

- type: independence
src: [ X1 ]
dst: [ X2 ]
expect: true

- type: independence
src: [ X1 ]
dst: [ Xj ]
expect: false

- type: independence
src: [ Xi ]
dst: [ X2 ]
expect: false
56 changes: 47 additions & 9 deletions src/validation/inference/inference_tests.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
from yaml import safe_load as load
from os import listdir
from os.path import dirname, abspath

from src.config.config_manager import access
from src.probability.structures.CausalGraph import CausalGraph, Outcome

from src.util.ProbabilityExceptions import *
from src.util.ModelLoader import parse_model
from src.util.ModelLoader import parse_model, parse_outcomes_and_interventions
from src.validation.test_util import print_test_result

test_file_directory = dirname(abspath(__file__)) + "/test_files"


def within_precision(a: float, b: float) -> bool:
"""
Expand All @@ -34,15 +37,15 @@ def model_inference_validation(cg: CausalGraph) -> (bool, str):
assert within_precision(total, 1.0), f"{variable} does not sum to 1.0 across its outcomes ({total})."

# Probability failed to compute entirely
except ProbabilityIndeterminableException:
except ProbabilityIndeterminableException: # coverage: skip
return False, f"Probability indeterminable for the graph. Variable {variable}"

# Indicates an invalid table, missing some row, etc.
except MissingTableRow as e:
except MissingTableRow as e: # coverage: skip
return False, f"Invalid table for the graph: {e}"

# Didn't match the expected total
except AssertionError as e:
except AssertionError as e: # coverage: skip
return False, f"Failed assertion: {e}"

return True, "Basic tests passed."
Expand All @@ -55,23 +58,58 @@ def inference_tests(graph_location: str) -> (bool, str):
@return: True if all tests are successful, False otherwise, along with a string summary message.
"""

files = sorted(list(filter(lambda x: x.endswith(".yml"), listdir(graph_location))))
model_files = sorted(list(filter(lambda x: x.endswith(".yml"), listdir(graph_location))))
test_files = sorted(list(filter(lambda x: x.endswith(".yml"), listdir(test_file_directory))))

assert len(model_files) > 0, "Models not found"
assert len(test_files) > 0, "Inference test files not found"

all_successful = True

# TODO - Threading to handle all the tests

for test_file in files:
for model in model_files:

with open(graph_location + "/" + test_file) as f:
with open(graph_location + "/" + model) as f:
yml_model = load(f)

parsed_model = parse_model(yml_model)
causal_graph = CausalGraph(**parsed_model)

success, msg = model_inference_validation(causal_graph)
print_test_result(success, msg if not success else f"All tests in {test_file} passed")
print_test_result(success, msg if not success else f"All tests in {model} passed")

if not success: # coverage: skip
all_successful = False

for test_file in test_files:

with open(f"{test_file_directory}/{test_file}") as f:
yml_test_data = load(f)

graph_filename = yml_test_data["graph_filename"]
with open(f"{graph_location}/{graph_filename}") as f:
graph_data = load(f)

cg = CausalGraph(**parse_model(graph_data))

test_file_success = True

for test in yml_test_data["tests"]:

head = parse_outcomes_and_interventions(test["head"])
body = parse_outcomes_and_interventions(test["body"]) if "body" in test else set()

result = cg.probability_query(head, body)
expected = test["expect"]

if expected != "failure" and not within_precision(result, expected): # coverage: skip
print_test_result(False, f"Got {result} but expected {expected} in {graph_filename}")
test_file_success = False

if not success:
if test_file_success:
print_test_result(True, f"All tests in {test_file}|{graph_filename} passed")
else: # coverage: skip
all_successful = False

return all_successful, "Inference module passed" if all_successful else "Inference module encountered errors"
Loading

0 comments on commit a03a015

Please sign in to comment.