Merge pull request #16 from bradendubois/beta
Beta
bradendubois committed Mar 10, 2021
2 parents a03a015 + b81cb49 commit 3d48dfe
Showing 29 changed files with 374 additions and 588 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/test_and_release.yml
@@ -18,7 +18,7 @@ jobs:
- ubuntu-18.04
- ubuntu-16.04
- macos-latest
# - windows-latest
- windows-latest

# Special matrix job to report coverage only once
include:
@@ -40,7 +40,7 @@ jobs:
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest coverage coveralls
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
pip install -r requirements.txt
- name: Lint with flake8
run: |
@@ -58,7 +58,7 @@
if: ${{ matrix.report-coverage }}

release:
runs-on: [ ubuntu-latest ]
runs-on: ubuntu-latest
needs: test
if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/beta'
steps:
2 changes: 1 addition & 1 deletion .gitignore
@@ -13,4 +13,4 @@
src/logs/

# Config file is generated on run
src/config/config.json
src/config/config.*
13 changes: 6 additions & 7 deletions API.py
@@ -2,6 +2,9 @@
# probability-code API #
###########################################################

from typing import Union
from pathlib import Path

from src.api.backdoor_paths import api_backdoor_paths
from src.api.deconfounding_sets import api_deconfounding_sets
from src.api.joint_distribution_table import api_joint_distribution_table
@@ -44,7 +47,7 @@ def __init__(self, model: dict or None, print_detail=False, print_result=False,
# API Modifications #
################################################################

def load_model(self, data: dict):
def load_model(self, data: Union[str, dict, Path]):
"""
Load a model into the API.
@param data: A dictionary conforming to the required causal model specification to be loaded
@@ -104,7 +107,7 @@ def p(self, y: set, x: set) -> float:

return result

def joint_distribution_table(self) -> list:
def joint_distribution_table(self) -> ConditionalProbabilityTable:
"""
Compute a joint distribution table across the entire model loaded.
@return: A list of tuples, (Outcomes, P), where Outcomes is a unique set of Outcome objects for the model, and
@@ -114,12 +117,8 @@ def joint_distribution_table(self) -> list:

if self._print_result:
keys = sorted(self._cg.variables.keys())
rows = [[",".join(map(str, outcomes)), [], p] for outcomes, p in result]
rows.append(["Total:", [], sum(map(lambda r: r[1], result))])
cpt = ConditionalProbabilityTable(Variable(",".join(keys), [], []), [], rows)

self._output.result(f"Joint Distribution Table for: {','.join(keys)}")
self._output.result(f"{cpt}")
self._output.result(f"{result}")

return result

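A usage note on the `API.py` changes above: `load_model` now accepts a string, dictionary, or `Path`, and `joint_distribution_table` returns a `ConditionalProbabilityTable` rather than a plain list. The sketch below shows how a caller might exercise the new signatures; the class name `API` and the model filename are illustrative placeholders, not part of this commit.

```python
from pathlib import Path

from API import API  # placeholder name; the diff shows only the class's methods

# Construct the API without a model, then load one from a YAML file path
# (a plain dict conforming to the model specification would also be accepted).
api = API(model=None, print_result=True)
api.load_model(Path("src", "graphs", "full", "model.yml"))  # hypothetical model file

# joint_distribution_table() now returns a ConditionalProbabilityTable rather than a list of tuples.
table = api.joint_distribution_table()
print(table)
```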
8 changes: 4 additions & 4 deletions README.md
@@ -1,13 +1,13 @@
<h1 align="center" style="border-bottom: none;">probability-code</h1>
<h3 align="center">A Python implementation of the <i>do-calculus</i> of Judea Pearl et al.</h3>
<p align="center">
<a href="https://github.com/semantic-release/semantic-release/actions?query=workflow%3ATest+branch%3Amaster">
<a href="https://github.com/bradendubois/probability-code/actions?query=workflow%3ATest+branch%3Amain">
<img alt="Test Workflows" src="https://github.com/bradendubois/probability-code/workflows/Test and Release/badge.svg">
</a>
<a href='https://coveralls.io/github/bradendubois/probability-code?branch=develop'>
<img src='https://coveralls.io/repos/github/bradendubois/probability-code/badge.svg?branch=develop' alt='Coverage Status' />
<a href='https://coveralls.io/github/bradendubois/probability-code?branch=main'>
<img src='https://coveralls.io/repos/github/bradendubois/probability-code/badge.svg?branch=main' alt='Coverage Status' />
</a>
<a href="#badge">
<a href="https://github.com/semantic-release/semantic-release">
<img alt="semantic-release" src="https://img.shields.io/badge/%20%20%F0%9F%93%A6%F0%9F%9A%80-semantic--release-e10079.svg">
</a>
</p>
53 changes: 53 additions & 0 deletions doc/Configuration.md
@@ -0,0 +1,53 @@
# Configuration File Settings

Settings for the project are stored in ``src/config/config.yml``.
- **Note**: This file will be created when the project is run, if it does not already exist.

## Output Control

Controls what information is output: the computational steps of queries or of regression tests run on launch, and whether to minimize acceptable sets Z in backdoor paths.

#### Output Levels of Precision

The number of digits of precision to which a result is output.

| Setting Name | Options | Default Value |
|:-:|:-:|:-:|
| ``output_levels_of_precision`` | any positive integer | 5 |

#### Minimize Backdoor Sets

If enabled, when sets X and Y are given and all feasible sets Z ensuring causal independence are computed, only minimal sets will be shown.

| Setting Name | Options | Default Value |
|:-:|:-:|:-:|
| ``minimize_backdoor_sets`` | [True, False] | True |

## Accuracy / Formatting / Precision Rules

Covers settings for the accuracy of regression tests, computation caching, and noise in function evaluations.

#### Cache Computation Results

If enabled, any time a specific query is computed, its results will be cached; if the same query is required in any subsequent queries, its cached result will be reused instead of computing the same result from scratch. This can yield a large performance increase in larger causal graphs.

| Setting Name | Options | Default Value |
|:-:|:-:|:-:|
| ``cache_computation_results`` | [True, False] | True |

#### Topological Sort Variables

If enabled, the head and body of queries are topologically sorted to avoid applying Bayes' rule as much as possible.

| Setting Name | Options | Default Value |
|:-:|:-:|:-:|
| ``topological_sort_variables`` | [True, False] | True |

#### Regression Test Result Precision

In a regression test (see: ``Regression Tests``) where an 'expected value' is provided, this is the number of digits of precision to which the computed value must match the expected one. A higher value demands more accuracy, but also a longer, more detailed hand-computed 'expected result'.

| Setting Name | Options | Default Value |
|:-:|:-:|:-:|
| ``regression_levels_of_precision`` | any positive integer | 5 |
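
For reference, a minimal sketch of reading these settings with the defaults documented above. It assumes PyYAML and a flat key/value layout in ``src/config/config.yml``; it is not the project's actual configuration loader.

```python
from pathlib import Path
from yaml import safe_load

# Defaults documented above; assumed to live as a flat key: value mapping in config.yml.
DEFAULTS = {
    "output_levels_of_precision": 5,
    "minimize_backdoor_sets": True,
    "cache_computation_results": True,
    "topological_sort_variables": True,
    "regression_levels_of_precision": 5,
}

def load_settings(path=Path("src", "config", "config.yml")) -> dict:
    """Merge the on-disk configuration (if any) over the documented defaults."""
    settings = dict(DEFAULTS)
    if path.is_file():
        with path.open("r") as f:
            settings.update(safe_load(f) or {})
    return settings
```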

43 changes: 0 additions & 43 deletions main.py
@@ -9,50 +9,7 @@
# #
#########################################################

# Main libraries can always be loaded
import os
from sys import argv

from src.REPL import run_repl
from src.validation.backdoors.backdoor_path_tests import backdoor_tests
from src.validation.inference.inference_tests import inference_tests
from test_driver import graph_location

# TODO - Examine if necessary after re-works; should always set cwd to root of file itself
os.chdir(os.path.dirname(os.path.abspath(__file__)))

#######################################
# Parse Input #
#######################################

# TODO - cleaner way of integrating tests with workflow

if len(argv) > 1 and argv[1].lower() == "inference":
inference_bool, inference_msg = inference_tests(graph_location)
assert inference_bool, f"Inference module has failed: {inference_msg}"
exit(0)

if len(argv) > 1 and argv[1].lower() == "backdoor":
backdoor_bool, backdoor_msg = backdoor_tests(graph_location)
assert backdoor_bool, f"Backdoor module has failed: {backdoor_msg}"
exit(0)

run_debug = len(argv) >= 2 and argv[1].lower() == "debug"

#######################################
# Test Software (if specified) #
#######################################

if run_debug:
from test_driver import run_all_tests
from src.validation.test_util import print_test_result

index = argv.index("debug")
extreme = len(argv) > index+1 and argv[index+1].lower() == "extreme"

# Boolean result returned: True if all tests are successful, False otherwise
success = run_all_tests(extreme)
print_test_result(success, "[All Tests Passed]" if success else "[Some Errors Occurred]")

#######################################
# REPL #
1 change: 1 addition & 0 deletions setup.cfg
@@ -13,6 +13,7 @@ source = src/
omit =
src/REPL.py
src/util/OutputLogger.py
src/graphs/dataset_generator

[coverage:report]
exclude_lines =
17 changes: 11 additions & 6 deletions src/REPL.py
@@ -1,5 +1,6 @@
from yaml import safe_load as load
from os import path, listdir
from pathlib import Path

from src.api.backdoor_paths import api_backdoor_paths_parse
from src.api.deconfounding_sets import api_deconfounding_sets_parse
@@ -10,7 +11,9 @@

# TODO - Change graph_location to allow a specific graph to be given and loaded, or specify a user directory without
# there being path issues depending on the working directory
def run_repl(graph_location="src/graphs/full"):


def run_repl(graph_location=Path(".", "src", "graphs", "full")):
"""
Run an interactive IO prompt allowing full use of the causality software.
@param graph_location: A string of the path from the working directory to a directory of graphs
@@ -66,18 +69,20 @@ def skip(*args, **kwargs):

# List all possible graphs (ignores the generated models used for debugging / testing)
if f in list_options:
assert path.isdir(graph_location), \
"The specified directory for causal graph models {} does not exist!".format(graph_location)
print("Options", "\n- ".join(filter(lambda g: g.endswith(".yml"), sorted(listdir(graph_location)))))
assert graph_location.is_dir(), \
"The specified directory for causal graph models {} does not exist!".format(graph_location.name)

files = filter(lambda g: g.suffix.lower() == ".yml", sorted(graph_location.iterdir()))
print("Options", *list(map(lambda file: file.stem, files)), sep="\n- ")
continue

# Parse and load a model into the API
if f in load_options:
s = arg + (".yml" if not arg.endswith(".yml") else "")
assert path.isfile(full_path := graph_location + "/" + s), \
assert (full_path := graph_location / s).is_file(), \
"File: {} does not exist!".format(s)

with open(full_path) as f:
with full_path.open("r") as f:
api.load_model(load(f))
continue

4 changes: 2 additions & 2 deletions src/api/backdoor_paths.py
@@ -11,15 +11,15 @@ def api_backdoor_paths_parse(query: str) -> (set, set):
of the arrow, and the third as all vertices to the right of the bar, respectively.
"""
def clean(x):
return set(map(lambda y: y.strip(), x.strip().split(" ")))
return set(map(lambda y: y.strip(), x.strip().split(",")))

l, r = query.split("->")

if "|" in r:
s = r.split("|")
r, dcf = clean(s[0]), clean(s[1])
else:
r, dcf = clean(r), {}
r, dcf = clean(r), set()

return {
"src": clean(l),
2 changes: 1 addition & 1 deletion src/api/deconfounding_sets.py
@@ -9,7 +9,7 @@ def api_deconfounding_sets_parse(query: str) -> (set, set):
right sides of the arrow, respectively.
"""
def clean(x):
return set(map(lambda y: y.strip(), x.strip().split(" ")))
return set(map(lambda y: y.strip(), x.strip().split(",")))

src, dst = map(clean, query.split("->"))

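Both parser changes above switch the vertex separator from a space to a comma. A minimal sketch of the query strings the two parse functions now expect; the variable names are illustrative, and the exact return structures are as defined in the (partially shown) functions above.

```python
from src.api.backdoor_paths import api_backdoor_paths_parse
from src.api.deconfounding_sets import api_deconfounding_sets_parse

# Vertices on either side of "->" are now comma-separated rather than space-separated;
# an optional "| ..." section supplies given deconfounding vertices to the backdoor parser.
backdoor_args = api_backdoor_paths_parse("X, W -> Y | Z")

# The deconfounding-set parser follows the same comma convention, minus the "|" section.
deconfounding_args = api_deconfounding_sets_parse("X, W -> Y")
```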
12 changes: 9 additions & 3 deletions src/api/joint_distribution_table.py
@@ -1,9 +1,10 @@
from itertools import product
from src.probability.structures.CausalGraph import CausalGraph
from src.probability.structures.VariableStructures import Outcome
from src.probability.structures.ConditionalProbabilityTable import ConditionalProbabilityTable
from src.probability.structures.VariableStructures import Outcome, Variable


def api_joint_distribution_table(cg: CausalGraph) -> list:
def api_joint_distribution_table(cg: CausalGraph) -> ConditionalProbabilityTable:
"""
Compute and return a joint distribution table for the given model.
@param cg: A CausalGraph to compute the JDT for.
@@ -17,4 +18,9 @@ def api_joint_distribution_table(cg: CausalGraph) -> list:
outcomes = {Outcome(x, cross[i]) for i, x in enumerate(sorted_keys)}
results.append((outcomes, cg.probability_query(outcomes, set())))

return results
keys = sorted(cg.variables.keys())
rows = [[",".join(map(str, outcomes)), [], p] for outcomes, p in results]
rows.append(["Total:", [], sum(map(lambda r: r[1], results))])
cpt = ConditionalProbabilityTable(Variable(",".join(keys), [], []), [], rows)

return cpt
