Skip to content

Commit

Permalink
Merge pull request #8 from bradendubois/beta
Browse files Browse the repository at this point in the history
Beta
  • Loading branch information
bradendubois authored Feb 24, 2021
2 parents 26183fd + dd46fc9 commit a03a015
Show file tree
Hide file tree
Showing 15 changed files with 295 additions and 113 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/test_and_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,21 @@ jobs:
test:
strategy:
matrix:
report-coverage: [ false ]
python-version: [ 3.8, 3.9 ]
os:
- ubuntu-latest
- ubuntu-18.04
- ubuntu-16.04
- macos-latest
# - windows-latest

# Special matrix job to report coverage only once
include:
- python-version: 3.9
os: ubuntu-latest
report-coverage: true

runs-on: ${{ matrix.os }}

steps:
Expand Down Expand Up @@ -47,7 +55,7 @@ jobs:
- name: Report Coverage w/Coveralls
uses: AndreMiras/coveralls-python-action@develop
if: ${{ runner.os == 'Linux' }}
if: ${{ matrix.report-coverage }}

release:
runs-on: [ ubuntu-latest ]
Expand Down
4 changes: 2 additions & 2 deletions API.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from src.probability.structures.ConditionalProbabilityTable import ConditionalProbabilityTable
from src.probability.structures.VariableStructures import Variable

from src.util.ModelLoader import parse_graph_file_data
from src.util.ModelLoader import parse_model
from src.util.OutputLogger import OutputLogger


Expand Down Expand Up @@ -50,7 +50,7 @@ def load_model(self, data: dict):
@param data: A dictionary conforming to the required causal model specification to be loaded
into the API.
"""
d = parse_graph_file_data(data)
d = parse_model(data)

self._cg = CausalGraph(output=self._output, **d)
self._g = d["graph"]
Expand Down
8 changes: 8 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ exclude =
[coverage:run]
relative_files = True
source = src/
omit =
src/REPL.py
src/util/OutputLogger.py

[coverage:report]
exclude_lines =
def __str__
coverage: skip

[tool:pytest]
minversion = 6.0
Expand Down
6 changes: 3 additions & 3 deletions src/REPL.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from json import load
from yaml import safe_load as load
from os import path, listdir

from src.api.backdoor_paths import api_backdoor_paths_parse
Expand Down Expand Up @@ -68,12 +68,12 @@ def skip(*args, **kwargs):
if f in list_options:
assert path.isdir(graph_location), \
"The specified directory for causal graph models {} does not exist!".format(graph_location)
print("Options", "\n- ".join(filter(lambda g: g.endswith(".json"), sorted(listdir(graph_location)))))
print("Options", "\n- ".join(filter(lambda g: g.endswith(".yml"), sorted(listdir(graph_location)))))
continue

# Parse and load a model into the API
if f in load_options:
s = arg + (".json" if not arg.endswith(".json") else "")
s = arg + (".yml" if not arg.endswith(".yml") else "")
assert path.isfile(full_path := graph_location + "/" + s), \
"File: {} does not exist!".format(s)

Expand Down
2 changes: 1 addition & 1 deletion src/api/probability_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def api_probability_query_parse(query: str) -> (tuple, tuple):

return {
"y": parse_outcomes_and_interventions(query),
"x": []
"x": set()
}


Expand Down
2 changes: 1 addition & 1 deletion src/probability/structures/BackdoorController.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def all_dcf_sets(self, src: set, dst: set) -> list:
# Minimize the sets, if enabled
# TODO - Revisit configuration detail implementation
if access("minimize_backdoor_sets"):
valid_deconfounding_sets = minimal_sets(valid_deconfounding_sets)
valid_deconfounding_sets = minimal_sets(*valid_deconfounding_sets)

return list(valid_deconfounding_sets)

Expand Down
16 changes: 7 additions & 9 deletions src/probability/structures/Probability_Engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def _compute(self, head: list, body: list, depth=0) -> float:
self.output.detail(rep, "=", result, x=depth)
self._store_computation(rep, result)
return result
except ProbabilityException:
except ProbabilityException: # coverage: skip
self.output.detail("Failed to resolve by reverse product rule.", x=depth)

###############################################
Expand All @@ -135,9 +135,7 @@ def _compute(self, head: list, body: list, depth=0) -> float:
##################################################################

if set(head).issubset(set(body)):
self.output.detail(f"Identity rule: {rep} = 1.0", x=depth)
if len(head) > len(body):
self.output.detail(f"Therefore, {rep} = 1.0", x=depth)
self.output.detail(f"Identity rule: X|X, therefore {rep} = 1.0", x=depth)
return 1.0

#################################################
Expand Down Expand Up @@ -177,7 +175,7 @@ def _compute(self, head: list, body: list, depth=0) -> float:
self._store_computation(rep, result)
return result

except ProbabilityException:
except ProbabilityException: # coverage: skip
self.output.detail("Failed to resolve by Bayes", x=depth)

#######################################################################################################
Expand Down Expand Up @@ -217,7 +215,7 @@ def _compute(self, head: list, body: list, depth=0) -> float:
self._store_computation(rep, total)
return total

except ProbabilityException:
except ProbabilityException: # coverage: skip
self.output.detail("Failed to resolve by Jeffrey's Rule", x=depth)

###############################################
Expand Down Expand Up @@ -247,14 +245,14 @@ def _compute(self, head: list, body: list, depth=0) -> float:
self._store_computation(rep, result)
return result

except ProbabilityException:
except ProbabilityException: # coverage: skip
pass

###############################################
# Cannot compute #
###############################################

raise ProbabilityIndeterminableException
raise ProbabilityIndeterminableException # coverage: skip

def _store_computation(self, string_representation: str, result: float):
"""
Expand All @@ -270,7 +268,7 @@ def _store_computation(self, string_representation: str, result: float):
self._stored_computations[string_representation] = result

# Stored already but with a different value - something fishy is going on...
elif self._stored_computations[string_representation] != result:
elif self._stored_computations[string_representation] != result: # coverage: skip
print("Uh-oh:", string_representation, "has already been cached, but with a different value...")


Expand Down
6 changes: 6 additions & 0 deletions src/probability/structures/VariableStructures.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ def __str__(self) -> str:
def __hash__(self) -> int:
return hash(self.name + self.outcome)

def __copy__(self):
return Outcome(self.name, self.outcome)

def copy(self):
return self.__copy__()

def __eq__(self, other) -> bool:
if isinstance(other, str):
return self.name == other
Expand Down
2 changes: 1 addition & 1 deletion src/util/ModelLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def parse_model(file: dict or str):

else:
print(f"Unknown extension for file: {file}, needs to end with .yml, .yaml, or .json")
raise Exception
raise FileNotFoundError

with open(file) as f:
data = loader(f)
Expand Down
6 changes: 3 additions & 3 deletions src/util/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ def power_set(variable_list: list or set, allow_empty_set=True) -> Iterator[any]
return chain.from_iterable(combinations(p_set, r) for r in range(base, len(p_set)+1))


def minimal_sets(set_of_sets: list) -> list:
def minimal_sets(*sets) -> list:
"""
Take a set of sets, and return only the minimal sets
@param set_of_sets: A set of sets, each set containing strings
@param sets: An arbitrary number of sets, each set containing strings
@return: A list of minimal sets; that is, all sets such that there is no superset
"""
sorted_sets = sorted(map(set, set_of_sets), key=len)
sorted_sets = sorted(map(set, list(sets)), key=len)
minimal_subsets = []
for s in sorted_sets:
if not any(minimal_subset.issubset(s) for minimal_subset in minimal_subsets):
Expand Down
35 changes: 24 additions & 11 deletions src/validation/backdoors/backdoor_path_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,34 @@ def model_backdoor_validation(bc: BackdoorController, test_data: dict) -> (bool,

for test in test_data["tests"]:

expected_paths = list(map(sorted, test["expect"]))
if test["type"] == "backdoor-paths":

paths = []
for s, t in itertools.product(test["src"], test["dst"]):
paths.extend(bc.backdoor_paths_pair(s, t, test["dcf"] if "dcf" in test else {}))
expected_paths = list(map(sorted, test["expect"]))

# Sort each path so it can be compared order-insensitively against the sorted expected paths
paths = list(map(sorted, paths))
paths = []
for s, t in itertools.product(test["src"], test["dst"]):
paths.extend(bc.backdoor_paths_pair(s, t, test["dcf"] if "dcf" in test else {}))

if test["exhaustive"] and len(paths) != len(expected_paths):
return False, f"{len(paths)} found, but expected {len(expected_paths)}: {paths} vs. Exp: {expected_paths}"
# Sort each path so it can be compared order-insensitively against the sorted expected paths
paths = list(map(sorted, paths))

if not all(map(lambda p: p in paths, expected_paths)):
missing = list(filter(lambda p: p not in paths, expected_paths))
return False, f"Missing {len(missing)} paths: {missing}"
if test["exhaustive"] and len(paths) != len(expected_paths): # coverage: skip
return False, f"{len(paths)} found, expected {len(expected_paths)}: {paths} vs. Exp: {expected_paths}"

if not all(map(lambda p: p in paths, expected_paths)): # coverage: skip
missing = list(filter(lambda p: p not in paths, expected_paths))
return False, f"Missing {len(missing)} paths: {missing}"

elif test["type"] == "independence":

expected = test["expect"]
src = test["src"]
dst = test["dst"]
dcf = test["dcf"] if "dcf" in test else set()
independent = bc.independent(src, dst, dcf)

if independent != expected: # coverage: skip
return False, f"{src} -> {dst} | {dcf}: {independent}, expected {expected}"

return True, "Backdoor tests passed."

Expand Down
23 changes: 22 additions & 1 deletion src/validation/backdoors/test_files/xi_xj.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,31 @@
graph_filename: pearl-3.4.yml
tests:
- src: [ Xi ]
- type: backdoor-paths
src: [ Xi ]
dst: [ Xj ]
expect: [
[ Xi, X2, X4, X5, Xj ],
[ Xi, X4, X2, X5, Xj ],
[ Xi, X3, X1, X4, Xj ]
]
exhaustive: true

- type: independence
src: [ Xi ]
dst: [ Xj ]
expect: false

- type: independence
src: [ X1 ]
dst: [ X2 ]
expect: true

- type: independence
src: [ X1 ]
dst: [ Xj ]
expect: false

- type: independence
src: [ Xi ]
dst: [ X2 ]
expect: false
56 changes: 47 additions & 9 deletions src/validation/inference/inference_tests.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
from yaml import safe_load as load
from os import listdir
from os.path import dirname, abspath

from src.config.config_manager import access
from src.probability.structures.CausalGraph import CausalGraph, Outcome

from src.util.ProbabilityExceptions import *
from src.util.ModelLoader import parse_model
from src.util.ModelLoader import parse_model, parse_outcomes_and_interventions
from src.validation.test_util import print_test_result

test_file_directory = dirname(abspath(__file__)) + "/test_files"


def within_precision(a: float, b: float) -> bool:
"""
Expand All @@ -34,15 +37,15 @@ def model_inference_validation(cg: CausalGraph) -> (bool, str):
assert within_precision(total, 1.0), f"{variable} does not sum to 1.0 across its outcomes ({total})."

# Probability failed to compute entirely
except ProbabilityIndeterminableException:
except ProbabilityIndeterminableException: # coverage: skip
return False, f"Probability indeterminable for the graph. Variable {variable}"

# Indicates an invalid table, missing some row, etc.
except MissingTableRow as e:
except MissingTableRow as e: # coverage: skip
return False, f"Invalid table for the graph: {e}"

# Didn't match the expected total
except AssertionError as e:
except AssertionError as e: # coverage: skip
return False, f"Failed assertion: {e}"

return True, "Basic tests passed."
Expand All @@ -55,23 +58,58 @@ def inference_tests(graph_location: str) -> (bool, str):
@return: True if all tests are successful, False otherwise, along with a string summary message.
"""

files = sorted(list(filter(lambda x: x.endswith(".yml"), listdir(graph_location))))
model_files = sorted(list(filter(lambda x: x.endswith(".yml"), listdir(graph_location))))
test_files = sorted(list(filter(lambda x: x.endswith(".yml"), listdir(test_file_directory))))

assert len(model_files) > 0, "Models not found"
assert len(test_files) > 0, "Inference test files not found"

all_successful = True

# TODO - Threading to handle all the tests

for test_file in files:
for model in model_files:

with open(graph_location + "/" + test_file) as f:
with open(graph_location + "/" + model) as f:
yml_model = load(f)

parsed_model = parse_model(yml_model)
causal_graph = CausalGraph(**parsed_model)

success, msg = model_inference_validation(causal_graph)
print_test_result(success, msg if not success else f"All tests in {test_file} passed")
print_test_result(success, msg if not success else f"All tests in {model} passed")

if not success: # coverage: skip
all_successful = False

for test_file in test_files:

with open(f"{test_file_directory}/{test_file}") as f:
yml_test_data = load(f)

graph_filename = yml_test_data["graph_filename"]
with open(f"{graph_location}/{graph_filename}") as f:
graph_data = load(f)

cg = CausalGraph(**parse_model(graph_data))

test_file_success = True

for test in yml_test_data["tests"]:

head = parse_outcomes_and_interventions(test["head"])
body = parse_outcomes_and_interventions(test["body"]) if "body" in test else set()

result = cg.probability_query(head, body)
expected = test["expect"]

if expected != "failure" and not within_precision(result, expected): # coverage: skip
print_test_result(False, f"Got {result} but expected {expected} in {graph_filename}")
test_file_success = False

if not success:
if test_file_success:
print_test_result(True, f"All tests in {test_file}|{graph_filename} passed")
else: # coverage: skip
all_successful = False

return all_successful, "Inference module passed" if all_successful else "Inference module encountered errors"
Loading

0 comments on commit a03a015

Please sign in to comment.