Merge pull request #16 from bradendubois/beta
Beta
bradendubois committed Mar 10, 2021
2 parents a03a015 + b81cb49 commit 3d48dfe
Showing 29 changed files with 374 additions and 588 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/test_and_release.yml
@@ -18,7 +18,7 @@ jobs:
- ubuntu-18.04
- ubuntu-16.04
- macos-latest
# - windows-latest
- windows-latest

# Special matrix job to report coverage only once
include:
@@ -40,7 +40,7 @@ jobs:
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest coverage coveralls
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
pip install -r requirements.txt
- name: Lint with flake8
run: |
@@ -58,7 +58,7 @@
if: ${{ matrix.report-coverage }}

release:
runs-on: [ ubuntu-latest ]
runs-on: ubuntu-latest
needs: test
if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/beta'
steps:
2 changes: 1 addition & 1 deletion .gitignore
@@ -13,4 +13,4 @@
src/logs/

# Config file is generated on run
src/config/config.json
src/config/config.*
13 changes: 6 additions & 7 deletions API.py
@@ -2,6 +2,9 @@
# probability-code API #
###########################################################

from typing import Union
from pathlib import Path

from src.api.backdoor_paths import api_backdoor_paths
from src.api.deconfounding_sets import api_deconfounding_sets
from src.api.joint_distribution_table import api_joint_distribution_table
@@ -44,7 +47,7 @@ def __init__(self, model: dict or None, print_detail=False, print_result=False,
# API Modifications #
################################################################

def load_model(self, data: dict):
def load_model(self, data: Union[str, dict, Path]):
"""
Load a model into the API.
@param data: A dictionary conforming to the required causal model specification to be loaded
@@ -104,7 +107,7 @@ def p(self, y: set, x: set) -> float:

return result

def joint_distribution_table(self) -> list:
def joint_distribution_table(self) -> ConditionalProbabilityTable:
"""
Compute a joint distribution table across the entire model loaded.
@return: A list of tuples, (Outcomes, P), where Outcomes is a unique set of Outcome objects for the model, and
@@ -114,12 +117,8 @@ def joint_distribution_table(self) -> list:

if self._print_result:
keys = sorted(self._cg.variables.keys())
rows = [[",".join(map(str, outcomes)), [], p] for outcomes, p in result]
rows.append(["Total:", [], sum(map(lambda r: r[1], result))])
cpt = ConditionalProbabilityTable(Variable(",".join(keys), [], []), [], rows)

self._output.result(f"Joint Distribution Table for: {','.join(keys)}")
self._output.result(f"{cpt}")
self._output.result(f"{result}")

return result

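A usage note on the `API.py` changes above: `load_model` now accepts a string, dictionary, or `Path`, and `joint_distribution_table` returns a `ConditionalProbabilityTable` rather than a plain list. The sketch below shows how a caller might exercise the new signatures; the class name `API` and the model filename are illustrative placeholders, not part of this commit.

```python
from pathlib import Path

from API import API  # placeholder name; the diff shows only the class's methods

# Construct the API without a model, then load one from a YAML file path
# (a plain dict conforming to the model specification would also be accepted).
api = API(model=None, print_result=True)
api.load_model(Path("src", "graphs", "full", "model.yml"))  # hypothetical model file

# joint_distribution_table() now returns a ConditionalProbabilityTable rather than a list of tuples.
table = api.joint_distribution_table()
print(table)
```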
8 changes: 4 additions & 4 deletions README.md
@@ -1,13 +1,13 @@
<h1 align="center" style="border-bottom: none;">probability-code</h1>
<h3 align="center">A Python implementation of the <i>do-calculus</i> of Judea Pearl et al.</h3>
<p align="center">
<a href="https://github.com/semantic-release/semantic-release/actions?query=workflow%3ATest+branch%3Amaster">
<a href="https://github.com/bradendubois/probability-code/actions?query=workflow%3ATest+branch%3Amain">
<img alt="Test Workflows" src="https://github.com/bradendubois/probability-code/workflows/Test and Release/badge.svg">
</a>
<a href='https://coveralls.io/github/bradendubois/probability-code?branch=develop'>
<img src='https://coveralls.io/repos/github/bradendubois/probability-code/badge.svg?branch=develop' alt='Coverage Status' />
<a href='https://coveralls.io/github/bradendubois/probability-code?branch=main'>
<img src='https://coveralls.io/repos/github/bradendubois/probability-code/badge.svg?branch=main' alt='Coverage Status' />
</a>
<a href="#badge">
<a href="https://github.com/semantic-release/semantic-release">
<img alt="semantic-release" src="https://img.shields.io/badge/%20%20%F0%9F%93%A6%F0%9F%9A%80-semantic--release-e10079.svg">
</a>
</p>
53 changes: 53 additions & 0 deletions doc/Configuration.md
@@ -0,0 +1,53 @@
# Configuration File Settings

Settings for the project are stored in ``src/config/config.yml``.
- **Note**: This file will be created when the project is run, if it does not already exist.

## Output Control

Controls what information is output: the computational steps of queries or of regression tests run on launch, and whether to minimize acceptable sets Z in backdoor paths.

#### Output Levels of Precision

The number of digits of precision to which a result is output.

| Setting Name | Options | Default Value |
|:-:|:-:|:-:|
| ``output_levels_of_precision`` | any positive integer | 5 |

#### Minimize Backdoor Sets

If enabled, when sets X and Y are given and all feasible sets Z ensuring causal independence are computed, only minimal sets will be shown.

| Setting Name | Options | Default Value |
|:-:|:-:|:-:|
| ``minimize_backdoor_sets`` | [True, False] | True |

## Accuracy / Formatting / Precision Rules

Covers settings for the accuracy of regression tests, computation caching, and noise in function evaluations.

#### Cache Computation Results

If enabled, any time a specific query is computed, its results will be cached; if the same query is required in any subsequent queries, its cached result will be reused instead of computing the same result from scratch. This can yield a large performance increase in larger causal graphs.

| Setting Name | Options | Default Value |
|:-:|:-:|:-:|
| ``cache_computation_results`` | [True, False] | True |

#### Topological Sort Variables

If enabled, the head and body of queries are topologically sorted to avoid applying Bayes' rule as much as possible.

| Setting Name | Options | Default Value |
|:-:|:-:|:-:|
| ``topological_sort_variables`` | [True, False] | True |

#### Regression Test Result Precision

In a regression test (see: ``Regression Tests``) where an 'expected value' is provided, this is the number of digits of precision to which the computed value must match the expected one. A higher value demands more accuracy, but also a longer, more detailed hand-computed 'expected result'.

| Setting Name | Options | Default Value |
|:-:|:-:|:-:|
| ``regression_levels_of_precision`` | any positive integer | 5 |
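
For reference, a minimal sketch of reading these settings with the defaults documented above. It assumes PyYAML and a flat key/value layout in ``src/config/config.yml``; it is not the project's actual configuration loader.

```python
from pathlib import Path
from yaml import safe_load

# Defaults documented above; assumed to live as a flat key: value mapping in config.yml.
DEFAULTS = {
    "output_levels_of_precision": 5,
    "minimize_backdoor_sets": True,
    "cache_computation_results": True,
    "topological_sort_variables": True,
    "regression_levels_of_precision": 5,
}

def load_settings(path=Path("src", "config", "config.yml")) -> dict:
    """Merge the on-disk configuration (if any) over the documented defaults."""
    settings = dict(DEFAULTS)
    if path.is_file():
        with path.open("r") as f:
            settings.update(safe_load(f) or {})
    return settings
```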

43 changes: 0 additions & 43 deletions main.py
@@ -9,50 +9,7 @@
# #
#########################################################

# Main libraries can always be loaded
import os
from sys import argv

from src.REPL import run_repl
from src.validation.backdoors.backdoor_path_tests import backdoor_tests
from src.validation.inference.inference_tests import inference_tests
from test_driver import graph_location

# TODO - Examine if necessary after re-works; should always set cwd to root of file itself
os.chdir(os.path.dirname(os.path.abspath(__file__)))

#######################################
# Parse Input #
#######################################

# TODO - cleaner way of integrating tests with workflow

if len(argv) > 1 and argv[1].lower() == "inference":
inference_bool, inference_msg = inference_tests(graph_location)
assert inference_bool, f"Inference module has failed: {inference_msg}"
exit(0)

if len(argv) > 1 and argv[1].lower() == "backdoor":
backdoor_bool, backdoor_msg = backdoor_tests(graph_location)
assert backdoor_bool, f"Backdoor module has failed: {backdoor_msg}"
exit(0)

run_debug = len(argv) >= 2 and argv[1].lower() == "debug"

#######################################
# Test Software (if specified) #
#######################################

if run_debug:
from test_driver import run_all_tests
from src.validation.test_util import print_test_result

index = argv.index("debug")
extreme = len(argv) > index+1 and argv[index+1].lower() == "extreme"

# Boolean result returned: True if all tests are successful, False otherwise
success = run_all_tests(extreme)
print_test_result(success, "[All Tests Passed]" if success else "[Some Errors Occurred]")

#######################################
# REPL #
1 change: 1 addition & 0 deletions setup.cfg
@@ -13,6 +13,7 @@ source = src/
omit =
src/REPL.py
src/util/OutputLogger.py
src/graphs/dataset_generator

[coverage:report]
exclude_lines =
17 changes: 11 additions & 6 deletions src/REPL.py
@@ -1,5 +1,6 @@
from yaml import safe_load as load
from os import path, listdir
from pathlib import Path

from src.api.backdoor_paths import api_backdoor_paths_parse
from src.api.deconfounding_sets import api_deconfounding_sets_parse
@@ -10,7 +11,9 @@

# TODO - Change graph_location to allow a specific graph to be given and loaded, or specify a user directory without
# there being path issues depending on the working directory
def run_repl(graph_location="src/graphs/full"):


def run_repl(graph_location=Path(".", "src", "graphs", "full")):
"""
Run an interactive IO prompt allowing full use of the causality software.
@param graph_location: A string of the path from the working directory to a directory of graphs
@@ -66,18 +69,20 @@ def skip(*args, **kwargs):

# List all possible graphs (ignores the generated models used for debugging / testing)
if f in list_options:
assert path.isdir(graph_location), \
"The specified directory for causal graph models {} does not exist!".format(graph_location)
print("Options", "\n- ".join(filter(lambda g: g.endswith(".yml"), sorted(listdir(graph_location)))))
assert graph_location.is_dir(), \
"The specified directory for causal graph models {} does not exist!".format(graph_location.name)

files = filter(lambda g: g.suffix.lower() == ".yml", sorted(graph_location.iterdir()))
print("Options", *list(map(lambda file: file.stem, files)), sep="\n- ")
continue

# Parse and load a model into the API
if f in load_options:
s = arg + (".yml" if not arg.endswith(".yml") else "")
assert path.isfile(full_path := graph_location + "/" + s), \
assert (full_path := graph_location / s).is_file(), \
"File: {} does not exist!".format(s)

with open(full_path) as f:
with full_path.open("r") as f:
api.load_model(load(f))
continue

4 changes: 2 additions & 2 deletions src/api/backdoor_paths.py
@@ -11,15 +11,15 @@ def api_backdoor_paths_parse(query: str) -> (set, set):
of the arrow, and the third as all vertices to the right of the bar, respectively.
"""
def clean(x):
return set(map(lambda y: y.strip(), x.strip().split(" ")))
return set(map(lambda y: y.strip(), x.strip().split(",")))

l, r = query.split("->")

if "|" in r:
s = r.split("|")
r, dcf = clean(s[0]), clean(s[1])
else:
r, dcf = clean(r), {}
r, dcf = clean(r), set()

return {
"src": clean(l),
2 changes: 1 addition & 1 deletion src/api/deconfounding_sets.py
@@ -9,7 +9,7 @@ def api_deconfounding_sets_parse(query: str) -> (set, set):
right sides of the arrow, respectively.
"""
def clean(x):
return set(map(lambda y: y.strip(), x.strip().split(" ")))
return set(map(lambda y: y.strip(), x.strip().split(",")))

src, dst = map(clean, query.split("->"))

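Both parser changes above switch the vertex separator from a space to a comma. A minimal sketch of the query strings the two parse functions now expect; the variable names are illustrative, and the exact return structures are as defined in the (partially shown) functions above.

```python
from src.api.backdoor_paths import api_backdoor_paths_parse
from src.api.deconfounding_sets import api_deconfounding_sets_parse

# Vertices on either side of "->" are now comma-separated rather than space-separated;
# an optional "| ..." section supplies given deconfounding vertices to the backdoor parser.
backdoor_args = api_backdoor_paths_parse("X, W -> Y | Z")

# The deconfounding-set parser follows the same comma convention, minus the "|" section.
deconfounding_args = api_deconfounding_sets_parse("X, W -> Y")
```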
12 changes: 9 additions & 3 deletions src/api/joint_distribution_table.py
@@ -1,9 +1,10 @@
from itertools import product
from src.probability.structures.CausalGraph import CausalGraph
from src.probability.structures.VariableStructures import Outcome
from src.probability.structures.ConditionalProbabilityTable import ConditionalProbabilityTable
from src.probability.structures.VariableStructures import Outcome, Variable


def api_joint_distribution_table(cg: CausalGraph) -> list:
def api_joint_distribution_table(cg: CausalGraph) -> ConditionalProbabilityTable:
"""
Compute and return a joint distribution table for the given model.
@param cg: A CausalGraph to compute the JDT for.
@@ -17,4 +18,9 @@ def api_joint_distribution_table(cg: CausalGraph) -> list:
outcomes = {Outcome(x, cross[i]) for i, x in enumerate(sorted_keys)}
results.append((outcomes, cg.probability_query(outcomes, set())))

return results
keys = sorted(cg.variables.keys())
rows = [[",".join(map(str, outcomes)), [], p] for outcomes, p in results]
rows.append(["Total:", [], sum(map(lambda r: r[1], results))])
cpt = ConditionalProbabilityTable(Variable(",".join(keys), [], []), [], rows)

return cpt
