Skip to content

Commit

Permalink
documentation overhaul now that im starting to understand sphinx
Browse files Browse the repository at this point in the history
  • Loading branch information
dvatterott committed Jan 4, 2019
1 parent 8332867 commit 81cb1eb
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 89 deletions.
5 changes: 0 additions & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@ This software is built to identify unexpected changes in a model output before e
* Documentation: https://predeval.readthedocs.io.


Features
--------

* TODO

Credits
-------

Expand Down
4 changes: 4 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ ContinuousEvaluator
-------------------
.. automodule:: predeval.continuous
:members:
:inherited-members:
:show-inheritance:

CategoricalEvaluator
--------------------
.. automodule:: predeval.categorical
:members:
:inherited-members:
:show-inheritance:
8 changes: 4 additions & 4 deletions docs/source/predeval.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,23 @@ predeval.categorical module

.. automodule:: predeval.categorical
:members:
:undoc-members:
:inherited-members:
:show-inheritance:

predeval.continuous module
--------------------------

.. automodule:: predeval.continuous
:members:
:undoc-members:
:inherited-members:
:show-inheritance:

predeval.parent module
----------------------

.. automodule:: predeval.parent
:members:
:undoc-members:
:inherited-members:
:show-inheritance:


Expand All @@ -34,5 +34,5 @@ Module contents

.. automodule:: predeval
:members:
:undoc-members:
:inherited-members:
:show-inheritance:
2 changes: 1 addition & 1 deletion docs/usage.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
========
Usage
Examples
========

To use predeval in a project::
Expand Down
88 changes: 64 additions & 24 deletions predeval/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@
__license__ = 'MIT'


def chi2_test(reference, test_data):
def _chi2_test(reference, test_data):
"""Change chi2_contingency inputs for partial evaluation.
Parameters
----------
reference : list (ideally one-dimensional np.array)
reference : list or np.array
This is the reference data that will be used for the comparison.
test_data : list (ideally one-dimensional np.array)
test_data : list or np.array
This is the data compared to the reference data.
Returns
Expand All @@ -45,6 +45,43 @@ def chi2_test(reference, test_data):
class CategoricalEvaluator(ParentPredEval):
"""
Evaluator for categorical model outputs (e.g., classification models).
...
Parameters
----------
ref_data : list of int or float or np.array
This is the reference data for all tests. All future data will be compared to this data.
assertions : list of str, optional
These are the assertion tests that will be created. Default is ['chi2_test', 'exist'].
verbose : bool, optional
Whether tests should print their output. Default is True.
Attributes
----------
assertion_params : dict
dictionary of test names and values defining these tests
(e.g., test-statistic for chi2_test).
Default value for chi2_test is 0.2.
assertions : list of str
This list of strings describes the tests that will be run on comparison data.
Methods
-------
update_chi2_test(input_data)
Load chi2_test with input_data.
update_exist(input_data)
Update what expected categories are.
check_chi2(test_data)
Test whether categorical data is distributed across categories as expected.
check_exist(test_data)
Test whether all categories exist.
check_data(test_data)
Run all tests declared in assertions on test_data.
update_param(param_key, param_value)
Update values in assertion_params (e.g., chi2_test statistic).
"""
def __init__(
self,
Expand All @@ -65,44 +102,44 @@ def __init__(
self.assertion_params_['chi2_stat'] = kwargs.get('chi2_stat', 0.2)

# ---- create list of assertions to test ---- #
self.possible_assertions_ = {
'chi2_test': (self.create_chi2_test, self.check_chi2),
'exist': (self.create_exist, self.check_exist),
self._possible_assertions_ = {
'chi2_test': (self.update_chi2_test, self.check_chi2),
'exist': (self.update_exist, self.check_exist),
}

# ---- create list of assertions to test ---- #
assertions = ['chi2_test', 'exist'] if assertions is None else assertions
self.assertions_ = self.check_assertion_types(assertions)
self.assertions_ = self._check_assertion_types(assertions)

# ---- populate assertion tests with reference data ---- #
for i in self.assertions_:
self.possible_assertions[i][0](self.ref_data)
self._possible_assertions[i][0](self.ref_data)

# ---- populate list of tests to run and run tests ---- #
self.tests_ = [self.possible_assertions_[i][1] for i in self.assertions_]
self._tests_ = [self._possible_assertions_[i][1] for i in self.assertions_]

@property
def assertion_params(self):
return self.assertion_params_

@property
def possible_assertions(self):
return self.possible_assertions_
def _possible_assertions(self):
return self._possible_assertions_

@property
def assertions(self):
return self.assertions_

@property
def tests(self):
return self.tests_
def _tests(self):
return self._tests_

def create_chi2_test(self, input_data):
def update_chi2_test(self, input_data):
"""Create partially evaluated chi2 contingency test.
Parameters
----------
input_data : list (ideally one-dimensional np.array)
input_data : list or np.array
This is the reference data for the chi2 test. All future data will be compared to this data.
Returns
Expand All @@ -114,14 +151,14 @@ def create_chi2_test(self, input_data):
_, counts = np.unique(input_data, return_counts=True)
assert all([x >= 5 for x in counts]), \
'Not enough data of each type for reliable Chi2 Contingency test. Need at least 5.'
self.assertion_params['chi2_test'] = partial(chi2_test, np.array(counts))
self.assertion_params['chi2_test'] = partial(_chi2_test, np.array(counts))

def create_exist(self, input_data):
def update_exist(self, input_data):
"""Create input data for test checking whether all categorical outputs exist.
Parameters
----------
input_data : list (ideally one-dimensional np.array)
input_data : list or np.array
This is the reference data for check_exist. All future data will be compared to it.
Returns
Expand All @@ -133,12 +170,15 @@ def create_exist(self, input_data):
assert len(input_data.shape) == 1, 'Input data not a single vector'
self.assertion_params['cat_exists'] = np.unique(input_data)

def check_chi2(self, comparison_data=None):
def check_chi2(self, test_data):
"""Test whether test_data is similar to reference data.
If chi2-test-statistic exceeds the value in assertion_params,
then the test will produce a False (rather than True).
Parameters
----------
comparison_data : list (ideally one-dimensional np.array)
test_data : list or np.array
This is the data that will be compared to the reference data.
Returns
Expand All @@ -148,7 +188,6 @@ def check_chi2(self, comparison_data=None):
"""
assert self.assertion_params['chi2_test'], 'Must input or load reference data chi2-test'
test_data = self.ref_data if comparison_data is None else comparison_data
assert len(test_data.shape) == 1, 'Input data not a single vector'
_, counts = np.unique(test_data, return_counts=True)
assert all([x >= 5 for x in counts]), \
Expand All @@ -161,12 +200,14 @@ def check_chi2(self, comparison_data=None):
print('{} chi2 check; test statistic={}, p={}'.format(pass_fail, test_stat, p_value))
return ('chi2', passed)

def check_exist(self, comparison_data=None):
def check_exist(self, test_data):
"""Check that all distinct values are present in test_data.
If any values are missing, then the function will return False (rather than True).
Parameters
----------
comparison_data : list (ideally one-dimensional np.array)
test_data : list or np.array
This is the data that will be compared to the reference data.
Returns
Expand All @@ -176,7 +217,6 @@ def check_exist(self, comparison_data=None):
"""
assert self.assertion_params['cat_exists'] is not None,\
'Must input or load reference minimum'
test_data = self.ref_data if comparison_data is None else comparison_data
assert len(test_data.shape) == 1, 'Input data not a single vector'
obs = np.unique(np.array(test_data))
exp = list(self.assertion_params['cat_exists'])
Expand Down

0 comments on commit 81cb1eb

Please sign in to comment.