This repository has been archived by the owner on Dec 18, 2023. It is now read-only.

Docs/evaluator pages #253

Merged
merged 28 commits into develop from docs/evaluator_pages on Nov 22, 2022
28 commits
4b9f4e3
Reorganized docs directory
fabrizio-credo Nov 15, 2022
f212ea0
Updating evaluator docstring
fabrizio-credo Nov 16, 2022
6c37f65
Minor fix and update to the evaluator docstrings
fabrizio-credo Nov 16, 2022
d8c82c0
Small update to governance init
fabrizio-credo Nov 16, 2022
d9e87c0
Testing changes to evaluator
fabrizio-credo Nov 16, 2022
27a7344
Continuing work on schema
fabrizio-credo Nov 17, 2022
1d6c096
Updating the schema doc
fabrizio-credo Nov 18, 2022
e9d13c0
Almost finished the how to
fabrizio-credo Nov 18, 2022
f8d706b
Added evaluator doc pages:
fabrizio-credo Nov 18, 2022
d7a6fc5
All evaluator pages generating as expected
fabrizio-credo Nov 18, 2022
df1b870
Evaluators pages generation added to conf.py
fabrizio-credo Nov 18, 2022
1e6046f
Finalized make_your_own
fabrizio-credo Nov 18, 2022
b8d2b3e
Merge branch 'develop' into docs/evaluator_pages
fabrizio-credo Nov 18, 2022
44a049a
Fixed requirements syntax
fabrizio-credo Nov 18, 2022
b5a377d
Fixing requirements for docs
fabrizio-credo Nov 18, 2022
7ccf663
sorting out requirements
fabrizio-credo Nov 18, 2022
c781d01
Updating identity_verification and remove some of the requirements
fabrizio-credo Nov 18, 2022
8c57e4d
Removing auto evaluation of all pages
fabrizio-credo Nov 18, 2022
279e64a
Reorganizing hierarchy of docs
fabrizio-credo Nov 21, 2022
5737b9b
Changed language assertiveness, updated readme
fabrizio-credo Nov 21, 2022
cbfccb0
Updated deepchecks naming
fabrizio-credo Nov 21, 2022
46119b9
minor updates
fabrizio-credo Nov 21, 2022
36287d5
Adding autodoc test to pytest, updating identity verification
fabrizio-credo Nov 21, 2022
1e6a2ea
Change copy button behavior
fabrizio-credo Nov 21, 2022
e18ea73
Proper title capitalization
fabrizio-credo Nov 22, 2022
1bc2029
Hyperlinking evaluators page
fabrizio-credo Nov 22, 2022
e05d67a
PR review fixes, abridged requirements
fabrizio-credo Nov 22, 2022
911b0b1
Minor restructure
fabrizio-credo Nov 22, 2022
3 changes: 2 additions & 1 deletion credoai/evaluators/data_fairness.py
@@ -33,7 +33,8 @@


class DataFairness(Evaluator):
"""Data Fairness for Credo AI.
"""
Data Fairness for Credo AI.

This evaluator performs a fairness evaluation on the dataset. Given a sensitive feature,
it calculates a number of assessments:
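As a usage illustration only, here is a minimal sketch of wiring DataFairness into a Lens pipeline, in the style of the identity-verification example later in this diff. The TabularData artifact and its argument names are assumptions about the credoai artifacts API, not something this PR shows.

import pandas as pd
from credoai.lens import Lens
from credoai.artifacts import TabularData  # assumed artifact class
from credoai.evaluators import DataFairness

# toy assessment data with a sensitive feature
df = pd.DataFrame({
    "age": [22, 35, 58, 41],
    "income": [30, 52, 75, 61],
    "approved": [0, 1, 1, 0],
})
sensitive = pd.Series(["female", "male", "male", "female"], name="gender")

credo_data = TabularData(
    name="loan-data",
    X=df[["age", "income"]],
    y=df["approved"],
    sensitive_features=sensitive,
)

pipeline = Lens(assessment_data=credo_data)
pipeline.add(DataFairness())  # assessments are run per sensitive feature
pipeline.run()
results = pipeline.get_results()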
3 changes: 2 additions & 1 deletion credoai/evaluators/data_profiler.py
@@ -16,7 +16,8 @@


class DataProfiler(Evaluator):
"""Data profiling module for Credo AI.
"""
Data profiling module for Credo AI.

This evaluator runs the pandas profiler on a dataset. The pandas profiler calculates a number
of descriptive statistics about the data.
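Since the evaluator wraps the pandas profiler, a standalone sketch of the underlying profiling step may help. It assumes the pandas-profiling package (later renamed ydata-profiling); nothing here is added by this PR.

import pandas as pd
from pandas_profiling import ProfileReport  # assumed underlying dependency of DataProfiler

df = pd.DataFrame({"age": [22, 35, 58, 41], "income": [30, 52, 75, 61]})

# descriptive statistics, correlations, and missing-value summaries per column
report = ProfileReport(df, title="Assessment data profile", minimal=True)
report.to_file("data_profile.html")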
8 changes: 4 additions & 4 deletions credoai/evaluators/deepchecks.py
@@ -11,7 +11,7 @@

class Deepchecks(Evaluator):
"""
deepchecks evaluator
Deepchecks evaluator

This evaluator enables running deepchecks `checks` and passing the results to
the Governance platform in the form of a deepchecks SuiteResult, cast to JSON format.
@@ -49,7 +49,7 @@ def __init__(
checks: Optional[List[BaseCheck]] = DEFAULT_CHECKS,
):
super().__init__()
self.name = suite_name
self.suite_name = suite_name
self.checks = checks

def _setup(self):
@@ -75,7 +75,7 @@ def _setup_deepchecks(self):
if self.model:
self.deepchecks_model = self.model.model_like

self.suite = Suite(name=self.name)
self.suite = Suite(name=self.suite_name)
for check in self.checks:
self.suite.add(check)
# doing this as a for-loop list seems to be the only way
@@ -94,7 +94,7 @@ def evaluate(self):
self._setup_deepchecks()
self.run_suite()

self.results = [DeepchecksContainer(self.name, self.suite_results)]
self.results = [DeepchecksContainer(self.suite_name, self.suite_results)]

return self

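A minimal sketch of how the renamed suite_name argument surfaces to users; the check import path is an assumption about the deepchecks tabular API and may vary across deepchecks versions.

from credoai.evaluators import Deepchecks
from deepchecks.tabular.checks import ConflictingLabels  # assumed check; any BaseCheck works

# suite_name is stored on the evaluator and later used as Suite(name=self.suite_name),
# so it no longer collides with the base class's read-only `name` property
evaluator = Deepchecks(
    suite_name="credo_default_checks",
    checks=[ConflictingLabels()],
)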
22 changes: 22 additions & 0 deletions credoai/evaluators/equity.py
@@ -318,6 +318,28 @@ def logit(x):


class ModelEquity(DataEquity):
"""
Evaluates the equity of a model's predictions.

This evaluator assesses whether model predictions are distributed equally across a sensitive
feature. Depending on the kind of outcome, different tests will be performed.

- Discrete: chi-squared contingency tests,
followed by Bonferroni-corrected post hoc chi-squared tests
- Continuous: One-way ANOVA, followed by Tukey HSD post hoc tests
- Proportion (bounded [0-1] continuous outcome): the outcome is transformed to logits, then
treated as a continuous outcome

Parameters
----------
use_predict_proba : bool, optional
Defines which predict method will be used; if True, predict_proba will be used.
This method outputs probabilities rather than class predictions. The availability
of predict_proba depends on the model under assessment. By default False.
p_value : float, optional
The significance level used to evaluate statistical tests, by default 0.01
"""

def __init__(self, use_predict_proba=False, p_value=0.01):
self.use_predict_proba = use_predict_proba
super().__init__(p_value)
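To make the test dispatch described above concrete, here is a standalone sketch using scipy directly; it mirrors the docstring's description and is not the evaluator's own implementation (the helper name and signature are invented for illustration).

import numpy as np
import pandas as pd
from scipy import stats

def overall_equity_test(outcome: pd.Series, sensitive: pd.Series,
                        outcome_type: str = "discrete", p_value: float = 0.01):
    """Return (statistic, p, significant) for the overall test per outcome kind."""
    if outcome_type == "discrete":
        # chi-squared test on the outcome-by-group contingency table
        table = pd.crosstab(sensitive, outcome)
        statistic, p, _, _ = stats.chi2_contingency(table)
    else:
        if outcome_type == "proportion":
            # bounded [0, 1] outcomes are mapped to logits first
            eps = 1e-6
            clipped = outcome.clip(eps, 1 - eps)
            outcome = np.log(clipped / (1 - clipped))
        # one-way ANOVA across the sensitive-feature groups
        groups = [values.to_numpy() for _, values in outcome.groupby(sensitive)]
        statistic, p = stats.f_oneway(*groups)
    return statistic, p, p < p_value

# e.g. overall_equity_test(predictions, gender, outcome_type="continuous")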
87 changes: 59 additions & 28 deletions credoai/evaluators/evaluator.py
@@ -11,6 +11,13 @@ class Evaluator(ABC):

Defines basic functions required from any evaluator object.

This class leverages the special method `__call__` to make artifacts
available in the class enclosure.

.. automethod:: __call__
.. automethod:: _init_artifacts
.. automethod:: _validate_arguments
.. automethod:: _setup
"""

def __init__(self):
@@ -21,10 +28,22 @@ def __init__(self):

@property
def name(self):
"""The name associated to the Evaluator, equals the class name."""
return self.__class__.__name__

@property
def results(self):
"""
Container for all results.

It is expected to be a list of EvidenceContainers. This is enforced in
the associated setter method.

Raises
------
NotRunError
Indicates that results are missing because the evaluator was not run.
"""
if self._results is not None:
return self._results
else:
@@ -34,6 +53,7 @@ def results(self):

@results.setter
def results(self, results):
"""Requires the results to be list of Evidence Containers"""
if not isinstance(results, list):
raise ValidationError("Results must be a list")
for result in results:
@@ -44,39 +64,32 @@
@property
@abstractmethod
def required_artifacts(self):
"""
The artifacts required for the evaluator to function.

This set contains the :ref:`artifacts<credoai.artifacts>` that Lens can feed to
an evaluator; the accepted values are ``{"model", "assessment_data", "training_data", "data"}``.

The string "data" means that the evaluator can be run on assessment and/or training data
(DataProfiler is an example). Lens will iterate over all the available artifacts internally.

The set can also include the string "sensitive_feature". This is to indicate
that the evaluator depends on sensitive features. Lens will iterate over the available sensitive
features internally.
"""
pass

def __call__(self, **kwargs):
"""
This method is used to pass the model, assessment_data and training_data
artifacts to the instantiated evaluator.

After objects are passed, it performs arguments validation and calls _setup

>>> pipeline = Lens(model = model, assessment_data = dataset1)

where a group of arguments shareable across multiple evaluators is passed.
This method inside a specific evaluator takes the required arguments and
makes them available to the evaluator instance.

Requirements
-------------
_shared_arg_assignment requires explicitly named arguments.

Returns
-------
self
The method is called internally by the Lens instance, which only passes the
artifacts specified in the property :meth:`required_artifacts<Evaluator.required_artifacts>`.

Implementation template
-----------------------
The following code template provides an example of what the internals of this
method could look like:

>>> self.model = kwargs['model']
>>> self.assessment_dataset = kwargs['assessment_dataset']

where model and assessment_dataset are Lens() arguments.
After the artifacts are passed, it performs argument validation and calls :meth:`_setup<Evaluator._setup>`.

At the end of these operations, the validated artifacts are available in the evaluator enclosure.
"""
self._init_artifacts(kwargs)
self._validate_arguments()
@@ -89,14 +102,23 @@ def evaluate(self):
Execute any data/model processing required for the evaluator.

Populates the self.results object.

Returns
-------
self
"""
return self

def get_container_info(self, labels: dict = None, metadata: dict = None):
"""
Expands the base labels and metadata used to populate evidences.

Parameters
----------
labels : dict, optional
The default labels can be expanded by the user when defining a new evaluator.
A label is, in general, any information necessary to identify evidences in the
Credo AI Platform. By default None.
metadata : dict, optional
Any extra info the user wants to associate with the evidences. Unlike labels,
these are not necessary for evidence identification. By default None.
"""
info = self._base_container_info()
if labels:
info["labels"].update(labels)
@@ -105,13 +127,19 @@ def _base_container_info(self):
return info

def _base_container_info(self):
"""Extract basic info to populate labels and metadata."""
meta = {**self.metadata, **self._get_artifacts()}
labels = {"evaluator": self.name}
if "dataset_type" in meta:
labels["dataset_type"] = meta["dataset_type"]
return {"labels": labels, "metadata": meta}

def _get_artifacts(self):
"""
Extract artifacts that will be used by the evaluator.

The method also extracts name info from the available artifacts.
"""
artifacts = {}
save_keys = {
"model": "model_name",
@@ -140,6 +168,9 @@ def _init_artifacts(self, artifacts):

@abstractmethod
def _setup(self):
"""
Contains any extra steps necessary to initialize the evaluator
"""
pass

@abstractmethod
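To tie the base-class contract together, here is a minimal custom-evaluator sketch under stated assumptions: MetricContainer and its import path are guesses at the evidence-container API, and the expected columns of the result frame may differ in the actual package.

import pandas as pd
from credoai.evaluators import Evaluator
from credoai.evidence import MetricContainer  # assumed import path for an EvidenceContainer

class LabelBalance(Evaluator):
    """Reports the relative frequency of each label in the assessment data."""

    # Lens reads this and passes only the listed artifacts to __call__
    required_artifacts = {"assessment_data"}

    def _validate_arguments(self):
        # called by __call__ after _init_artifacts and before _setup
        # (the real package raises ValidationError; ValueError keeps this sketch self-contained)
        if self.assessment_data.y is None:
            raise ValueError("LabelBalance requires labeled assessment data")

    def _setup(self):
        # validated artifacts are already attached to self at this point
        self.y = pd.Series(self.assessment_data.y)

    def evaluate(self):
        freq = self.y.value_counts(normalize=True)
        evidence = pd.DataFrame({"type": freq.index.astype(str), "value": freq.to_numpy()})
        # the results setter enforces a list of EvidenceContainers
        self.results = [MetricContainer(evidence, **self.get_container_info())]
        return self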
86 changes: 42 additions & 44 deletions credoai/evaluators/identity_verification.py
@@ -26,10 +26,11 @@


class IdentityVerification(Evaluator):
"""Pair-wise-comparison-based identity verification evaluator for Credo AI
"""
Pair-wise-comparison-based identity verification evaluator for Credo AI

This evaluator takes in identity verification data and
provides functionality to perform performance and fairness assessment
provides functionality to perform performance and fairness assessment

Parameters
----------
@@ -65,50 +66,47 @@ class IdentityVerification(Evaluator):

Example
--------
import pandas as pd
from credoai.lens import Lens
from credoai.artifacts import ComparisonData, ComparisonModel
from credoai.evaluators import IdentityVerification

evaluator = IdentityVerification(similarity_thresholds=[60, 99])

pairs = pd.DataFrame({
'source-subject-id': ['s0', 's0', 's0', 's0', 's1', 's1', 's1', 's1', 's1', 's2'],
'source-subject-data-sample': ['s00', 's00', 's00', 's00', 's10', 's10', 's10', 's11', 's11', 's20'],
'target-subject-id': ['s1', 's1', 's2', 's3', 's1', 's2', 's3', 's2', 's3', 's3'],
'target-subject-data-sample': ['s10', 's11', 's20', 's30', 's11', 's20', 's30', 's20', 's30', 's30']
})

subjects_sensitive_features = pd.DataFrame({
'subject-id': ['s0', 's1', 's2', 's3'],
'gender': ['female', 'male', 'female', 'female']
})

class FaceCompare:
# a dummy selfie comparison model
def compare(self, pairs):
similarity_scores = [31.5, 16.7, 20.8, 84.4, 12.0, 15.2, 45.8, 23.5, 28.5, 44.5]
return similarity_scores

face_compare = FaceCompare()

credo_data = ComparisonData(
name="face-data",
pairs=pairs,
subjects_sensitive_features=subjects_sensitive_features
)

credo_model = ComparisonModel(
name="face-compare",
model_like=face_compare
)

pipeline = Lens(model=credo_model, assessment_data=credo_data)

pipeline.add(evaluator)
>>> import pandas as pd
>>> from credoai.lens import Lens
>>> from credoai.artifacts import ComparisonData, ComparisonModel
>>> from credoai.evaluators import IdentityVerification
>>> evaluator = IdentityVerification(similarity_thresholds=[60, 99])
>>> import doctest
>>> doctest.ELLIPSIS_MARKER = '-etc-'
>>> pairs = pd.DataFrame({
... 'source-subject-id': ['s0', 's0', 's0', 's0', 's1', 's1', 's1', 's1', 's1', 's2'],
... 'source-subject-data-sample': ['s00', 's00', 's00', 's00', 's10', 's10', 's10', 's11', 's11', 's20'],
... 'target-subject-id': ['s1', 's1', 's2', 's3', 's1', 's2', 's3', 's2', 's3', 's3'],
... 'target-subject-data-sample': ['s10', 's11', 's20', 's30', 's11', 's20', 's30', 's20', 's30', 's30']
... })
>>> subjects_sensitive_features = pd.DataFrame({
... 'subject-id': ['s0', 's1', 's2', 's3'],
... 'gender': ['female', 'male', 'female', 'female']
... })
>>> class FaceCompare:
... # a dummy selfie comparison model
... def compare(self, pairs):
... similarity_scores = [31.5, 16.7, 20.8, 84.4, 12.0, 15.2, 45.8, 23.5, 28.5, 44.5]
... return similarity_scores
>>> face_compare = FaceCompare()
>>> credo_data = ComparisonData(
... name="face-data",
... pairs=pairs,
... subjects_sensitive_features=subjects_sensitive_features
... )
>>> credo_model = ComparisonModel(
... name="face-compare",
... model_like=face_compare
... )
>>> pipeline = Lens(model=credo_model, assessment_data=credo_data)
>>> pipeline.add(evaluator) # doctest: +ELLIPSIS
-etc-
>>> pipeline.run() # doctest: +ELLIPSIS
-etc-
>>> pipeline.get_results() # doctest: +ELLIPSIS
-etc-

pipeline.run()
pipeline.get_results()
"""

def __init__(
3 changes: 2 additions & 1 deletion credoai/evaluators/privacy.py
@@ -72,7 +72,8 @@


class Privacy(Evaluator):
"""Privacy module for Credo AI.
"""
Privacy module for Credo AI.

This module takes in a classification model and data and provides functionality
to perform privacy assessment
3 changes: 2 additions & 1 deletion credoai/evaluators/ranking_fairness.py
@@ -34,7 +34,8 @@


class RankingFairness(Evaluator):
"""Ranking fairness evaluator for Credo AI
"""
Ranking fairness evaluator for Credo AI

This module takes in ranking results and provides functionality to perform fairness assessment.
The results should include rankings, sensitive features, and, optionally, scores.
3 changes: 2 additions & 1 deletion credoai/evaluators/security.py
@@ -30,7 +30,8 @@


class Security(Evaluator):
"""Security module for Credo AI.
"""
Security module for Credo AI.

This module takes in a classification model and data and
provides functionality to perform security assessment
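For the model-based evaluators above (Privacy, Security), a minimal end-to-end sketch follows; ClassificationModel and TabularData and their argument names are assumptions about the credoai artifacts API rather than something shown in this diff.

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from credoai.artifacts import ClassificationModel, TabularData  # assumed artifact classes
from credoai.evaluators import Privacy, Security
from credoai.lens import Lens

X, y = make_classification(n_samples=500, n_features=8, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = RandomForestClassifier(random_state=0).fit(X_train, y_train)

credo_model = ClassificationModel(name="rf-model", model_like=clf)
train_data = TabularData(name="train", X=X_train, y=y_train)
test_data = TabularData(name="test", X=X_test, y=y_test)

pipeline = Lens(model=credo_model, assessment_data=test_data, training_data=train_data)
pipeline.add(Privacy())   # privacy assessment of the model and data
pipeline.add(Security())  # security assessment of the model
pipeline.run()
results = pipeline.get_results()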