
Docs/evaluator pages (#253)
- Created evaluator parser
- Restructured docs for better organization
- Created how to make your evaluator documentation
- Enabled docstring automatic testing
fabrizio-credo committed Nov 22, 2022
1 parent d366fb7 commit fe82be1
Showing 43 changed files with 1,045 additions and 129 deletions.
3 changes: 2 additions & 1 deletion credoai/evaluators/data_fairness.py
@@ -33,7 +33,8 @@


class DataFairness(Evaluator):
"""Data Fairness for Credo AI.
"""
Data Fairness for Credo AI.
This evaluator performs a fairness evaluation on the dataset. Given a sensitive feature,
it calculates a number of assessments:
3 changes: 2 additions & 1 deletion credoai/evaluators/data_profiler.py
@@ -16,7 +16,8 @@


class DataProfiler(Evaluator):
"""Data profiling module for Credo AI.
"""
Data profiling module for Credo AI.
This evaluator runs the pandas profiler on the data. Pandas profiler calculates a number
of descriptive statistics about the data.
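For context, DataFairness and DataProfiler are run through a Lens pipeline, much like the IdentityVerification example further down this diff. The sketch below is illustrative only and is not part of this commit: the TabularData constructor arguments (name, X, y, sensitive_features) and the data-only Lens pipeline are assumptions about the credoai.artifacts and Lens APIs.

import pandas as pd
from credoai.artifacts import TabularData
from credoai.evaluators import DataFairness, DataProfiler
from credoai.lens import Lens

df = pd.DataFrame({
    "feature": [1, 2, 3, 4, 5, 6],
    "label":   [0, 1, 0, 1, 1, 0],
    "gender":  ["f", "m", "f", "m", "f", "m"],
})
credo_data = TabularData(
    name="toy-data",
    X=df[["feature"]],
    y=df["label"],
    sensitive_features=df["gender"],   # DataFairness requires a sensitive feature
)
pipeline = Lens(assessment_data=credo_data)
pipeline.add(DataFairness())   # fairness assessments on the dataset
pipeline.add(DataProfiler())   # descriptive statistics via the pandas profiler
pipeline.run()
results = pipeline.get_results()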
8 changes: 4 additions & 4 deletions credoai/evaluators/deepchecks.py
@@ -11,7 +11,7 @@

class Deepchecks(Evaluator):
"""
deepchecks evaluator
Deepchecks evaluator
This evaluator enables running deepchecks `checks` and passing the results to
the Governance platform in the form of a deepchecks SuiteResult, cast to JSON format.
@@ -49,7 +49,7 @@ def __init__(
checks: Optional[List[BaseCheck]] = DEFAULT_CHECKS,
):
super().__init__()
self.name = suite_name
self.suite_name = suite_name
self.checks = checks

def _setup(self):
@@ -75,7 +75,7 @@ def _setup_deepchecks(self):
if self.model:
self.deepchecks_model = self.model.model_like

self.suite = Suite(name=self.name)
self.suite = Suite(name=self.suite_name)
for check in self.checks:
self.suite.add(check)
# doing this as a for-loop list seems to be the only way
@@ -94,7 +94,7 @@ def evaluate(self):
self._setup_deepchecks()
self.run_suite()

self.results = [DeepchecksContainer(self.name, self.suite_results)]
self.results = [DeepchecksContainer(self.suite_name, self.suite_results)]

return self

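After this change the suite label no longer collides with the base Evaluator.name property (shown in evaluator.py below), which returns the class name; the label now lives in its own suite_name attribute and flows into the deepchecks Suite and the DeepchecksContainer. A minimal sketch, assuming Deepchecks is importable from credoai.evaluators like the other evaluators in this commit:

from credoai.evaluators import Deepchecks

evaluator = Deepchecks(suite_name="credo_deepchecks_suite")  # checks defaults to DEFAULT_CHECKS
print(evaluator.name)        # "Deepchecks" -- the class name, from the base Evaluator property
print(evaluator.suite_name)  # "credo_deepchecks_suite" -- labels the Suite and the DeepchecksContainer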
22 changes: 22 additions & 0 deletions credoai/evaluators/equity.py
@@ -318,6 +318,28 @@ def logit(x):


class ModelEquity(DataEquity):
"""
Evaluates the equity of a model's predictions.
This evaluator assesses whether model predictions are distributed equally across a sensitive
feature. Depending on the kind of outcome, different tests will be performed.
- Discrete: chi-squared contingency tests,
  followed by Bonferroni-corrected post-hoc chi-squared tests
- Continuous: one-way ANOVA, followed by Tukey HSD post-hoc tests
- Proportion (bounded [0-1] continuous outcome): the outcome is transformed to logits, then
  treated as continuous
Parameters
----------
use_predict_proba : bool, optional
Defines which predict method will be used; if True, predict_proba will be used.
This method outputs probabilities rather than class predictions. The availability
of predict_proba depends on the model under assessment. By default False.
p_value : float, optional
The significance value to evaluate statistical tests, by default 0.01
"""

def __init__(self, use_predict_proba=False, p_value=0.01):
self.use_predict_proba = use_predict_proba
super().__init__(p_value)
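The tests listed in the ModelEquity docstring map onto standard scipy/statsmodels routines. The snippet below is a standalone sketch of the recipe for a proportion outcome (logit transform, one-way ANOVA, Tukey HSD post-hoc) rather than the evaluator's internal code; the data and column names are made up.

import numpy as np
import pandas as pd
from scipy.special import logit
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

rng = np.random.default_rng(0)
df = pd.DataFrame({
    "group": np.repeat(["a", "b", "c"], 50),
    "outcome": np.clip(rng.beta(2, 5, 150) + np.repeat([0.0, 0.05, 0.1], 50), 0.01, 0.99),
})
df["outcome_logit"] = logit(df["outcome"])   # bounded [0-1] outcome -> continuous scale

groups = [g["outcome_logit"].values for _, g in df.groupby("group")]
f_stat, p_value = f_oneway(*groups)          # one-way ANOVA across sensitive-feature groups
print(f"ANOVA p-value: {p_value:.4f}")
if p_value < 0.01:                           # the evaluator's default p_value is 0.01
    print(pairwise_tukeyhsd(df["outcome_logit"], df["group"], alpha=0.01))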
87 changes: 59 additions & 28 deletions credoai/evaluators/evaluator.py
@@ -11,6 +11,13 @@ class Evaluator(ABC):
Defines basic functions required from any evaluator object.
This class leverages the special method `__call__` to make artifacts
available in the class enclosure.
.. automethod:: __call__
.. automethod:: _init_artifacts
.. automethod:: _validate_arguments
.. automethod:: _setup
"""

def __init__(self):
@@ -21,10 +28,22 @@ def __init__(self):

@property
def name(self):
"""The name associated to the Evaluator, equals the class name."""
return self.__class__.__name__

@property
def results(self):
"""
Container for all results.
It is expected to be a list of EvidenceContainers. This is enforced in
the associated setter method.
Raises
------
NotRunError
Indicates that results are missing because the evaluator was not run.
"""
if self._results is not None:
return self._results
else:
@@ -34,6 +53,7 @@ def results(self):

@results.setter
def results(self, results):
"""Requires the results to be list of Evidence Containers"""
if not isinstance(results, list):
raise ValidationError("Results must be a list")
for result in results:
@@ -44,39 +64,32 @@
@property
@abstractmethod
def required_artifacts(self):
"""
The artifacts required for the evaluator to function.
This set contains the :ref:`artifacts<credoai.artifacts>` that Lens can feed to
an evaluator; the accepted values are ``{"model", "assessment_data", "training_data", "data"}``.
The string "data" means that the evaluator can be run on assessment and/or training data
(DataProfiler is an example). Lens will iterate over all the available artifacts internally.
The set can also include the string "sensitive_feature", indicating that the evaluator
depends on sensitive features. Lens will iterate over the available sensitive features
internally.
"""
pass

def __call__(self, **kwargs):
"""
This method is used to pass the model, assessment_data and training_data
artifacts to the instantiated evaluator.
After objects are passed, it performs arguments validation and calls _setup
>>> pipeline = Lens(model = model, assessment_data = dataset1)
where a group of arguments shareable across multiple evaluators is passed.
This method inside a specific evaluator takes the required arguments and
makes them available to the evaluator instance.
Requirements
-------------
_shared_arg_assignment requires explicitly named arguments.
Returns
-------
self
The method is called internally by the Lens instance, which only passes the
artifacts specified in the property :meth:`required_artifacts<Evaluator.required_artifacts>`.
Implementation template
-----------------------
The following code template provides an example of what the internals of this
method could look like:
>>> self.model = kwargs['model']
>>> self.assessment_dataset = kwargs['assessment_dataset']
where model and assessment_dataset are Lens() arguments.
After the artifacts are passed, it performs argument validation and calls :meth:`_setup<Evaluator._setup>`.
At the end of these operations, the validated artifacts are available in the evaluator enclosure.
"""
self._init_artifacts(kwargs)
self._validate_arguments()
@@ -89,14 +102,23 @@ def evaluate(self):
Execute any data/model processing required for the evaluator.
Populates the self.results object.
Returns
-------
self
"""
return self

def get_container_info(self, labels: dict = None, metadata: dict = None):
"""
Expands the base labels and metadata used to populate evidences.
Parameters
----------
labels : dict, optional
The default labels can be expanded by the user when defining a new evaluator.
A label is, in general, any information necessary to identify evidences in the Credo AI Platform.
By default None.
metadata : dict, optional
Any extra info the user wants to associate with the evidences. Compared
to labels, these are not necessary for evidence identification. By default None.
"""
info = self._base_container_info()
if labels:
info["labels"].update(labels)
@@ -105,13 +127,19 @@ def get_container_info(self, labels: dict = None, metadata: dict = None):
return info

def _base_container_info(self):
"""Extract basic info to populate labels and metadata."""
meta = {**self.metadata, **self._get_artifacts()}
labels = {"evaluator": self.name}
if "dataset_type" in meta:
labels["dataset_type"] = meta["dataset_type"]
return {"labels": labels, "metadata": meta}

def _get_artifacts(self):
"""
Extract artifacts that will be used by the evaluator.
The method also extracts name info from the available artifacts.
"""
artifacts = {}
save_keys = {
"model": "model_name",
@@ -140,6 +168,9 @@ def _init_artifacts(self, artifacts):

@abstractmethod
def _setup(self):
"""
Contains any extra steps necessary to initialize the evaluator
"""
pass

@abstractmethod
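Since this commit also adds the "how to make your evaluator" documentation, a hypothetical skeleton of a custom evaluator may help tie the pieces above together. Everything below is illustrative: the class name, validation logic, and X attribute are made up, and the evidence-container step is left as a comment because the container classes are not shown in this diff.

from credoai.evaluators.evaluator import Evaluator


class MyCustomEvaluator(Evaluator):
    """Toy evaluator requiring a model and assessment data."""

    required_artifacts = {"model", "assessment_data"}  # overrides the abstract property

    def _validate_arguments(self):
        # Called by __call__ after Lens has attached the artifacts to the instance;
        # the real evaluators raise credoai's ValidationError here.
        if getattr(self, "model", None) is None or getattr(self, "assessment_data", None) is None:
            raise ValueError("MyCustomEvaluator needs both a model and assessment data")

    def _setup(self):
        # Extra initialization once the validated artifacts are available.
        self.X = self.assessment_data.X  # assumes a tabular-style artifact with an X attribute
        return self

    def evaluate(self):
        # Compute metrics and wrap them in EvidenceContainers before assigning to
        # self.results (the setter enforces the container type), e.g.:
        # self.results = [SomeEvidenceContainer(..., **self.get_container_info())]
        return self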
86 changes: 42 additions & 44 deletions credoai/evaluators/identity_verification.py
@@ -26,10 +26,11 @@


class IdentityVerification(Evaluator):
"""Pair-wise-comparison-based identity verification evaluator for Credo AI
"""
Pair-wise-comparison-based identity verification evaluator for Credo AI
This evaluator takes in identity verification data and
provides functionality to perform performance and fairness assessment
Parameters
----------
@@ -65,50 +66,47 @@ class IdentityVerification(Evaluator):
Example
--------
import pandas as pd
from credoai.lens import Lens
from credoai.artifacts import ComparisonData, ComparisonModel
from credoai.evaluators import IdentityVerification
evaluator = IdentityVerification(similarity_thresholds=[60, 99])
pairs = pd.DataFrame({
'source-subject-id': ['s0', 's0', 's0', 's0', 's1', 's1', 's1', 's1', 's1', 's2'],
'source-subject-data-sample': ['s00', 's00', 's00', 's00', 's10', 's10', 's10', 's11', 's11', 's20'],
'target-subject-id': ['s1', 's1', 's2', 's3', 's1', 's2', 's3', 's2', 's3', 's3'],
'target-subject-data-sample': ['s10', 's11', 's20', 's30', 's11', 's20', 's30', 's20', 's30', 's30']
})
subjects_sensitive_features = pd.DataFrame({
'subject-id': ['s0', 's1', 's2', 's3'],
'gender': ['female', 'male', 'female', 'female']
})
class FaceCompare:
# a dummy selfie comparison model
def compare(self, pairs):
similarity_scores = [31.5, 16.7, 20.8, 84.4, 12.0, 15.2, 45.8, 23.5, 28.5, 44.5]
return similarity_scores
face_compare = FaceCompare()
credo_data = ComparisonData(
name="face-data",
pairs=pairs,
subjects_sensitive_features=subjects_sensitive_features
)
credo_model = ComparisonModel(
name="face-compare",
model_like=face_compare
)
pipeline = Lens(model=credo_model, assessment_data=credo_data)
pipeline.add(evaluator)
>>> import pandas as pd
>>> from credoai.lens import Lens
>>> from credoai.artifacts import ComparisonData, ComparisonModel
>>> from credoai.evaluators import IdentityVerification
>>> evaluator = IdentityVerification(similarity_thresholds=[60, 99])
>>> import doctest
>>> doctest.ELLIPSIS_MARKER = '-etc-'
>>> pairs = pd.DataFrame({
... 'source-subject-id': ['s0', 's0', 's0', 's0', 's1', 's1', 's1', 's1', 's1', 's2'],
... 'source-subject-data-sample': ['s00', 's00', 's00', 's00', 's10', 's10', 's10', 's11', 's11', 's20'],
... 'target-subject-id': ['s1', 's1', 's2', 's3', 's1', 's2', 's3', 's2', 's3', 's3'],
... 'target-subject-data-sample': ['s10', 's11', 's20', 's30', 's11', 's20', 's30', 's20', 's30', 's30']
... })
>>> subjects_sensitive_features = pd.DataFrame({
... 'subject-id': ['s0', 's1', 's2', 's3'],
... 'gender': ['female', 'male', 'female', 'female']
... })
>>> class FaceCompare:
... # a dummy selfie comparison model
... def compare(self, pairs):
... similarity_scores = [31.5, 16.7, 20.8, 84.4, 12.0, 15.2, 45.8, 23.5, 28.5, 44.5]
... return similarity_scores
>>> face_compare = FaceCompare()
>>> credo_data = ComparisonData(
... name="face-data",
... pairs=pairs,
... subjects_sensitive_features=subjects_sensitive_features
... )
>>> credo_model = ComparisonModel(
... name="face-compare",
... model_like=face_compare
... )
>>> pipeline = Lens(model=credo_model, assessment_data=credo_data)
>>> pipeline.add(evaluator) # doctest: +ELLIPSIS
-etc-
>>> pipeline.run() # doctest: +ELLIPSIS
-etc-
>>> pipeline.get_results() # doctest: +ELLIPSIS
-etc-
"""

def __init__(
3 changes: 2 additions & 1 deletion credoai/evaluators/privacy.py
@@ -72,7 +72,8 @@


class Privacy(Evaluator):
"""Privacy module for Credo AI.
"""
Privacy module for Credo AI.
This module takes in a classification model and data and provides functionality
to perform privacy assessment
3 changes: 2 additions & 1 deletion credoai/evaluators/ranking_fairness.py
@@ -34,7 +34,8 @@


class RankingFairness(Evaluator):
"""Ranking fairness evaluator for Credo AI
"""
Ranking fairness evaluator for Credo AI
This module takes in ranking results and provides functionality to perform fairness assessment.
The results should include rankings, sensitive features, and optionally, scores.
3 changes: 2 additions & 1 deletion credoai/evaluators/security.py
@@ -30,7 +30,8 @@


class Security(Evaluator):
"""Security module for Credo AI.
"""
Security module for Credo AI.
This module takes in a classification model and data and
provides functionality to perform security assessment
