
Docs/evaluator pages (#253)
- Created evaluator parser
- Restructured docs for better organization
- Created how to make your evaluator documentation
- Enabled docstring automatic testing
fabrizio-credo committed Nov 22, 2022
1 parent d366fb7 commit fe82be1
Showing 43 changed files with 1,045 additions and 129 deletions.
3 changes: 2 additions & 1 deletion credoai/evaluators/data_fairness.py
@@ -33,7 +33,8 @@


class DataFairness(Evaluator):
"""Data Fairness for Credo AI.
"""
Data Fairness for Credo AI.
This evaluator performs a fairness evaluation on the dataset. Given a sensitive feature,
it calculates a number of assessments:
3 changes: 2 additions & 1 deletion credoai/evaluators/data_profiler.py
@@ -16,7 +16,8 @@


class DataProfiler(Evaluator):
"""Data profiling module for Credo AI.
"""
Data profiling module for Credo AI.
This evaluator runs the pandas profiler on the data. Pandas profiler calculates a number
of descriptive statistics about the data.
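For context, DataFairness and DataProfiler are run through a Lens pipeline, much like the IdentityVerification example further down this diff. The sketch below is illustrative only and is not part of this commit: the TabularData constructor arguments (name, X, y, sensitive_features) and the data-only Lens pipeline are assumptions about the credoai.artifacts and Lens APIs.

import pandas as pd
from credoai.artifacts import TabularData
from credoai.evaluators import DataFairness, DataProfiler
from credoai.lens import Lens

df = pd.DataFrame({
    "feature": [1, 2, 3, 4, 5, 6],
    "label":   [0, 1, 0, 1, 1, 0],
    "gender":  ["f", "m", "f", "m", "f", "m"],
})
credo_data = TabularData(
    name="toy-data",
    X=df[["feature"]],
    y=df["label"],
    sensitive_features=df["gender"],   # DataFairness requires a sensitive feature
)
pipeline = Lens(assessment_data=credo_data)
pipeline.add(DataFairness())   # fairness assessments on the dataset
pipeline.add(DataProfiler())   # descriptive statistics via the pandas profiler
pipeline.run()
results = pipeline.get_results()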
8 changes: 4 additions & 4 deletions credoai/evaluators/deepchecks.py
@@ -11,7 +11,7 @@

class Deepchecks(Evaluator):
"""
deepchecks evaluator
Deepchecks evaluator
This evaluator enables running deepchecks `checks` and passing the results to
the Governance platform in the form of a deepchecks SuiteResult, cast to JSON format.
@@ -49,7 +49,7 @@ def __init__(
checks: Optional[List[BaseCheck]] = DEFAULT_CHECKS,
):
super().__init__()
self.name = suite_name
self.suite_name = suite_name
self.checks = checks

def _setup(self):
@@ -75,7 +75,7 @@ def _setup_deepchecks(self):
if self.model:
self.deepchecks_model = self.model.model_like

self.suite = Suite(name=self.name)
self.suite = Suite(name=self.suite_name)
for check in self.checks:
self.suite.add(check)
# doing this as a for-loop list seems to be the only way
@@ -94,7 +94,7 @@ def evaluate(self):
self._setup_deepchecks()
self.run_suite()

self.results = [DeepchecksContainer(self.name, self.suite_results)]
self.results = [DeepchecksContainer(self.suite_name, self.suite_results)]

return self

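After this change the suite label no longer collides with the base Evaluator.name property (shown in evaluator.py below), which returns the class name; the label now lives in its own suite_name attribute and flows into the deepchecks Suite and the DeepchecksContainer. A minimal sketch, assuming Deepchecks is importable from credoai.evaluators like the other evaluators in this commit:

from credoai.evaluators import Deepchecks

evaluator = Deepchecks(suite_name="credo_deepchecks_suite")  # checks defaults to DEFAULT_CHECKS
print(evaluator.name)        # "Deepchecks" -- the class name, from the base Evaluator property
print(evaluator.suite_name)  # "credo_deepchecks_suite" -- labels the Suite and the DeepchecksContainer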
22 changes: 22 additions & 0 deletions credoai/evaluators/equity.py
@@ -318,6 +318,28 @@ def logit(x):


class ModelEquity(DataEquity):
"""
Evaluates the equity of a model's predictions.
This evaluator assesses whether model predictions are distributed equally across a sensitive
feature. Depending on the kind of outcome, different tests will be performed.
- Discrete: chi-squared contingency tests,
  followed by Bonferroni-corrected post-hoc chi-squared tests
- Continuous: one-way ANOVA, followed by Tukey HSD post-hoc tests
- Proportion (bounded [0-1] continuous outcome): the outcome is transformed to logits, then
  treated as continuous
Parameters
----------
use_predict_proba : bool, optional
Defines which predict method will be used; if True, predict_proba will be used.
This method outputs probabilities rather than class predictions. The availability
of predict_proba depends on the model under assessment. By default False.
p_value : float, optional
The significance value to evaluate statistical tests, by default 0.01
"""

def __init__(self, use_predict_proba=False, p_value=0.01):
self.use_predict_proba = use_predict_proba
super().__init__(p_value)
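The tests listed in the ModelEquity docstring map onto standard scipy/statsmodels routines. The snippet below is a standalone sketch of the recipe for a proportion outcome (logit transform, one-way ANOVA, Tukey HSD post-hoc) rather than the evaluator's internal code; the data and column names are made up.

import numpy as np
import pandas as pd
from scipy.special import logit
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

rng = np.random.default_rng(0)
df = pd.DataFrame({
    "group": np.repeat(["a", "b", "c"], 50),
    "outcome": np.clip(rng.beta(2, 5, 150) + np.repeat([0.0, 0.05, 0.1], 50), 0.01, 0.99),
})
df["outcome_logit"] = logit(df["outcome"])   # bounded [0-1] outcome -> continuous scale

groups = [g["outcome_logit"].values for _, g in df.groupby("group")]
f_stat, p_value = f_oneway(*groups)          # one-way ANOVA across sensitive-feature groups
print(f"ANOVA p-value: {p_value:.4f}")
if p_value < 0.01:                           # the evaluator's default p_value is 0.01
    print(pairwise_tukeyhsd(df["outcome_logit"], df["group"], alpha=0.01))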
87 changes: 59 additions & 28 deletions credoai/evaluators/evaluator.py
@@ -11,6 +11,13 @@ class Evaluator(ABC):
Defines basic functions required from any evaluator object.
This class leverages the special method `__call__` to make artifacts
available in the class enclosure.
.. automethod:: __call__
.. automethod:: _init_artifacts
.. automethod:: _validate_arguments
.. automethod:: _setup
"""

def __init__(self):
@@ -21,10 +28,22 @@ def __init__(self):

@property
def name(self):
"""The name associated to the Evaluator, equals the class name."""
return self.__class__.__name__

@property
def results(self):
"""
Container for all results.
It is expected to be a list of EvidenceContainers. This is enforced in
the associated setter method.
Raises
------
NotRunError
Indicates that results are missing because the evaluator was not run.
"""
if self._results is not None:
return self._results
else:
@@ -34,6 +53,7 @@ def results(self):

@results.setter
def results(self, results):
"""Requires the results to be list of Evidence Containers"""
if not isinstance(results, list):
raise ValidationError("Results must be a list")
for result in results:
@@ -44,39 +64,32 @@
@property
@abstractmethod
def required_artifacts(self):
"""
The artifacts required for the evaluator to function.
This set contains the :ref:`artifacts<credoai.artifacts>` that Lens can feed to
an evaluator; the accepted values are ``{"model", "assessment_data", "training_data", "data"}``.
The string "data" means that the evaluator can be run on assessment and/or training data
(DataProfiler is an example). Lens will iterate over all the available artifacts internally.
The set can also include the string "sensitive_feature", indicating that the evaluator
depends on sensitive features. Lens will iterate over the available sensitive features
internally.
"""
pass

def __call__(self, **kwargs):
"""
This method is used to pass the model, assessment_data and training_data
artifacts to the instantiated evaluator.
After objects are passed, it performs arguments validation and calls _setup
>>> pipeline = Lens(model = model, assessment_data = dataset1)
where a group of arguments shareable across multiple evaluators is passed.
This method inside a specific evaluator takes the required arguments and
makes them available to the evaluator instance.
Requirements
-------------
_shared_arg_assignment requires explicitly named arguments.
Returns
-------
self
The method is called internally by the Lens instance, which only passes the
artifacts specified in the property :meth:`required_artifacts<Evaluator.required_artifacts>`.
Implementation template
-----------------------
The following code template provides an example of what the internals of this
method could look like:
>>> self.model = kwargs['model']
>>> self.assessment_dataset = kwargs['assessment_dataset']
where model and assessment_dataset are Lens() arguments.
After the artifacts are passed, it performs argument validation and calls :meth:`_setup<Evaluator._setup>`.
At the end of these operations, the validated artifacts are available in the evaluator enclosure.
"""
self._init_artifacts(kwargs)
self._validate_arguments()
@@ -89,14 +102,23 @@ def evaluate(self):
Execute any data/model processing required for the evaluator.
Populates the self.results object.
Returns
-------
self
"""
return self

def get_container_info(self, labels: dict = None, metadata: dict = None):
"""
Expands the base labels and metadata used to populate evidences.
Parameters
----------
labels : dict, optional
The default labels can be expanded by the user when defining a new evaluator.
A label is, in general, any information necessary to identify evidences in the Credo AI Platform.
By default None.
metadata : dict, optional
Any extra info the user wants to associate with the evidences. Compared
to labels, these are not necessary for evidence identification. By default None.
"""
info = self._base_container_info()
if labels:
info["labels"].update(labels)
@@ -105,13 +127,19 @@ def get_container_info(self, labels: dict = None, metadata: dict = None):
return info

def _base_container_info(self):
"""Extract basic info to populate labels and metadata."""
meta = {**self.metadata, **self._get_artifacts()}
labels = {"evaluator": self.name}
if "dataset_type" in meta:
labels["dataset_type"] = meta["dataset_type"]
return {"labels": labels, "metadata": meta}

def _get_artifacts(self):
"""
Extract artifacts that will be used by the evaluator.
The method also extracts name info from the available artifacts.
"""
artifacts = {}
save_keys = {
"model": "model_name",
@@ -140,6 +168,9 @@ def _init_artifacts(self, artifacts):

@abstractmethod
def _setup(self):
"""
Contains any extra steps necessary to initialize the evaluator
"""
pass

@abstractmethod
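Since this commit also adds the "how to make your evaluator" documentation, a hypothetical skeleton of a custom evaluator may help tie the pieces above together. Everything below is illustrative: the class name, validation logic, and X attribute are made up, and the evidence-container step is left as a comment because the container classes are not shown in this diff.

from credoai.evaluators.evaluator import Evaluator


class MyCustomEvaluator(Evaluator):
    """Toy evaluator requiring a model and assessment data."""

    required_artifacts = {"model", "assessment_data"}  # overrides the abstract property

    def _validate_arguments(self):
        # Called by __call__ after Lens has attached the artifacts to the instance;
        # the real evaluators raise credoai's ValidationError here.
        if getattr(self, "model", None) is None or getattr(self, "assessment_data", None) is None:
            raise ValueError("MyCustomEvaluator needs both a model and assessment data")

    def _setup(self):
        # Extra initialization once the validated artifacts are available.
        self.X = self.assessment_data.X  # assumes a tabular-style artifact with an X attribute
        return self

    def evaluate(self):
        # Compute metrics and wrap them in EvidenceContainers before assigning to
        # self.results (the setter enforces the container type), e.g.:
        # self.results = [SomeEvidenceContainer(..., **self.get_container_info())]
        return self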
86 changes: 42 additions & 44 deletions credoai/evaluators/identity_verification.py
@@ -26,10 +26,11 @@


class IdentityVerification(Evaluator):
"""Pair-wise-comparison-based identity verification evaluator for Credo AI
"""
Pair-wise-comparison-based identity verification evaluator for Credo AI
This evaluator takes in identity verification data and
provides functionality to perform performance and fairness assessment
Parameters
----------
@@ -65,50 +66,47 @@ class IdentityVerification(Evaluator):
Example
--------
import pandas as pd
from credoai.lens import Lens
from credoai.artifacts import ComparisonData, ComparisonModel
from credoai.evaluators import IdentityVerification
evaluator = IdentityVerification(similarity_thresholds=[60, 99])
pairs = pd.DataFrame({
'source-subject-id': ['s0', 's0', 's0', 's0', 's1', 's1', 's1', 's1', 's1', 's2'],
'source-subject-data-sample': ['s00', 's00', 's00', 's00', 's10', 's10', 's10', 's11', 's11', 's20'],
'target-subject-id': ['s1', 's1', 's2', 's3', 's1', 's2', 's3', 's2', 's3', 's3'],
'target-subject-data-sample': ['s10', 's11', 's20', 's30', 's11', 's20', 's30', 's20', 's30', 's30']
})
subjects_sensitive_features = pd.DataFrame({
'subject-id': ['s0', 's1', 's2', 's3'],
'gender': ['female', 'male', 'female', 'female']
})
class FaceCompare:
# a dummy selfie comparison model
def compare(self, pairs):
similarity_scores = [31.5, 16.7, 20.8, 84.4, 12.0, 15.2, 45.8, 23.5, 28.5, 44.5]
return similarity_scores
face_compare = FaceCompare()
credo_data = ComparisonData(
name="face-data",
pairs=pairs,
subjects_sensitive_features=subjects_sensitive_features
)
credo_model = ComparisonModel(
name="face-compare",
model_like=face_compare
)
pipeline = Lens(model=credo_model, assessment_data=credo_data)
pipeline.add(evaluator)
>>> import pandas as pd
>>> from credoai.lens import Lens
>>> from credoai.artifacts import ComparisonData, ComparisonModel
>>> from credoai.evaluators import IdentityVerification
>>> evaluator = IdentityVerification(similarity_thresholds=[60, 99])
>>> import doctest
>>> doctest.ELLIPSIS_MARKER = '-etc-'
>>> pairs = pd.DataFrame({
... 'source-subject-id': ['s0', 's0', 's0', 's0', 's1', 's1', 's1', 's1', 's1', 's2'],
... 'source-subject-data-sample': ['s00', 's00', 's00', 's00', 's10', 's10', 's10', 's11', 's11', 's20'],
... 'target-subject-id': ['s1', 's1', 's2', 's3', 's1', 's2', 's3', 's2', 's3', 's3'],
... 'target-subject-data-sample': ['s10', 's11', 's20', 's30', 's11', 's20', 's30', 's20', 's30', 's30']
... })
>>> subjects_sensitive_features = pd.DataFrame({
... 'subject-id': ['s0', 's1', 's2', 's3'],
... 'gender': ['female', 'male', 'female', 'female']
... })
>>> class FaceCompare:
... # a dummy selfie comparison model
... def compare(self, pairs):
... similarity_scores = [31.5, 16.7, 20.8, 84.4, 12.0, 15.2, 45.8, 23.5, 28.5, 44.5]
... return similarity_scores
>>> face_compare = FaceCompare()
>>> credo_data = ComparisonData(
... name="face-data",
... pairs=pairs,
... subjects_sensitive_features=subjects_sensitive_features
... )
>>> credo_model = ComparisonModel(
... name="face-compare",
... model_like=face_compare
... )
>>> pipeline = Lens(model=credo_model, assessment_data=credo_data)
>>> pipeline.add(evaluator) # doctest: +ELLIPSIS
-etc-
>>> pipeline.run() # doctest: +ELLIPSIS
-etc-
>>> pipeline.get_results() # doctest: +ELLIPSIS
-etc-
"""

def __init__(
3 changes: 2 additions & 1 deletion credoai/evaluators/privacy.py
@@ -72,7 +72,8 @@


class Privacy(Evaluator):
"""Privacy module for Credo AI.
"""
Privacy module for Credo AI.
This module takes in a classification model and data and provides functionality
to perform privacy assessment
3 changes: 2 additions & 1 deletion credoai/evaluators/ranking_fairness.py
@@ -34,7 +34,8 @@


class RankingFairness(Evaluator):
"""Ranking fairness evaluator for Credo AI
"""
Ranking fairness evaluator for Credo AI
This module takes in ranking results and provides functionality to perform fairness assessment.
The results should include rankings, sensitive features, and optionally, scores.
3 changes: 2 additions & 1 deletion credoai/evaluators/security.py
@@ -30,7 +30,8 @@


class Security(Evaluator):
"""Security module for Credo AI.
"""
Security module for Credo AI.
This module takes in a classification model and data and
provides functionality to perform security assessment
