From 8cfa9de08ad9a1e92430244bdd06058287229023 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <shoffman@ibm.com>
Date: Thu, 16 May 2019 15:01:12 -0400
Subject: [PATCH 01/61] Initial sklearn-compatible datasets and metrics

---
 aif360/sklearn/__init__.py                 |   0
 aif360/sklearn/datasets/__init__.py        |   2 +
 aif360/sklearn/datasets/openml_datasets.py | 140 +++++++++++++
 aif360/sklearn/datasets/utils.py           | 108 +++++++++++
 aif360/sklearn/metrics/__init__.py         |   1 +
 aif360/sklearn/metrics/metrics.py          | 216 +++++++++++++++++++++
 docs/source/conf.py                        |   1 +
 7 files changed, 468 insertions(+)
 create mode 100644 aif360/sklearn/__init__.py
 create mode 100644 aif360/sklearn/datasets/__init__.py
 create mode 100644 aif360/sklearn/datasets/openml_datasets.py
 create mode 100644 aif360/sklearn/datasets/utils.py
 create mode 100644 aif360/sklearn/metrics/__init__.py
 create mode 100644 aif360/sklearn/metrics/metrics.py

diff --git a/aif360/sklearn/__init__.py b/aif360/sklearn/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/aif360/sklearn/datasets/__init__.py b/aif360/sklearn/datasets/__init__.py
new file mode 100644
index 00000000..1a5a27f0
--- /dev/null
+++ b/aif360/sklearn/datasets/__init__.py
@@ -0,0 +1,2 @@
+from aif360.sklearn.datasets.utils import *
+from aif360.sklearn.datasets.openml_datasets import *
diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
new file mode 100644
index 00000000..b902b436
--- /dev/null
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -0,0 +1,140 @@
+import os
+
+import numpy as np
+import pandas as pd
+from sklearn.datasets import fetch_openml
+
+from aif360.sklearn.datasets.utils import standarize_dataset
+
+
+# cache location
+DATA_HOME = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                         '..', 'data', 'raw')
+# name -> data_id mapping
+DATA_ID = {'adult': 1590,
+           'german': 31,
+           'bank': 1461  # TODO: this seems to be an old version
+}
+
+def fetch_and_format_openml(name):
+    """Fetch openml dataset by name and format categorical features.
+
+    Args:
+        name ({'adult', 'german', or 'bank'}): Name of OpenML dataset. Converted
+            to data_id using `DATA_ID` mapping.
+
+    Returns:
+        pandas.DataFrame: A DataFrame containing all data, including target,
+            with categorical features converted to 'category' dtypes.
+    """
+    def categorize(item):
+        return cats[int(item)] if not pd.isna(item) else item
+
+    data_id = DATA_ID[name]
+    data = fetch_openml(data_id=data_id, data_home=DATA_HOME, target_column=None)
+    df = pd.DataFrame(data.data, columns=data.feature_names)
+
+    for col, cats in data.categories.items():
+        df[col] = df[col].apply(categorize).astype('category')
+
+    return df
+
+def load_adult(usecols=[], dropcols=[], numeric_only=False, dropna=True):
+    """Load the Adult Census Income Dataset.
+
+    Args:
+        usecols (single label or list-like, optional): Column name(s) to keep.
+            All others are dropped.
+        dropcols (single label or list-like, optional): Column name(s) to drop.
+        numeric_only (bool): Drop all non-numeric feature columns.
+        dropna (bool): Drop rows with NAs.
+
+    Returns:
+        namedtuple: Tuple containing X, y, and sample_weights for the Adult
+            dataset accessible by index or name.
+
+    Examples:
+        >>> adult = load_adult()
+        >>> adult.X.shape
+        (45222, 13)
+
+        >>> adult_num = load_adult(numeric_only=True)
+        >>> adult_num.X.shape
+        (48842, 5)
+
+        >>> privileged = adult.xs('White', level='race', drop_level=False)
+        >>> privileged = adult.query('race == "White"')
+    """
+    return standarize_dataset(fetch_and_format_openml('adult'),
+                              protected_attributes=['race', 'sex'],
+                              target='class', pos_label='>50K',
+                              sample_weight='fnlwgt', usecols=usecols,
+                              dropcols=dropcols, numeric_only=numeric_only,
+                              dropna=dropna)
+
+def load_german(usecols=[], dropcols=[], numeric_only=False, dropna=True):
+    """Load the German Credit Dataset.
+
+    Args:
+        usecols (single label or list-like, optional): Column name(s) to keep.
+            All others are dropped.
+        dropcols (single label or list-like, optional): Column name(s) to drop.
+        numeric_only (bool): Drop all non-numeric feature columns.
+        dropna (bool): Drop rows with NAs.
+
+    Returns:
+        namedtuple: Tuple containing X and y for the German dataset accessible
+            by index or name.
+
+    Examples:
+        >>> german = load_german()
+        >>> german.X.shape
+        (1000, 21)
+
+        >>> german_num = load_german(numeric_only=True)
+        >>> german_num.X.shape
+        (1000, 7)
+    """
+    df = fetch_and_format_openml('german')
+    # Note: marital_status directly implies sex. i.e. 'div/dep/mar' => 'female'
+    # and all others => 'male'
+    personal_status = df.pop('personal_status').str.split(expand=True)
+    personal_status.columns = ['sex', 'marital_status']
+    df = df.join(personal_status.astype('category'))
+    return standarize_dataset(df, protected_attributes=['sex', 'age'],
+                              target='class', pos_label='good',
+                              usecols=usecols, dropcols=dropcols,
+                              numeric_only=numeric_only, dropna=dropna)
+
+def load_bank(usecols=[], dropcols='duration', numeric_only=False, dropna=False):
+    """Load the Bank Marketing Dataset.
+
+    Args:
+        usecols (single label or list-like, optional): Column name(s) to keep.
+            All others are dropped.
+        dropcols (single label or list-like, optional): Column name(s) to drop.
+        numeric_only (bool): Drop all non-numeric feature columns.
+        dropna (bool): Drop rows with NAs.
+
+    Returns:
+        namedtuple: Tuple containing X and y for the Bank dataset accessible by
+            index or name.
+
+    Examples:
+        >>> bank = load_bank()
+        >>> bank.X.shape
+        (45211, 15)
+
+        >>> bank_num = load_bank(numeric_only=True)
+        >>> bank_num.X.shape
+        (45211, 6)
+    """
+    df = fetch_and_format_openml('bank')
+    df.columns = ['age', 'job', 'marital', 'education', 'default', 'balance',
+                  'housing', 'loan', 'contact', 'day', 'month', 'duration',
+                  'campaign', 'pdays', 'previous', 'poutcome', 'y']
+    # df = df.replace({'unknown': None})  # TODO: this messes up the categories
+    # df.select_dtypes('object').astype('category', inplace=True)
+    return standarize_dataset(df, protected_attributes=['age'], target='y',
+                              pos_label='2', usecols=usecols, dropcols=dropcols,
+                              numeric_only=numeric_only, dropna=dropna)
diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
new file mode 100644
index 00000000..65239813
--- /dev/null
+++ b/aif360/sklearn/datasets/utils.py
@@ -0,0 +1,108 @@
+from collections import namedtuple
+
+import numpy as np
+import pandas as pd
+from pandas.core.dtypes.common import is_list_like
+from sklearn.compose import make_column_transformer
+from sklearn.preprocessing import OneHotEncoder
+
+# TODO: binarize protected_attributes option?
+def standarize_dataset(df, *, protected_attributes, target, pos_label=None,
+                       sample_weight=None, usecols=[], dropcols=[],
+                       numeric_only=False, dropna=True):
+    """Separate data, targets, and possibly sample weights and populate
+    protected attributes as sample properties.
+
+    Args:
+        df (pandas.DataFrame): DataFrame with features and target together.
+        protected_attributes (single label or list-like): Label or list of
+            labels corresponding to protected attribute columns. Even if these
+            are dropped from the features, they remain in the index.
+        target (single label or list-like): Column label of the target (outcome)
+            variable.
+        pos_label (scalar, list-like, or function, optional): A value, list of
+            values, or function designating the positive binary label from the
+            raw data.
+        sample_weight (single label, optional): Name of the column containing
+            sample weights.
+        usecols (single label or list-like, optional): Column(s) to keep. All
+            others are dropped.
+        dropcols (single label or list-like, optional): Column(s) to drop.
+        numeric_only (bool): Drop all non-numeric feature columns.
+        dropna (bool): Drop rows with NAs.
+
+    Returns:
+        (X, y, [sample_weight]):
+
+            * `pandas.DataFrame`: Feature array.
+
+            * `pandas.DataFrame` or `pandas.Series`: Target array.
+
+            * `pandas.Series`, optional: Sample weights.
+
+    Note:
+        The order of execution for the dropping parameters is: dropcols ->
+        usecols -> numeric_only -> dropna.
+
+    Examples:
+        >>> import pandas as pd
+        >>> from sklearn.linear_model import LinearRegression
+
+        >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['X', 'y', 'Z'])
+        >>> train = standarize_dataset(df, protected_attributes='Z', target='y')
+        >>> reg = LinearRegression().fit(*train)
+
+        >>> import numpy as np
+        >>> from sklearn.datasets import make_classification
+        >>> from sklearn.model_selection import train_test_split
+        >>> df = pd.DataFrame(np.hstack(make_classification(n_features=5)))
+        >>> X, y = standarize_dataset(df, protected_attributes=0, target=5)
+        >>> X_tr, X_te, y_tr, y_te = train_test_split(X, y)
+    """
+    df = df.set_index(protected_attributes, drop=False)  # append=True?
+
+    y = df.pop(target)
+    if pos_label is not None:
+        if not callable(pos_label):
+            pos = pos_label if is_list_like(pos_label) else [pos_label]
+            pos = np.array(pos)
+            # find all instances which match any of the favorable classes
+            def pos_label(val):
+                # return np.logical_or.reduce(np.equal.outer(pos, col), axis=(0, 2))
+                return np.logical_or.reduce(pos == val)
+
+        # TODO: won't work for multilabel (target is list) case, try DataFrame.eval()?
+        y = y.apply(pos_label).astype('int')
+
+    # Column-wise drops
+    df = df.drop(dropcols, axis=1)
+    if usecols:
+        df = df[usecols]
+    if numeric_only:
+        df = df.select_dtypes(['number', 'bool'])
+        # upcast all feature dimensions to a consistent numerical dtype
+        df = df.apply(pd.to_numeric, axis=1)
+    # Index-wise drops
+    if dropna:
+        notna = df.notna().all(axis=1) & y.notna()
+        df = df.loc[notna]
+        y = y.loc[notna]
+
+    if sample_weight is not None:
+        sample_weight = df.pop(sample_weight)
+        # return namedtuple('Dataset', ['X', 'y', 'sample_weight'])(df, y, sample_weight)
+        # TODO: is this less readable?
+        return namedtuple('Dataset', 'X y sample_weight')(df, y, sample_weight)
+    return namedtuple('Dataset', ['X', 'y'])(df, y)
+
+def make_onehot_transformer(X):
+    """Shortcut for encoding categorical features as one-hot vectors.
+
+    Note: This changes the column order as well as removes DataFrame formatting.
+
+    Returns:
+        sklearn.compose.ColumnTransformer: Class capable of transforming
+            categorical features in X to one-hot features.
+    """
+    return make_column_transformer((OneHotEncoder(), X.dtypes == 'category'),
+                                   remainder='passthrough')
diff --git a/aif360/sklearn/metrics/__init__.py b/aif360/sklearn/metrics/__init__.py
new file mode 100644
index 00000000..ceaef288
--- /dev/null
+++ b/aif360/sklearn/metrics/__init__.py
@@ -0,0 +1 @@
+from aif360.sklearn.metrics.metrics import *
diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
new file mode 100644
index 00000000..ced11a34
--- /dev/null
+++ b/aif360/sklearn/metrics/metrics.py
@@ -0,0 +1,216 @@
+from functools import partial
+
+import numpy as np
+from sklearn.metrics import make_scorer, recall_score
+from sklearn.neighbors import NearestNeighbors
+
+
+# # ============================== VALIDATORS ====================================
+# def validate_index_match(arg1, arg2):
+#     """
+#     Raises:
+#         ValueError: If arg1 and arg2 do not have equivalent indices.
+#     """
+#     if not arg1.index.equals(arg2.index):
+#         raise ValueError("Indices must match to perform a valid comparison.")
+
+
+# ============================= META-METRICS ===================================
+def difference(func, y_true, y_pred=None, *, priv_expr):
+    """Compute the difference between unprivileged and privileged subsets for an
+    arbitrary metric.
+
+    Note: The optimal value of a difference is 0. To make it a scorer, one must
+    take the absolute value and set `greater_is_better` to False.
+
+    Unprivileged group is taken to be the inverse of the privileged group.
+
+    Args:
+        func (function): A metric function from `aif360.sklearn.metrics` or
+            `sklearn.metrics`. Keyword args should be filled in with partial.
+        y_true (pandas.Series): Ground truth (correct) target values.
+        y_pred (array-like, optional): Estimated targets as returned by a
+            classifier.
+        priv_expr (string, keyword-only): A query expression describing the
+            privileged group (see `pandas.DataFrame.eval` and
+            `pandas.DataFrame.query` for details).
+
+    Returns:
+        scalar: Difference in metric value for unprivileged and privileged groups.
+
+    Examples:
+        >>> X, y = load_german(numeric_only=True)
+        >>> y_pred = LogisticRegression().fit(X, y).predict(X)
+        >>> difference(precision_score, y, y_pred, priv_expr='sex == "male"')
+        -0.06955430006277463
+    """
+    # Note: provide blank name because if index name clashes with column name,
+    # column name gets preference
+    priv = y_true.to_frame('').eval(priv_expr)
+    if y_pred is None:
+        return func(y_true[~priv]) - func(y_true[priv])
+    return func(y_true[~priv], y_pred[~priv]) - func(y_true[priv], y_pred[priv])
+
+def ratio(func, y_true, y_pred=None, *, priv_expr):
+    """Compute the ratio between unprivileged and privileged subsets for an
+    arbitrary metric.
+
+    Note: The optimal value of a ratio is 1. To make it a scorer, one must
+    subtract 1, take the absolute value, and set `greater_is_better` to False.
+
+    Unprivileged group is taken to be the inverse of the privileged group.
+
+    Args:
+        func (function): A metric function from `aif360.sklearn.metrics` or
+            `sklearn.metrics`. Keyword args should be filled in with partial.
+        y_true (pandas.Series): Ground truth (correct) target values.
+        y_pred (array-like, optional): Estimated targets as returned by a
+            classifier. If omitted, `func` is called with `y_true` only.
+        priv_expr (string, keyword-only): A query expression describing the
+            privileged group (see `pandas.DataFrame.eval` and
+            `pandas.DataFrame.query` for details).
+
+    Returns:
+        scalar: Ratio of metric values for unprivileged and privileged groups.
+    """
+    # Note: provide blank name because if index name clashes with column name,
+    # column name gets preference
+    priv = y_true.to_frame('').eval(priv_expr)
+    if y_pred is None:
+        return func(y_true[~priv]) / func(y_true[priv])
+    return func(y_true[~priv], y_pred[~priv]) / func(y_true[priv], y_pred[priv])
+
+
+# =========================== SCORER FACTORIES =================================
+def make_difference_scorer(func):
+    return make_scorer(lambda y, y_pred, **kw: abs(func(y, y_pred, **kw)),
+                       greater_is_better=False)
+
+def make_ratio_scorer(func):
+    return make_scorer(lambda y, y_pred, **kw: abs(func(y, y_pred, **kw) - 1),
+                       greater_is_better=False)
+
+
+# ================================ HELPERS =====================================
+def specificity_score(y_true, y_pred, neg_label=0, sample_weight=None):
+    """Compute the specificity or true negative rate.
+
+    Args:
+        y_true (array-like): Ground truth (correct) target values.
+        y_pred (array-like): Estimated targets as returned by a classifier.
+        neg_label (scalar, optional): The class to report. Note: the data should
+            be binary; this is not checked here.
+        sample_weight (array-like, optional): Forwarded to `recall_score`.
+
+    Returns:
+        Recall of the `neg_label` class as computed by `recall_score`.
+    """
+    return recall_score(y_true, y_pred, pos_label=neg_label,
+                        sample_weight=sample_weight)
+
+def base_rate(y, y_pred=None, pos_label=1, sample_weight=None):
+    y = np.array(y)
+    if sample_weight is not None:
+        return ((y == pos_label) * sample_weight).sum() / sample_weight.sum()
+    return (y == pos_label).sum() / len(y)
+
+def selection_rate(y_true, y_pred, pos_label=1, sample_weight=None):
+    return base_rate(y_pred, pos_label=pos_label, sample_weight=sample_weight)
+
+
+# ============================ GROUP FAIRNESS ==================================
+def statistical_parity_difference(*y, priv_expr, pos_label=1, sample_weight=None):
+    rate = base_rate if len(y) == 1 or y[1] is None else selection_rate
+    rate = partial(rate, pos_label=pos_label, sample_weight=sample_weight)
+    return difference(rate, *y, priv_expr=priv_expr)
+
+def disparate_impact_ratio(*y, priv_expr, pos_label=1, sample_weight=None):
+    rate = base_rate if len(y) == 1 or y[1] is None else selection_rate
+    rate = partial(rate, pos_label=pos_label, sample_weight=sample_weight)
+    return ratio(rate, *y, priv_expr=priv_expr)
+
+
+def equal_opportunity_difference(y_true, y_pred, priv_expr, pos_label=1,
+                                 sample_weight=None):
+    rec = partial(recall_score, pos_label=pos_label,
+                  sample_weight=sample_weight)
+    return difference(rec, y_true, y_pred, priv_expr=priv_expr)
+
+def average_odds_difference(y_true, y_pred, priv_expr, pos_label=1,
+                            sample_weight=None):
+    tnr = partial(specificity_score, labels=labels, pos_label=pos_label,
+                  sample_weight=sample_weight)
+    tpr = partial(recall_score, labels=labels, pos_label=pos_label,
+                  sample_weight=sample_weight)
+    return 0.5 * (difference(tnr, y_true, y_pred, priv_expr=priv_expr)
+                + difference(tpr, y_true, y_pred, priv_expr=priv_expr))
+
+def average_odds_error(y_true, y_pred, priv_expr, pos_label=1,
+                       sample_weight=None):
+    tnr = partial(specificity_score, pos_label=pos_label,
+                  sample_weight=sample_weight)
+    tpr = partial(recall_score, pos_label=pos_label, sample_weight=sample_weight)
+    return 0.5 * (abs(difference(tnr, y_true, y_pred, priv_expr=priv_expr))
+                + abs(difference(tpr, y_true, y_pred, priv_expr=priv_expr)))
+
+
+# ================================ INDICES =====================================
+def generalized_entropy_index(b, alpha=2):
+    if alpha == 0:
+        return -(np.log(b / b.mean()) / b.mean()).mean()
+    elif alpha == 1:
+        # moving the b inside the log allows for 0 values
+        return (np.log((b / b.mean())**b) / b.mean()).mean()
+    else:
+        return ((b / b.mean())**alpha - 1).mean() / (alpha * (alpha - 1))
+
+def generalized_entropy_error(y_true, y_pred, alpha=2, pos_label=1):
+                              # sample_weight=None):
+    b = 1 + (y_pred == pos_label) - (y_true == pos_label)
+    return generalized_entropy_index(b, alpha=alpha)
+
+def between_group_generalized_entropy_error(priv_expr, y_true, y_pred, alpha=2,
+                                            pos_label=1):
+    b = np.empty_like(y_true, dtype='float')
+    priv = y_true.to_frame().eval(priv_expr)
+    b[priv] = (1 + (y_pred.loc[priv] == pos_label)
+                 - (y_true.loc[priv] == pos_label)).mean()
+    b[~priv] = (1 + (y_pred.loc[~priv] == pos_label)
+                  - (y_true.loc[~priv] == pos_label)).mean()
+    return generalized_entropy_index(b, alpha=alpha)
+
+def theil_index(b):
+    return generalized_entropy_index(b, alpha=1)
+
+def coefficient_of_variation(b):
+    return 2 * np.sqrt(generalized_entropy_index(b, alpha=2))
+
+
+# ========================== INDIVIDUAL FAIRNESS ===============================
+# TODO: not technically a scorer but you should be allowed to score transformers
+# Is consistency_difference possible?
+def consistency_score(X, y, n_neighbors=5):
+    """Return 1 - mean |y_i - mean(y over i's n_neighbors nearest rows of X)|."""
+    # keyword args: scikit-learn >= 1.0 rejects positional estimator params
+    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree').fit(X)
+    _, indices = nbrs.kneighbors(X)
+    y = np.asarray(y)  # 2-D positional indexing below needs an ndarray
+    return 1 - abs(y - y[indices].mean(axis=1)).mean()
+
+
+# ================================ ALIASES =====================================
+def sensitivity_score(y_true, y_pred, pos_label=1, sample_weight=None):
+    """Alias of `sklearn.metrics.recall_score` for binary classes only."""
+    return recall_score(y_true, y_pred, pos_label=pos_label,
+                        sample_weight=sample_weight)
+
+# def false_negative_rate_error(y_true, y_pred, pos_label=1, sample_weight=None):
+#     return 1 - recall_score(y_true, y_pred, pos_label=pos_label,
+#                             sample_weight=sample_weight)
+
+# def false_positive_rate_error(y_true, y_pred, pos_label=1, sample_weight=None):
+#     return 1 - specificity_score(y_true, y_pred, pos_label=pos_label,
+#                                  sample_weight=sample_weight)
+
+mean_difference = statistical_parity_difference
+mean_difference.__doc__ = """Alias of :meth:`statistical_parity_difference`."""
diff --git a/docs/source/conf.py b/docs/source/conf.py
index c96ac4c0..1c302d1e 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -41,6 +41,7 @@
 intersphinx_mapping = {'numpy': ('http://docs.scipy.org/doc/numpy/', None),
     'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None),
     'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
+    'sklearn': ('https://scikit-learn.org/stable/modules/generated/', None),
     'python': ('https://docs.python.org/{}.{}'.format(*sys.version_info), None)}
 
 autoclass_content = 'both'

From 1f4ae57756be3b23808ce84f0616bb24b2b0ce6f Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <shoffman@ibm.com>
Date: Thu, 16 May 2019 17:43:44 -0400
Subject: [PATCH 02/61] added initial dataset tests

---
 aif360/sklearn/datasets/openml_datasets.py |  3 -
 aif360/sklearn/datasets/utils.py           | 19 +++--
 aif360/sklearn/tests/test_datasets.py      | 84 ++++++++++++++++++++++
 3 files changed, 96 insertions(+), 10 deletions(-)
 create mode 100644 aif360/sklearn/tests/test_datasets.py

diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index b902b436..9d9986ea 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -61,9 +61,6 @@ def load_adult(usecols=[], dropcols=[], numeric_only=False, dropna=True):
         >>> adult_num = load_adult(numeric_only=True)
         >>> adult_num.X.shape
         (48842, 5)
-
-        >>> privileged = adult.xs('White', level='race', drop_level=False)
-        >>> privileged = adult.query('race == "White"')
     """
     return standarize_dataset(fetch_and_format_openml('adult'),
                               protected_attributes=['race', 'sex'],
diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index 65239813..84d3551e 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -32,13 +32,16 @@ def standarize_dataset(df, *, protected_attributes, target, pos_label=None,
         dropna (bool): Drop rows with NAs.
 
     Returns:
-        (X, y, [sample_weight]):
+        namedtuple:
 
-            * `pandas.DataFrame`: Feature array.
+            A tuple-like object where items can be accessed by index or name.
+            Contains the following attributes:
 
-            * `pandas.DataFrame` or `pandas.Series`: Target array.
+            * `pandas.DataFrame`: X: Feature array.
 
-            * `pandas.Series`, optional: Sample weights.
+            * `pandas.DataFrame` or `pandas.Series`: y: Target array.
+
+            * `pandas.Series`, optional: sample_weight: Sample weights.
 
     Note:
         The order of execution for the dropping parameters is: dropcols ->
@@ -77,6 +80,9 @@ def pos_label(val):
     # Column-wise drops
     df = df.drop(dropcols, axis=1)
     if usecols:
+        if not is_list_like(usecols):
+            # make sure we don't return a Series instead of a DataFrame
+            usecols = [usecols]
         df = df[usecols]
     if numeric_only:
         df = df.select_dtypes(['number', 'bool'])
@@ -90,9 +96,8 @@ def pos_label(val):
 
     if sample_weight is not None:
         sample_weight = df.pop(sample_weight)
-        # return namedtuple('Dataset', ['X', 'y', 'sample_weight'])(df, y, sample_weight)
-        # TODO: is this less readable?
-        return namedtuple('Dataset', 'X y sample_weight')(df, y, sample_weight)
+        return namedtuple('WeightedDataset', ['X', 'y', 'sample_weight'])(
+                          df, y, sample_weight)
     return namedtuple('Dataset', ['X', 'y'])(df, y)
 
 def make_onehot_transformer(X):
diff --git a/aif360/sklearn/tests/test_datasets.py b/aif360/sklearn/tests/test_datasets.py
new file mode 100644
index 00000000..1b997c72
--- /dev/null
+++ b/aif360/sklearn/tests/test_datasets.py
@@ -0,0 +1,84 @@
+from functools import partial
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from aif360.sklearn.datasets import *
+
+
+df = pd.DataFrame([[1, 2, 3, 'a'], [5, 6, 7, 'b'], [np.nan, 10, 11, 'c']],
+                  columns=['X1', 'X2', 'y', 'Z'])  # last row is missing X1
+basic = partial(standarize_dataset, df=df, protected_attributes='Z', target='y',
+                dropna=False)  # keep the NaN row unless a test opts in
+
+def test_standardize_dataset_basic():
+    dataset = basic()
+    X, y = dataset
+    X, y = dataset.X, dataset.y
+    with pytest.raises(ValueError):
+        X, y, sample_weight = dataset
+    with pytest.raises(AttributeError):
+        dataset.sample_weight
+    assert isinstance(X, pd.DataFrame)
+    assert isinstance(y, pd.Series)
+    assert X.index.equals(y.index)
+    assert X.shape == (3, 3)
+
+def test_sample_weight_basic():
+    with_weights = basic(sample_weight='X2')
+    assert len(with_weights) == 3
+    assert with_weights.X.shape == (3, 2)
+
+def test_pos_label_basic():
+    assert (basic().y == [3, 7, 11]).all()
+    assert (basic(pos_label=3).y == [1, 0, 0]).all()
+    assert (basic(pos_label=[3, 7, 11]).y == 1).all()
+    assert (basic(pos_label=lambda y: 10 > y > 5).y == [0, 1, 0]).all()
+
+def test_usecols_dropcols_basic():
+    assert basic(usecols='X1').X.columns.to_list() == ['X1']
+    assert basic(usecols=['X1', 'Z']).X.columns.to_list() == ['X1', 'Z']
+
+    assert basic(dropcols='X1').X.columns.to_list() == ['X2', 'Z']
+    assert basic(dropcols=['X1', 'Z']).X.columns.to_list() == ['X2']
+
+    assert basic(usecols='X1', dropcols=['X2']).X.columns.to_list() == ['X1']
+    with pytest.raises(KeyError):
+        basic(usecols=['X1', 'X2'], dropcols='X2')
+
+def test_dropna_basic():
+    basic_dropna = partial(standarize_dataset, df=df, protected_attributes='Z',
+                           target='y', dropna=True)
+    assert basic_dropna().X.shape == (2, 3)
+    assert basic(dropcols='X1').X.shape == (3, 2)
+
+def test_numeric_only_basic():
+    assert basic(numeric_only=True).X.shape == (3, 2)
+    assert (basic(numeric_only=True).X.dtypes == 'float').all()
+    assert basic(dropcols='Z', numeric_only=True).X.shape == (3, 2)
+    assert (basic(dropcols='X1', numeric_only=True).X.dtypes == 'int').all()
+
+def test_fetch_and_format_openml():
+    df = fetch_and_format_openml('german')
+    assert df.equals(df.select_dtypes(['number', 'category']))
+
+def test_load_adult():
+    adult = load_adult()
+    assert len(adult) == 3
+    assert adult.X.shape == (45222, 13)
+    assert load_adult(dropna=False).X.shape == (48842, 13)
+    assert load_adult(numeric_only=True).X.shape == (48842, 5)
+
+def test_load_german():
+    german = load_german()
+    assert len(german) == 2
+    assert german.X.shape == (1000, 21)
+    assert load_german(numeric_only=True).X.shape == (1000, 7)
+
+def test_load_bank():
+    bank = load_bank()
+    assert len(bank) == 2
+    assert bank.X.shape == (45211, 15)
+    assert load_bank(dropcols=[]).X.shape == (45211, 16)
+    assert load_bank(numeric_only=True).X.shape == (45211, 6)

From 2aef3fca622d12384832d91bdcfe442c799d2f75 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <shoffman@ibm.com>
Date: Thu, 16 May 2019 20:46:30 -0400
Subject: [PATCH 03/61] fixed to_list for older pandas versions

---
 aif360/sklearn/tests/test_datasets.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/aif360/sklearn/tests/test_datasets.py b/aif360/sklearn/tests/test_datasets.py
index 1b997c72..9b00e801 100644
--- a/aif360/sklearn/tests/test_datasets.py
+++ b/aif360/sklearn/tests/test_datasets.py
@@ -37,13 +37,13 @@ def test_pos_label_basic():
     assert (basic(pos_label=lambda y: 10 > y > 5).y == [0, 1, 0]).all()
 
 def test_usecols_dropcols_basic():
-    assert basic(usecols='X1').X.columns.to_list() == ['X1']
-    assert basic(usecols=['X1', 'Z']).X.columns.to_list() == ['X1', 'Z']
+    assert basic(usecols='X1').X.columns.tolist() == ['X1']
+    assert basic(usecols=['X1', 'Z']).X.columns.tolist() == ['X1', 'Z']
 
-    assert basic(dropcols='X1').X.columns.to_list() == ['X2', 'Z']
-    assert basic(dropcols=['X1', 'Z']).X.columns.to_list() == ['X2']
+    assert basic(dropcols='X1').X.columns.tolist() == ['X2', 'Z']
+    assert basic(dropcols=['X1', 'Z']).X.columns.tolist() == ['X2']
 
-    assert basic(usecols='X1', dropcols=['X2']).X.columns.to_list() == ['X1']
+    assert basic(usecols='X1', dropcols=['X2']).X.columns.tolist() == ['X1']
     with pytest.raises(KeyError):
         basic(usecols=['X1', 'X2'], dropcols='X2')
 

From 2b1799a4bd8871f4de74c859d46dc545af9f4998 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Fri, 17 May 2019 18:41:36 -0400
Subject: [PATCH 04/61] added metrics tests

---
 aif360/datasets/adult_dataset.py           |   2 +-
 aif360/sklearn/datasets/openml_datasets.py |   1 -
 aif360/sklearn/metrics/metrics.py          | 108 +++++++++++----------
 aif360/sklearn/tests/test_metrics.py       |  71 ++++++++++++++
 4 files changed, 129 insertions(+), 53 deletions(-)
 create mode 100644 aif360/sklearn/tests/test_metrics.py

diff --git a/aif360/datasets/adult_dataset.py b/aif360/datasets/adult_dataset.py
index e0c432b1..ea4b73f7 100644
--- a/aif360/datasets/adult_dataset.py
+++ b/aif360/datasets/adult_dataset.py
@@ -99,7 +99,7 @@ def __init__(self, label_name='income-per-year',
             import sys
             sys.exit(1)
 
-        df = pd.concat([train, test], ignore_index=True)
+        df = pd.concat([test, train], ignore_index=True)
 
         super(AdultDataset, self).__init__(df=df, label_name=label_name,
             favorable_classes=favorable_classes,
diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 9d9986ea..51646a73 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -1,6 +1,5 @@
 import os
 
-import numpy as np
 import pandas as pd
 from sklearn.datasets import fetch_openml
 
diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index ced11a34..2a4da306 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -16,7 +16,7 @@
 
 
 # ============================= META-METRICS ===================================
-def difference(func, y_true, y_pred=None, *, priv_expr):
+def difference(func, y, *args, priv_expr, sample_weight=None, **kwargs):
     """Compute the difference between unprivileged and privileged subsets for an
     arbitrary metric.
 
@@ -27,13 +27,15 @@ def difference(func, y_true, y_pred=None, *, priv_expr):
 
     Args:
         func (function): A metric function from `aif360.sklearn.metrics` or
-            `sklearn.metrics`. Keyword args should be filled in with partial.
-        y_true (pandas.Series): Ground truth (correct) target values.
-        y_pred (array-like, optional): Estimated targets as returned by a
-            classifier.
+            `sklearn.metrics`.
+        y (pandas.Series): Outcome vector with protected attributes as index.
+        *args: Additional positional args to be passed through to `func`.
         priv_expr (string, keyword-only): A query expression describing the
             privileged group (see `pandas.DataFrame.eval` and
             `pandas.DataFrame.query` for details).
+        sample_weight (array-like, optional): Sample weights passed through to
+            `func`.
+        **kwargs: Additional keyword args to be passed through to `func`.
 
     Returns:
         scalar: Difference in metric value for unprivileged and privileged groups.
@@ -44,14 +46,18 @@ def difference(func, y_true, y_pred=None, *, priv_expr):
         >>> difference(precision_score, y, y_pred, priv_expr='sex == "male"')
         -0.06955430006277463
     """
+    args = (y,) + args
     # Note: provide blank name because if index name clashes with column name,
     # column name gets preference
-    priv = y_true.to_frame('').eval(priv_expr)
-    if y_pred is None:
-        return func(y_true[~priv]) - func(y_true[priv])
-    return func(y_true[~priv], y_pred[~priv]) - func(y_true[priv], y_pred[priv])
+    idx = y.to_frame('').eval(priv_expr)
+    unpriv = map(lambda a: a[~idx], args)
+    priv = map(lambda a: a[idx], args)
+    if sample_weight is not None:
+        return (func(*unpriv, sample_weight=sample_weight[~idx], **kwargs)
+              - func(*priv, sample_weight=sample_weight[idx], **kwargs))
+    return func(*unpriv, **kwargs) - func(*priv, **kwargs)
 
-def ratio(func, y_true, y_pred=None, *, priv_expr):
+def ratio(func, y, *args, priv_expr, sample_weight=None, **kwargs):
     """Compute the ratio between unprivileged and privileged subsets for an
     arbitrary metric.
 
@@ -62,23 +68,27 @@ def ratio(func, y_true, y_pred=None, *, priv_expr):
 
     Args:
         func (function): A metric function from `aif360.sklearn.metrics` or
-            `sklearn.metrics`. Keyword args should be filled in with partial.
-        y_true (pandas.Series): Ground truth (correct) target values.
-        y_pred (array-like, optional): Estimated targets as returned by a
-            classifier.
+            `sklearn.metrics`.
+        y (pandas.Series): Outcome vector with protected attributes as index.
+        *args: Additional positional args to be passed through to `func`.
         priv_expr (string, keyword-only): A query expression describing the
             privileged group (see `pandas.DataFrame.eval` and
             `pandas.DataFrame.query` for details).
+        sample_weight (array-like, optional): Sample weights passed through to
+            `func`.
+        **kwargs: Additional keyword args to be passed through to `func`.
 
     Returns:
         scalar: Ratio of metric values for unprivileged and privileged groups.
     """
-    # Note: provide blank name because if index name clashes with column name,
-    # column name gets preference
-    priv = y_true.to_frame('').eval(priv_expr)
-    if y_pred is None:
-        return func(y_true[~priv]) - func(y_true[priv])
-    return func(y_true[~priv], y_pred[~priv]) / func(y_true[priv], y_pred[priv])
+    args = (y,) + args
+    idx = y.to_frame('').eval(priv_expr)
+    unpriv = map(lambda a: a[~idx], args)
+    priv = map(lambda a: a[idx], args)
+    if sample_weight is not None:
+        return (func(*unpriv, sample_weight=sample_weight[~idx], **kwargs)
+              / func(*priv, sample_weight=sample_weight[idx], **kwargs))
+    return func(*unpriv, **kwargs) / func(*priv, **kwargs)
 
 
 # =========================== SCORER FACTORIES =================================
@@ -109,10 +119,7 @@ def specificity_score(y_true, y_pred, neg_label=0, sample_weight=None):
                         sample_weight=sample_weight)
 
 def base_rate(y, y_pred=None, pos_label=1, sample_weight=None):
-    y = np.array(y)
-    if sample_weight is not None:
-        return ((y == pos_label) * sample_weight).sum() / sample_weight.sum()
-    return (y == pos_label).sum() / len(y)
+    return np.average(y == pos_label, weights=sample_weight)
 
 def selection_rate(y_true, y_pred, pos_label=1, sample_weight=None):
     return base_rate(y_pred, pos_label=pos_label, sample_weight=sample_weight)
@@ -121,37 +128,35 @@ def selection_rate(y_true, y_pred, pos_label=1, sample_weight=None):
 # ============================ GROUP FAIRNESS ==================================
 def statistical_parity_difference(*y, priv_expr, pos_label=1, sample_weight=None):
     rate = base_rate if len(y) == 1 or y[1] is None else selection_rate
-    rate = partial(rate, pos_label=pos_label, sample_weight=sample_weight)
-    return difference(rate, *y, priv_expr=priv_expr)
+    return difference(rate, *y, priv_expr=priv_expr, pos_label=pos_label,
+                      sample_weight=sample_weight)
 
 def disparate_impact_ratio(*y, priv_expr, pos_label=1, sample_weight=None):
     rate = base_rate if len(y) == 1 or y[1] is None else selection_rate
-    rate = partial(rate, pos_label=pos_label, sample_weight=sample_weight)
-    return ratio(rate, *y, priv_expr=priv_expr)
+    return ratio(rate, *y, priv_expr=priv_expr, pos_label=pos_label,
+                 sample_weight=sample_weight)
 
 
 def equal_opportunity_difference(y_true, y_pred, priv_expr, pos_label=1,
                                  sample_weight=None):
-    rec = partial(recall_score, pos_label=pos_label,
-                  sample_weight=sample_weight)
-    return difference(rec, y_true, y_pred, priv_expr=priv_expr)
+    return difference(recall_score, y_true, y_pred, priv_expr=priv_expr,
+                      pos_label=pos_label, sample_weight=sample_weight)
 
-def average_odds_difference(y_true, y_pred, priv_expr, pos_label=1,
+def average_odds_difference(y_true, y_pred, priv_expr, pos_label=1, neg_label=0,
                             sample_weight=None):
-    tnr = partial(specificity_score, labels=labels, pos_label=pos_label,
-                  sample_weight=sample_weight)
-    tpr = partial(recall_score, labels=labels, pos_label=pos_label,
-                  sample_weight=sample_weight)
-    return 0.5 * (difference(tnr, y_true, y_pred, priv_expr=priv_expr)
-                + difference(tpr, y_true, y_pred, priv_expr=priv_expr))
-
-def average_odds_error(y_true, y_pred, priv_expr, pos_label=1,
+    tnr_diff = difference(specificity_score, y_true, y_pred, priv_expr=priv_expr,
+                          neg_label=neg_label, sample_weight=sample_weight)
+    tpr_diff = difference(recall_score, y_true, y_pred, priv_expr=priv_expr,
+                          pos_label=pos_label, sample_weight=sample_weight)
+    return (tpr_diff - tnr_diff) / 2
+
+def average_odds_error(y_true, y_pred, priv_expr, pos_label=1, neg_label=0,
                        sample_weight=None):
-    tnr = partial(specificity_score, pos_label=pos_label,
-                  sample_weight=sample_weight)
-    tpr = partial(recall_score, pos_label=pos_label, sample_weight=sample_weight)
-    return 0.5 * (abs(difference(tnr, y_true, y_pred, priv_expr=priv_expr))
-                + abs(difference(tpr, y_true, y_pred, priv_expr=priv_expr)))
+    tnr_diff = difference(specificity_score, y_true, y_pred, priv_expr=priv_expr,
+                          neg_label=neg_label, sample_weight=sample_weight)
+    tpr_diff = difference(recall_score, y_true, y_pred, priv_expr=priv_expr,
+                          pos_label=pos_label, sample_weight=sample_weight)
+    return (abs(tnr_diff) + abs(tpr_diff)) / 2
 
 
 # ================================ INDICES =====================================
@@ -169,14 +174,14 @@ def generalized_entropy_error(y_true, y_pred, alpha=2, pos_label=1):
     b = 1 + (y_pred == pos_label) - (y_true == pos_label)
     return generalized_entropy_index(b, alpha=alpha)
 
-def between_group_generalized_entropy_error(priv_expr, y_true, y_pred, alpha=2,
+def between_group_generalized_entropy_error(y_true, y_pred, priv_expr, alpha=2,
                                             pos_label=1):
     b = np.empty_like(y_true, dtype='float')
-    priv = y_true.to_frame().eval(priv_expr)
-    b[priv] = (1 + (y_pred.loc[priv] == pos_label)
-                 - (y_true.loc[priv] == pos_label)).mean()
-    b[~priv] = (1 + (y_pred.loc[~priv] == pos_label)
-                  - (y_true.loc[~priv] == pos_label)).mean()
+    priv = y_true.to_frame('').eval(priv_expr)
+    b[priv] = (1 + (y_pred[priv] == pos_label)
+                 - (y_true[priv] == pos_label)).mean()
+    b[~priv] = (1 + (y_pred[~priv] == pos_label)
+                  - (y_true[~priv] == pos_label)).mean()
     return generalized_entropy_index(b, alpha=alpha)
 
 def theil_index(b):
@@ -189,6 +194,7 @@ def coefficient_of_variation(b):
 # ========================== INDIVIDUAL FAIRNESS ===============================
 # TODO: not technically a scorer but you should be allowed to score transformers
 # Is consistency_difference posible?
+# use sample_weight?
 def consistency_score(X, y, n_neighbors=5):
     # learn a KNN on the features
     nbrs = NearestNeighbors(n_neighbors, algorithm='ball_tree').fit(X)
diff --git a/aif360/sklearn/tests/test_metrics.py b/aif360/sklearn/tests/test_metrics.py
new file mode 100644
index 00000000..5c263303
--- /dev/null
+++ b/aif360/sklearn/tests/test_metrics.py
@@ -0,0 +1,71 @@
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+
+from aif360.datasets import AdultDataset
+from aif360.sklearn.datasets import load_adult
+from aif360.metrics import ClassificationMetric
+from aif360.sklearn.metrics import *
+
+
+X, y, sample_weight = load_adult(numeric_only=True)
+X.insert(2, 'race', X.index.get_level_values('race').to_series(index=X.index).map(
+        lambda r: r == 'White').astype('float'))
+X.insert(3, 'sex', X.index.get_level_values('sex').to_series(index=X.index).map(
+        {'Male': 1, 'Female': 0}).astype('float'))
+y_pred = LogisticRegression(solver='liblinear').fit(X, y,
+        sample_weight=sample_weight).predict(X)
+priv = 'sex == "Male"'
+adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
+        features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss',
+                          'hours-per-week'], features_to_drop=[])
+adult_pred = adult.copy()
+adult_pred.labels = y_pred
+cm = ClassificationMetric(adult, adult_pred,
+                          unprivileged_groups=[{'sex': 0}],
+                          privileged_groups=[{'sex': 1}])
+
+def test_dataset_equality():
+    # print(X.shape, adult.features.shape)
+    # print(X.head())
+    # print(adult.feature_names)
+    # print(adult.features[:5])
+    assert (adult.features == X.values).all()
+
+def test_consistency():
+    assert consistency_score(X, y) == cm.consistency()
+
+def test_specificity():
+    assert specificity_score(y, y_pred, sample_weight=sample_weight) == cm.specificity()
+
+def test_selection_rate():
+    assert selection_rate(y, y_pred, sample_weight=sample_weight) == cm.selection_rate()
+
+def test_disparate_impact():
+    assert disparate_impact_ratio(y, y_pred, priv_expr=priv,
+            sample_weight=sample_weight) == cm.disparate_impact()
+
+def test_statistical_parity():
+    assert statistical_parity_difference(y, y_pred, priv_expr=priv,
+            sample_weight=sample_weight) == cm.statistical_parity_difference()
+
+def test_equal_opportunity():
+    assert equal_opportunity_difference(y, y_pred, priv_expr=priv,
+            sample_weight=sample_weight) == cm.equal_opportunity_difference()
+
+def test_average_odds_difference():
+    assert np.isclose(average_odds_difference(y, y_pred, priv_expr=priv,
+                                              sample_weight=sample_weight),
+                      cm.average_odds_difference())
+
+def test_average_odds_error():
+    assert np.isclose(average_odds_error(y, y_pred, priv_expr=priv,
+                                         sample_weight=sample_weight),
+                      cm.average_abs_odds_difference())
+
+def test_generalized_entropy_index():
+    assert np.isclose(generalized_entropy_error(y, y_pred),
+                      cm.generalized_entropy_index())
+
+def test_between_group_generalized_entropy_index():
+    assert between_group_generalized_entropy_error(y, y_pred, priv_expr=priv) \
+        == cm.between_group_generalized_entropy_index()

From 9da5abd7dc30106bba4742ad0ab5308d8c8b0c11 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 21 May 2019 12:02:25 -0400
Subject: [PATCH 05/61] added README and docs

---
 aif360/sklearn/README.md                   | 41 ++++++++++++++++++++++
 aif360/sklearn/datasets/openml_datasets.py |  8 ++---
 aif360/sklearn/datasets/utils.py           | 15 ++++----
 aif360/sklearn/metrics/metrics.py          |  8 +++--
 docs/source/conf.py                        |  6 ++--
 docs/source/index.rst                      |  1 +
 docs/source/modules/sklearn.rst            | 21 +++++++++++
 7 files changed, 83 insertions(+), 17 deletions(-)
 create mode 100644 aif360/sklearn/README.md
 create mode 100644 docs/source/modules/sklearn.rst

diff --git a/aif360/sklearn/README.md b/aif360/sklearn/README.md
new file mode 100644
index 00000000..14a4c318
--- /dev/null
+++ b/aif360/sklearn/README.md
@@ -0,0 +1,41 @@
+## `aif360.sklearn`
+
+This is an interface for interacting with data, viewing metrics, and running
+debiasing algorithms that is wholly separate from the main AIF360 package. The
+purpose of this sub-package is to match scikit-learn paradigms/APIs for easier
+integration in typical machine learning workflows.
+
+To do:
+
+- [x] Reformat datasets as separate X and y (and sample_weight) DataFrame
+objects with sample properties (protected attributes) as the index
+- [ ] Load included datasets in the above format (partially done)
+  - [x] Use `sklearn.datasets.fetch_openml` to load UCI datasets (#53)
+  - [ ] COMPAS
+  - [ ] MEPS
+- [ ] Implement metrics as individual functions instead of instance methods
+(mostly done)
+  - [x] Make certain metrics compatible as sklearn scorers
+  - [ ] Generalized confusion matrix
+  - [ ] Sample distortion metrics
+- [ ] Make inprocessing algorithms compatible as sklearn `Estimator`s
+  - [ ] Adversarial debiasing
+  - [ ] Meta-fair classifier
+  - [ ] Prejudice remover
+- [ ] Make preprocessing algorithms compatible as sklearn `Transformer`s
+  - [ ] Add functionality to modify X and y (worst case: just `predict()` +
+  `transform()` separately)
+  - [ ] Disparate impact remover
+  - [ ] Learning fair representations
+  - [ ] Optimized preprocessing
+  - [ ] Reweighing
+    - [ ] Use dynamic object to pass sample_weight to estimator, etc. after they
+    are fitted
+- [ ] Make postprocessing algorithms compatible
+  - [ ] Allow `fit()` on `y_true`,`y_pred`
+  - [ ] Calibrated equalized odds postprocessing
+  - [ ] Equalized odds postprocessing
+  - [ ] Reject option classification
+- [ ] Miscellaneous:
+  - [ ] LIME encoder
+  - [ ] Explainers
diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 51646a73..c65f3784 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -24,7 +24,7 @@ def fetch_and_format_openml(name):
 
     Returns:
         pandas.DataFrame: A DataFrame containing all data, including target,
-            with categorical features converted to 'category' dtypes.
+        with categorical features converted to 'category' dtypes.
     """
     def categorize(item):
         return cats[int(item)] if not pd.isna(item) else item
@@ -50,7 +50,7 @@ def load_adult(usecols=[], dropcols=[], numeric_only=False, dropna=True):
 
     Returns:
         namedtuple: Tuple containing X, y, and sample_weights for the Adult
-            dataset accessible by index or name.
+        dataset accessible by index or name.
 
     Examples:
         >>> adult = load_adult()
@@ -80,7 +80,7 @@ def load_german(usecols=[], dropcols=[], numeric_only=False, dropna=True):
 
     Returns:
         namedtuple: Tuple containing X and y for the German dataset accessible
-            by index or name.
+        by index or name.
 
     Examples:
         >>> german = load_german()
@@ -114,7 +114,7 @@ def load_bank(usecols=[], dropcols='duration', numeric_only=False, dropna=False)
 
     Returns:
         namedtuple: Tuple containing X and y for the Bank dataset accessible by
-            index or name.
+        index or name.
 
     Examples:
         >>> bank = load_bank()
diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index 84d3551e..3db33c11 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -7,7 +7,7 @@
 from sklearn.preprocessing import OneHotEncoder
 
 # TODO: binarize protected_attributes option?
-def standarize_dataset(df, *, protected_attributes, target, pos_label=None,
+def standarize_dataset(df, protected_attributes, target, pos_label=None,
                        sample_weight=None, usecols=[], dropcols=[],
                        numeric_only=False, dropna=True):
     """Separate data, targets, and possibly sample weights and populate
@@ -32,16 +32,16 @@ def standarize_dataset(df, *, protected_attributes, target, pos_label=None,
         dropna (bool): Drop rows with NAs.
 
     Returns:
-        namedtuple:
+        collections.namedtuple:
 
             A tuple-like object where items can be accessed by index or name.
             Contains the following attributes:
 
-            * `pandas.DataFrame`: X: Feature array.
+            * **X** (`pandas.DataFrame`) -- Feature array.
 
-            * `pandas.DataFrame` or `pandas.Series`: y: Target array.
+            * **y** (`pandas.DataFrame` or `pandas.Series`) -- Target array.
 
-            * `pandas.Series`, optional: sample_weight: Sample weights.
+            * **sample_weight** (`pandas.Series`, optional) -- Sample weights.
 
     Note:
         The order of execution for the dropping parameters is: dropcols ->
@@ -103,11 +103,12 @@ def pos_label(val):
 def make_onehot_transformer(X):
     """Shortcut for encoding categorical features as one-hot vectors.
 
-    Note: This changes the column order as well as removes DataFrame formatting.
+    Note:
+        This changes the column order as well as removes DataFrame formatting.
 
     Returns:
         sklearn.compose.ColumnTransformer: Class capable of transforming
-            categorical features in X to one-hot features.
+        categorical features in X to one-hot features.
     """
     return make_column_transformer((OneHotEncoder(), X.dtypes == 'category'),
                                    remainder='passthrough')
diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index 2a4da306..4d02a310 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -206,7 +206,7 @@ def consistency_score(X, y, n_neighbors=5):
 
 # ================================ ALIASES =====================================
 def sensitivity_score(y_true, y_pred, pos_label=1, sample_weight=None):
-    """Alias of `sklearn.metrics.recall_score` for binary classes only."""
+    """Alias of :func:`sklearn.metrics.recall_score` for binary classes only."""
     return recall_score(y_true, y_pred, pos_label=pos_label,
                         sample_weight=sample_weight)
 
@@ -218,5 +218,7 @@ def sensitivity_score(y_true, y_pred, pos_label=1, sample_weight=None):
 #     return 1 - specificity_score(y_true, y_pred, pos_label=pos_label,
 #                                  sample_weight=sample_weight)
 
-mean_difference = statistical_parity_difference
-mean_difference.__doc__ = """Alias of :meth:`statistical_parity_difference`."""
+def mean_difference(*y, priv_expr, pos_label=1, sample_weight=None):
+    """Alias of :func:`statistical_parity_difference`."""
+    return statistical_parity_difference(*y, priv_expr=priv_expr,
+            pos_label=pos_label, sample_weight=sample_weight)
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 1c302d1e..66493140 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -38,10 +38,10 @@
     'sphinx.ext.intersphinx',
     'sphinx.ext.mathjax']
 
-intersphinx_mapping = {'numpy': ('http://docs.scipy.org/doc/numpy/', None),
-    'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None),
+intersphinx_mapping = {'numpy': ('https://docs.scipy.org/doc/numpy/', None),
+    'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None),
     'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
-    'sklearn': ('https://scikit-learn.org/stable/modules/generated/', None),
+    'sklearn': ('https://scikit-learn.org/stable/', None),
     'python': ('https://docs.python.org/{}.{}'.format(*sys.version_info), None)}
 
 autoclass_content = 'both'
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 9b780a61..37ba7078 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -14,6 +14,7 @@ Welcome to AI Fairness 360's documentation!
    modules/datasets
    modules/explainers
    modules/metrics
+   modules/sklearn
 
 
 Indices and tables
diff --git a/docs/source/modules/sklearn.rst b/docs/source/modules/sklearn.rst
new file mode 100644
index 00000000..e6234ddf
--- /dev/null
+++ b/docs/source/modules/sklearn.rst
@@ -0,0 +1,21 @@
+:mod:`aif360.sklearn`
+=====================
+
+.. automodule:: aif360.sklearn
+
+Datasets
+--------
+
+.. automodule:: aif360.sklearn.datasets.utils
+    :members:
+
+.. automodule:: aif360.sklearn.datasets.openml_datasets
+    :members:
+
+Metrics
+-------
+
+.. automodule:: aif360.sklearn.metrics.metrics
+    :members:
+
+.. autofunction:: aif360.sklearn.metrics.mean_difference

From 025ecc168f1481942718a16c7ee6803b1b0a6f5c Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 23 May 2019 15:16:01 -0400
Subject: [PATCH 06/61] simpler dataset loading and 'groups' for metrics

* dataset loading is more similar to sklearn.datasets
* label binarization is now done outside standarize_dataset (sic; see utils.py)
* metrics use 'groups' and 'priv_group' to signify priv/unpriv split
---
 aif360/sklearn/README.md                   |   5 +-
 aif360/sklearn/datasets/openml_datasets.py | 160 +++++++++++++++------
 aif360/sklearn/datasets/utils.py           |  41 +++---
 aif360/sklearn/metrics/metrics.py          | 127 ++++++++--------
 aif360/sklearn/tests/test_metrics.py       |   1 +
 docs/source/modules/sklearn.rst            |   2 -
 requirements.txt                           |   2 +-
 7 files changed, 198 insertions(+), 140 deletions(-)

diff --git a/aif360/sklearn/README.md b/aif360/sklearn/README.md
index 14a4c318..c5bd0d8c 100644
--- a/aif360/sklearn/README.md
+++ b/aif360/sklearn/README.md
@@ -9,13 +9,14 @@ To do:
 
 - [x] Reformat datasets as separate X and y (and sample_weight) DataFrame
 objects with sample properties (protected attributes) as the index
-- [ ] Load included datasets in the above format (partially done)
+- [ ] Load included datasets in the above format
   - [x] Use `sklearn.datasets.fetch_openml` to load UCI datasets (#53)
   - [ ] COMPAS
   - [ ] MEPS
 - [ ] Implement metrics as individual functions instead of instance methods
-(mostly done)
   - [x] Make certain metrics compatible as sklearn scorers
+  - [ ] Use "groups" and "priv_group" keywords to specify protected attributes to
+  functions (partially done)
   - [ ] Generalized confusion matrix
   - [ ] Sample distortion metrics
 - [ ] Make inprocessing algorithms compatible as sklearn `Estimator`s
diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index c65f3784..6da3838c 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -7,20 +7,17 @@
 
 
 # cache location
-DATA_HOME = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-                         '..', 'data', 'raw')
-# name -> data_id mapping
-DATA_ID = {'adult': 1590,
-           'german': 31,
-           'bank': 1461  # TODO: this seems to be an old version
-}
+DATA_HOME_DEFAULT = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                 '..', 'data', 'raw')
 
-def fetch_and_format_openml(name):
-    """Fetch openml dataset by name and format categorical features.
+def to_dataframe(data):
+    """Format an OpenML dataset Bunch as a DataFrame with categorical features
+    if needed.
 
     Args:
-        name ({'adult', 'german', or 'bank'}): Name of OpenML dataset. Converted
-            to data_id using `DATA_ID` mapping.
+        data (Bunch): Dict-like object containing `data`, `feature_names` and,
+            optionally, `categories` attributes. Note: `data` should contain
+            both X and y data.
 
     Returns:
         pandas.DataFrame: A DataFrame containing all data, including target,
@@ -29,22 +26,33 @@ def fetch_and_format_openml(name):
     def categorize(item):
         return cats[int(item)] if not pd.isna(item) else item
 
-    data_id = DATA_ID[name]
-    data = fetch_openml(data_id=data_id, data_home=DATA_HOME, target_column=None)
-    df = pd.DataFrame(data.data, columns=data.feature_names)
-
-    for col, cats in data.categories.items():
+    df = pd.DataFrame(data['data'], columns=data['feature_names'])
+    for col, cats in data['categories'].items():
         df[col] = df[col].apply(categorize).astype('category')
 
     return df
 
-def load_adult(usecols=[], dropcols=[], numeric_only=False, dropna=True):
+def fetch_adult(subset='all', data_home=None, binary_race=True, usecols=[],
+                dropcols=[], numeric_only=False, dropna=True):
     """Load the Adult Census Income Dataset.
 
+    Binarizes 'race' to 'White' (privileged) or 'Non-white' (unprivileged).
+    The other protected attribute is 'sex' ('Male' is privileged and 'Female' is
+    unprivileged). The outcome variable is '>50K' (favorable) or '<=50K'
+    (unfavorable).
+
     Args:
-        usecols (single label or list-like, optional): Column name(s) to keep.
-            All others are dropped.
-        dropcols (single label or list-like, optional): Column name(s) to drop.
+        subset ({'train', 'test', or 'all'}, optional): Select the dataset to
+            load: 'train' for the training set, 'test' for the test set, 'all'
+            for both.
+        data_home (string, optional): Specify another download and cache folder
+            for the datasets. By default all AIF360 datasets are stored in
+            'aif360/sklearn/data/raw' subfolders.
+        binary_race (bool, optional): Group all non-white races together.
+        usecols (single label or list-like, optional): Feature column(s) to
+            keep. All others are dropped.
+        dropcols (single label or list-like, optional): Feature column(s) to
+            drop.
         numeric_only (bool): Drop all non-numeric feature columns.
         dropna (bool): Drop rows with NAs.
 
@@ -53,25 +61,57 @@ def load_adult(usecols=[], dropcols=[], numeric_only=False, dropna=True):
         dataset accessible by index or name.
 
     Examples:
-        >>> adult = load_adult()
+        >>> adult = fetch_adult()
         >>> adult.X.shape
         (45222, 13)
 
-        >>> adult_num = load_adult(numeric_only=True)
+        >>> adult_num = fetch_adult(numeric_only=True)
         >>> adult_num.X.shape
         (48842, 5)
     """
-    return standarize_dataset(fetch_and_format_openml('adult'),
-                              protected_attributes=['race', 'sex'],
-                              target='class', pos_label='>50K',
-                              sample_weight='fnlwgt', usecols=usecols,
-                              dropcols=dropcols, numeric_only=numeric_only,
-                              dropna=dropna)
+    if subset not in {'train', 'test', 'all'}:
+        raise ValueError("subset must be either 'train', 'test', or 'all'; "
+                         "cannot be {}".format(subset))
+    df = to_dataframe(fetch_openml(data_id=1590, data_home=data_home or
+                                   DATA_HOME_DEFAULT, target_column=None))
+    if subset == 'train':
+        df = df.iloc[16281:]
+    elif subset == 'test':
+        df = df.iloc[:16281]
+
+    df.class = df.class.cat.as_ordered()  # '<=50K' < '>50K'
+    df = df.rename(columns={'class': 'annual-income'})  # more descriptive name
+
+    # binarize protected attributes
+    if binary_race:
+        df.race = df.race.cat.set_categories(['Non-white', 'White'],
+                                             ordered=True).fillna('Non-white')
+    df.sex = df.sex.cat.as_ordered()  # 'Female' < 'Male'
+
+    return standarize_dataset(df, protected_attributes=['race', 'sex'],
+                              target='annual-income', sample_weight='fnlwgt',
+                              usecols=usecols, dropcols=dropcols,
+                              numeric_only=numeric_only, dropna=dropna)
 
-def load_german(usecols=[], dropcols=[], numeric_only=False, dropna=True):
+def fetch_german(data_home=None, usecols=[], dropcols=[], numeric_only=False,
+                 dropna=True):
     """Load the German Credit Dataset.
 
+    Protected attributes are 'sex' ('male' is privileged and 'female' is
+    unprivileged) and 'age' (left as continuous but [1]_ recommends `age >= 25`
+    be considered privileged and `age < 25` be considered unprivileged; this can
+    be done at metric evaluation time). The outcome variable is 'good'
+    (favorable) or 'bad' (unfavorable).
+
+    References:
+        .. [1] F. Kamiran and T. Calders, "Classifying without
+           discriminating," 2nd International Conference on Computer,
+           Control and Communication, 2009.
+
     Args:
+        data_home (string, optional): Specify another download and cache folder
+            for the datasets. By default all AIF360 datasets are stored in
+            'aif360/sklearn/data/raw' subfolders.
         usecols (single label or list-like, optional): Column name(s) to keep.
             All others are dropped.
         dropcols (single label or list-like, optional): Column name(s) to drop.
@@ -83,54 +123,84 @@ def load_german(usecols=[], dropcols=[], numeric_only=False, dropna=True):
         by index or name.
 
     Examples:
-        >>> german = load_german()
+        >>> german = fetch_german()
         >>> german.X.shape
         (1000, 21)
 
-        >>> german_num = load_german(numeric_only=True)
+        >>> german_num = fetch_german(numeric_only=True)
         >>> german_num.X.shape
         (1000, 7)
+
+        >>> DISPARATE IMPACT AGE EXAMPLE HERE
     """
-    df = fetch_and_format_openml('german')
+    df = to_dataframe(fetch_openml(data_id=31, data_home=data_home or
+                                   DATA_HOME_DEFAULT, target_column=None))
+
+    df.class = df.class.cat.as_ordered()  # 'bad' < 'good'
+    df = df.rename(columns={'class': 'credit-risk'})  # more descriptive name
+
     # Note: marital_status directly implies sex. i.e. 'div/dep/mar' => 'female'
     # and all others => 'male'
     personal_status = df.pop('personal_status').str.split(expand=True)
     personal_status.columns = ['sex', 'marital_status']
     df = df.join(personal_status.astype('category'))
+    df.sex = df.sex.cat.as_ordered()  # 'female' < 'male'
+
     return standarize_dataset(df, protected_attributes=['sex', 'age'],
-                              target='class', pos_label='good',
-                              usecols=usecols, dropcols=dropcols,
-                              numeric_only=numeric_only, dropna=dropna)
+                              target='credit-risk', usecols=usecols,
+                              dropcols=dropcols, numeric_only=numeric_only,
+                              dropna=dropna)
 
-def load_bank(usecols=[], dropcols='duration', numeric_only=False, dropna=False):
+def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
+               numeric_only=False, dropna=False):
     """Load the Bank Marketing Dataset.
 
+    The protected attribute is 'age' (left as continuous). The outcome variable
+    is 'yes' or 'no'. TODO: which is favorable?
+
     Args:
+        data_home (string, optional): Specify another download and cache folder
+            for the datasets. By default all AIF360 datasets are stored in
+            'aif360/sklearn/data/raw' subfolders.
+        percent10 (bool, optional): Download the reduced version (10% of data).
         usecols (single label or list-like, optional): Column name(s) to keep.
             All others are dropped.
         dropcols (single label or list-like, optional): Column name(s) to drop.
         numeric_only (bool): Drop all non-numeric feature columns.
-        dropna (bool): Drop rows with NAs.
+        dropna (bool): Drop rows with NAs. Note: this is False by default for
+            this dataset.
 
     Returns:
         namedtuple: Tuple containing X and y for the Bank dataset accessible by
         index or name.
 
     Examples:
-        >>> bank = load_bank()
+        >>> bank = fetch_bank()
         >>> bank.X.shape
         (45211, 15)
 
-        >>> bank_num = load_bank(numeric_only=True)
+        >>> bank_nona = fetch_bank(dropna=True)
+        >>> bank_nona.X.shape
+        (7842, 15)
+
+        >>> bank_num = fetch_bank(numeric_only=True)
         >>> bank_num.X.shape
         (45211, 6)
     """
-    df = fetch_and_format_openml('bank')
+    # TODO: this seems to be an old version
+    df = to_dataframe(fetch_openml(data_id=1558 if percent10 else 1461,
+                                   data_home=data_home or DATA_HOME_DEFAULT,
+                                   target_column=None))
     df.columns = ['age', 'job', 'marital', 'education', 'default', 'balance',
                   'housing', 'loan', 'contact', 'day', 'month', 'duration',
-                  'campaign', 'pdays', 'previous', 'poutcome', 'y']
-    # df = df.replace({'unknown': None})  # TODO: this messes up the categories
-    # df.select_dtypes('object').astype('category', inplace=True)
-    return standarize_dataset(df, protected_attributes=['age'], target='y',
-                              pos_label='2', usecols=usecols, dropcols=dropcols,
+                  'campaign', 'pdays', 'previous', 'poutcome', 'deposit']
+    # remap target
+    df.deposit = df.deposit.cat.rename_categories({'1': 'no', '2': 'yes'})
+    # df.deposit = df.deposit.cat.as_ordered()
+    # replace 'unknown' marker with NaN
+    df.select_dtypes('category').apply(lambda s: s.cat.remove_categories('unknown')
+                                       if 'unknown' in s.cat.categories else s,
+                                       inplace=True)
+    return standarize_dataset(df, protected_attributes='age', target='deposit',
+                              usecols=usecols, dropcols=dropcols,
                               numeric_only=numeric_only, dropna=dropna)
diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index 3db33c11..60d61e37 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -1,15 +1,12 @@
 from collections import namedtuple
 
-import numpy as np
 import pandas as pd
 from pandas.core.dtypes.common import is_list_like
 from sklearn.compose import make_column_transformer
 from sklearn.preprocessing import OneHotEncoder
 
-# TODO: binarize protected_attributes option?
-def standarize_dataset(df, protected_attributes, target, pos_label=None,
-                       sample_weight=None, usecols=[], dropcols=[],
-                       numeric_only=False, dropna=True):
+def standarize_dataset(df, protected_attributes, target, sample_weight=None,
+                       usecols=[], dropcols=[], numeric_only=False, dropna=True):
     """Separate data, targets, and possibly sample weights and populate
     protected attributes as sample properties.
 
@@ -20,9 +17,11 @@ def standarize_dataset(df, protected_attributes, target, pos_label=None,
             are dropped from the features, they remain in the index.
         target (single label or list-like): Column label of the target (outcome)
             variable.
-        pos_label (scalar, list-like, or function, optional): A value, list of
-            values, or function designating the positive binary label from the
-            raw data.
+        # pos_label (scalar, list-like, or function, optional): A value, list of
+        #     values, or boolean function (True if positive) designating the
+        #     positive binary label from the raw data. All others will be
+        #     considered negative. The resulting target array will have value 1 if
+        #     positive and 0 if negative.
         sample_weight (single label, optional): Name of the column containing
             sample weights.
         usecols (single label or list-like, optional): Column(s) to keep. All
@@ -62,20 +61,17 @@ def standarize_dataset(df, protected_attributes, target, pos_label=None,
         >>> X, y = standarize_dataset(df, protected_attributes=0, target=5)
         >>> X_tr, X_te, y_tr, y_te = train_test_split(X, y)
     """
-    df = df.set_index(protected_attributes, drop=False)  # append=True?
+    df = df.set_index(protected_attributes, drop=False)  # TODO: append=True?
 
+    # TODO: convert to 1/0 if numeric_only?
     y = df.pop(target)
-    if pos_label is not None:
-        if not callable(pos_label):
-            pos = pos_label if is_list_like(pos_label) else [pos_label]
-            pos = np.array(pos)
-            # find all instances which match any of the favorable classes
-            def pos_label(val):
-                # return np.logical_or.reduce(np.equal.outer(pos, col), axis=(0, 2))
-                return np.logical_or.reduce(pos == val)
-
-        # TODO: won't work for multilabel (target is list) case, try DataFrame.eval()?
-        y = y.apply(pos_label).astype('int')
+    # if not callable(pos_label):
+    #     if not is_list_like(pos_label):
+    #         pos_label = [pos_label]
+    #     # find all instances which match any of the favorable classes
+    #     y = y.isin(pos_label).astype('int')
+    # else:
+    #     y = y.apply(pos_label).astype('int')
 
     # Column-wise drops
     df = df.drop(dropcols, axis=1)
@@ -85,6 +81,11 @@ def pos_label(val):
             usecols = [usecols]
         df = df[usecols]
     if numeric_only:
+        # binary categorical columns -> 1/0
+        for col in df.select_dtypes('category'):
+            # TODO: allow any size ordered categorical?
+            if len(df[col].cat.categories) == 2 and df[col].cat.ordered:
+                df[col] = df[col].cat.factorize(sort=True)[0]
         df = df.select_dtypes(['number', 'bool'])
         # upcast all feature dimensions to a consistent numerical dtype
         df = df.apply(pd.to_numeric, axis=1)
diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index 4d02a310..79c91c19 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -1,22 +1,10 @@
-from functools import partial
-
 import numpy as np
 from sklearn.metrics import make_scorer, recall_score
 from sklearn.neighbors import NearestNeighbors
 
 
-# # ============================== VALIDATORS ====================================
-# def validate_index_match(arg1, arg2):
-#     """
-#     Raises:
-#         ValueError: If arg1 and arg2 do not have equivalent indices.
-#     """
-#     if not arg1.index.equals(arg2.index):
-#         raise ValueError("Indices must match to perform a valid comparison.")
-
-
 # ============================= META-METRICS ===================================
-def difference(func, y, *args, priv_expr, sample_weight=None, **kwargs):
+def difference(func, y, *args, groups, priv_group=1, sample_weight=None, **kwargs):
     """Compute the difference between unprivileged and privileged subsets for an
     arbitrary metric.
 
@@ -30,9 +18,9 @@ def difference(func, y, *args, priv_expr, sample_weight=None, **kwargs):
             `sklearn.metrics`.
         y (pandas.Series): Outcome vector with protected attributes as index.
         *args: Additional positional args to be passed through to `func`.
-        priv_expr (string, keyword-only): A query expression describing the
-            privileged group (see `pandas.DataFrame.eval` and
-            `pandas.DataFrame.query` for details).
+        groups (array-like, keyword-only): Group labels (protected attributes)
+            for the samples.
+        priv_group (scalar, optional): Label value for the privileged group.
         sample_weight (array-like, optional): Sample weights passed through to
             `func`.
         **kwargs: Additional keyword args to be passed through to `func`.
@@ -43,21 +31,19 @@ def difference(func, y, *args, priv_expr, sample_weight=None, **kwargs):
     Examples:
         >>> X, y = load_german(numeric_only=True)
         >>> y_pred = LogisticRegression().fit(X, y).predict(X)
-        >>> difference(precision_score, y, y_pred, priv_expr='sex == "male"')
+        >>> sex = X.index.get_level_values('sex')
+        >>> difference(precision_score, y, y_pred, groups=sex, priv_group='male')
         -0.06955430006277463
     """
-    args = (y,) + args
-    # Note: provide blank name because if index name clashes with column name,
-    # column name gets preference
-    idx = y.to_frame('').eval(priv_expr)
-    unpriv = map(lambda a: a[~idx], args)
-    priv = map(lambda a: a[idx], args)
+    idx = (groups == priv_group)
+    unpriv = map(lambda a: a[~idx], (y,) + args)
+    priv = map(lambda a: a[idx], (y,) + args)
     if sample_weight is not None:
         return (func(*unpriv, sample_weight=sample_weight[~idx], **kwargs)
               - func(*priv, sample_weight=sample_weight[idx], **kwargs))
     return func(*unpriv, **kwargs) - func(*priv, **kwargs)
 
-def ratio(func, y, *args, priv_expr, sample_weight=None, **kwargs):
+def ratio(func, y, *args, groups, priv_group=1, sample_weight=None, **kwargs):
     """Compute the ratio between unprivileged and privileged subsets for an
     arbitrary metric.
 
@@ -71,9 +57,9 @@ def ratio(func, y, *args, priv_expr, sample_weight=None, **kwargs):
             `sklearn.metrics`.
         y (pandas.Series): Outcome vector with protected attributes as index.
         *args: Additional positional args to be passed through to `func`.
-        priv_expr (string, keyword-only): A query expression describing the
-            privileged group (see `pandas.DataFrame.eval` and
-            `pandas.DataFrame.query` for details).
+        groups (array-like, keyword-only): Group labels (protected attributes)
+            for the samples.
+        priv_group (scalar, optional): Label value for the privileged group.
         sample_weight (array-like, optional): Sample weights passed through to
             `func`.
         **kwargs: Additional keyword args to be passed through to `func`.
@@ -81,10 +67,9 @@ def ratio(func, y, *args, priv_expr, sample_weight=None, **kwargs):
     Returns:
         scalar: Ratio of metric values for unprivileged and privileged groups.
     """
-    args = (y,) + args
-    idx = y.to_frame('').eval(priv_expr)
-    unpriv = map(lambda a: a[~idx], args)
-    priv = map(lambda a: a[idx], args)
+    idx = (groups == priv_group)
+    unpriv = map(lambda a: a[~idx], (y,) + args)
+    priv = map(lambda a: a[idx], (y,) + args)
     if sample_weight is not None:
         return (func(*unpriv, sample_weight=sample_weight[~idx], **kwargs)
               / func(*priv, sample_weight=sample_weight[idx], **kwargs))
@@ -106,15 +91,11 @@ def specificity_score(y_true, y_pred, neg_label=0, sample_weight=None):
     """Compute the specificity or true negative rate.
 
     Args:
-        y_true:
-        y_pred:
+        y_true (array-like): Ground truth (correct) target values.
+        y_pred (array-like): Estimated targets as returned by a classifier.
         neg_label (scalar, optional): The class to report. Note: the data should
             be binary.
     """
-    # neg_labels = np.setdiff1d(np.unique(np.hstack((y_true, y_pred))),
-    #                           np.array([pos_label]))
-    # if neg_labels.size != 2:
-    #     raise ValueError("This function only applies to binary classification.")
     return recall_score(y_true, y_pred, pos_label=neg_label,
                         sample_weight=sample_weight)
 
@@ -126,40 +107,46 @@ def selection_rate(y_true, y_pred, pos_label=1, sample_weight=None):
 
 
 # ============================ GROUP FAIRNESS ==================================
-def statistical_parity_difference(*y, priv_expr, pos_label=1, sample_weight=None):
+def statistical_parity_difference(*y, groups, priv_group=1, pos_label=1,
+                                  sample_weight=None):
     rate = base_rate if len(y) == 1 or y[1] is None else selection_rate
-    return difference(rate, *y, priv_expr=priv_expr, pos_label=pos_label,
-                      sample_weight=sample_weight)
+    return difference(rate, *y, groups=groups, priv_group=priv_group,
+                      pos_label=pos_label, sample_weight=sample_weight)
 
-def disparate_impact_ratio(*y, priv_expr, pos_label=1, sample_weight=None):
+def disparate_impact_ratio(*y, groups, priv_group=1, pos_label=1,
+                           sample_weight=None):
     rate = base_rate if len(y) == 1 or y[1] is None else selection_rate
-    return ratio(rate, *y, priv_expr=priv_expr, pos_label=pos_label,
-                 sample_weight=sample_weight)
+    return ratio(rate, *y, groups=groups, priv_group=priv_group,
+                 pos_label=pos_label, sample_weight=sample_weight)
 
+def equal_opportunity_difference(y_true, y_pred, groups, priv_group=1,
+                                 pos_label=1, sample_weight=None):
+    return difference(recall_score, y_true, y_pred, groups=groups,
+                      priv_group=priv_group, pos_label=pos_label,
+                      sample_weight=sample_weight)
 
-def equal_opportunity_difference(y_true, y_pred, priv_expr, pos_label=1,
-                                 sample_weight=None):
-    return difference(recall_score, y_true, y_pred, priv_expr=priv_expr,
-                      pos_label=pos_label, sample_weight=sample_weight)
-
-def average_odds_difference(y_true, y_pred, priv_expr, pos_label=1, neg_label=0,
-                            sample_weight=None):
-    tnr_diff = difference(specificity_score, y_true, y_pred, priv_expr=priv_expr,
-                          neg_label=neg_label, sample_weight=sample_weight)
-    tpr_diff = difference(recall_score, y_true, y_pred, priv_expr=priv_expr,
-                          pos_label=pos_label, sample_weight=sample_weight)
+def average_odds_difference(y_true, y_pred, groups, priv_group=1, pos_label=1,
+                            neg_label=0, sample_weight=None):
+    tnr_diff = difference(specificity_score, y_true, y_pred, groups=groups,
+                          priv_group=priv_group, neg_label=neg_label,
+                          sample_weight=sample_weight)
+    tpr_diff = difference(recall_score, y_true, y_pred, groups=groups,
+                          priv_group=priv_group, pos_label=pos_label,
+                          sample_weight=sample_weight)
     return (tpr_diff - tnr_diff) / 2
 
-def average_odds_error(y_true, y_pred, priv_expr, pos_label=1, neg_label=0,
-                       sample_weight=None):
-    tnr_diff = difference(specificity_score, y_true, y_pred, priv_expr=priv_expr,
-                          neg_label=neg_label, sample_weight=sample_weight)
-    tpr_diff = difference(recall_score, y_true, y_pred, priv_expr=priv_expr,
-                          pos_label=pos_label, sample_weight=sample_weight)
+def average_odds_error(y_true, y_pred, groups, priv_group=1, pos_label=1,
+                       neg_label=0, sample_weight=None):
+    tnr_diff = difference(specificity_score, y_true, y_pred, groups=groups,
+                          priv_group=priv_group, neg_label=neg_label,
+                          sample_weight=sample_weight)
+    tpr_diff = difference(recall_score, y_true, y_pred, groups=groups,
+                          priv_group=priv_group, pos_label=pos_label,
+                          sample_weight=sample_weight)
     return (abs(tnr_diff) + abs(tpr_diff)) / 2
 
 
-# ================================ INDICES =====================================
+# ========================== INDIVIDUAL FAIRNESS ===============================
 def generalized_entropy_index(b, alpha=2):
     if alpha == 0:
         return -(np.log(b / b.mean()) / b.mean()).mean()
@@ -174,14 +161,15 @@ def generalized_entropy_error(y_true, y_pred, alpha=2, pos_label=1):
     b = 1 + (y_pred == pos_label) - (y_true == pos_label)
     return generalized_entropy_index(b, alpha=alpha)
 
-def between_group_generalized_entropy_error(y_true, y_pred, priv_expr, alpha=2,
+def between_group_generalized_entropy_error(y_true, y_pred, groups,
+                                            priv_group=None, alpha=2,
                                             pos_label=1):
     b = np.empty_like(y_true, dtype='float')
-    priv = y_true.to_frame('').eval(priv_expr)
-    b[priv] = (1 + (y_pred[priv] == pos_label)
-                 - (y_true[priv] == pos_label)).mean()
-    b[~priv] = (1 + (y_pred[~priv] == pos_label)
-                  - (y_true[~priv] == pos_label)).mean()
+    if priv_group is not None:
+        groups = [1 if g == priv_group else 0 for g in groups]
+    for g in np.unique(groups):
+        b[groups == g] = (1 + (y_pred[groups == g] == pos_label)
+                            - (y_true[groups == g] == pos_label)).mean()
     return generalized_entropy_index(b, alpha=alpha)
 
 def theil_index(b):
@@ -191,7 +179,6 @@ def coefficient_of_variation(b):
     return 2 * np.sqrt(generalized_entropy_index(b, alpha=2))
 
 
-# ========================== INDIVIDUAL FAIRNESS ===============================
 # TODO: not technically a scorer but you should be allowed to score transformers
 # Is consistency_difference posible?
 # use sample_weight?
@@ -218,7 +205,7 @@ def sensitivity_score(y_true, y_pred, pos_label=1, sample_weight=None):
 #     return 1 - specificity_score(y_true, y_pred, pos_label=pos_label,
 #                                  sample_weight=sample_weight)
 
-def mean_difference(*y, priv_expr, pos_label=1, sample_weight=None):
+def mean_difference(*y, groups, priv_group=1, pos_label=1, sample_weight=None):
     """Alias of :func:`statistical_parity_difference`."""
-    return statistical_parity_difference(*y, priv_expr=priv_expr,
+    return statistical_parity_difference(*y, groups=groups, priv_group=priv_group,
             pos_label=pos_label, sample_weight=sample_weight)
diff --git a/aif360/sklearn/tests/test_metrics.py b/aif360/sklearn/tests/test_metrics.py
index 5c263303..ed3636f8 100644
--- a/aif360/sklearn/tests/test_metrics.py
+++ b/aif360/sklearn/tests/test_metrics.py
@@ -30,6 +30,7 @@ def test_dataset_equality():
     # print(adult.feature_names)
     # print(adult.features[:5])
     assert (adult.features == X.values).all()
+    assert (adult.labels == y.values).all()
 
 def test_consistency():
     assert consistency_score(X, y) == cm.consistency()
diff --git a/docs/source/modules/sklearn.rst b/docs/source/modules/sklearn.rst
index e6234ddf..273f8256 100644
--- a/docs/source/modules/sklearn.rst
+++ b/docs/source/modules/sklearn.rst
@@ -17,5 +17,3 @@ Metrics
 
 .. automodule:: aif360.sklearn.metrics.metrics
     :members:
-
-.. autofunction:: aif360.sklearn.metrics.mean_difference
diff --git a/requirements.txt b/requirements.txt
index 76daec2e..767db283 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,7 @@ ipywidgets
 tqdm
 numpy>=1.16
 matplotlib
-pandas>=0.23.3
+pandas>=0.24
 pytest>=3.5.0
 scipy
 scikit-learn

From 8e96177c887f9f56dcc7f06625c262c10db6d226 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Wed, 5 Jun 2019 11:11:47 -0400
Subject: [PATCH 07/61] fixes to categoricals

---
 aif360/sklearn/datasets/openml_datasets.py | 4 ++--
 aif360/sklearn/datasets/utils.py           | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 6da3838c..d6082840 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -79,8 +79,8 @@ def fetch_adult(subset='all', data_home=None, binary_race=True, usecols=[],
     elif subset == 'test':
         df = df.iloc[:16281]
 
-    df.class = df.class.cat.as_ordered()  # '<=50K' < '>50K'
     df = df.rename(columns={'class': 'annual-income'})  # more descriptive name
+    df['annual-income'] = df['annual-income'].cat.as_ordered()  # '<=50K' < '>50K'
 
     # binarize protected attributes
     if binary_race:
@@ -136,8 +136,8 @@ def fetch_german(data_home=None, usecols=[], dropcols=[], numeric_only=False,
     df = to_dataframe(fetch_openml(data_id=31, data_home=data_home or
                                    DATA_HOME_DEFAULT, target_column=None))
 
-    df.class = df.class.cat.as_ordered()  # 'bad' < 'good'
     df = df.rename(columns={'class': 'credit-risk'})  # more descriptive name
+    df['credit-risk'] = df['credit-risk'].cat.as_ordered()  # 'bad' < 'good'
 
     # Note: marital_status directly implies sex. i.e. 'div/dep/mar' => 'female'
     # and all others => 'male'
diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index 60d61e37..b5fff624 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -85,7 +85,7 @@ def standarize_dataset(df, protected_attributes, target, sample_weight=None,
         for col in df.select_dtypes('category'):
             # TODO: allow any size ordered categorical?
             if len(df[col].cat.categories) == 2 and df[col].cat.ordered:
-                df[col] = df[col].cat.factorize(sort=True)[0]
+                df[col] = df[col].factorize(sort=True)[0]
         df = df.select_dtypes(['number', 'bool'])
         # upcast all feature dimensions to a consistent numerical dtype
         df = df.apply(pd.to_numeric, axis=1)

From 8abb897dfd41e8d6c406da333edf147fca4e4888 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Wed, 5 Jun 2019 16:03:10 -0400
Subject: [PATCH 08/61] fixes for tests, updated README

---
 .travis.yml                                   |   4 +-
 aif360/sklearn/README.md                      |   7 +-
 aif360/sklearn/datasets/openml_datasets.py    |   6 +-
 aif360/sklearn/examples/Getting Started.ipynb | 468 ++++++++++++++++++
 aif360/sklearn/metrics/metrics.py             |   2 +-
 aif360/sklearn/tests/test_datasets.py         |  38 +-
 aif360/sklearn/tests/test_metrics.py          |  27 +-
 7 files changed, 510 insertions(+), 42 deletions(-)
 create mode 100644 aif360/sklearn/examples/Getting Started.ipynb

diff --git a/.travis.yml b/.travis.yml
index 9aa44262..fdfa087e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,7 +9,7 @@ env:
 
 branches:
   only:
-    - master
+    - sklearn-compat
 
 install:
   - pip install -r requirements.txt
@@ -28,4 +28,4 @@ before_script:
 script:
   # stop the build if there are Python syntax errors or undefined names
   - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics
-  - travis_wait pytest tests
+  - travis_wait python -m pytest aif360/sklearn/tests
diff --git a/aif360/sklearn/README.md b/aif360/sklearn/README.md
index c5bd0d8c..fbaf9adc 100644
--- a/aif360/sklearn/README.md
+++ b/aif360/sklearn/README.md
@@ -5,6 +5,9 @@ and running debiasing algorithms than the main AIF360 package. The purpose of
 this sub-package is to match scikit-learn paradigms/APIs for easier integration
 in typical machine learning workflows.
 
+See [Getting Started](examples/Getting%20Started.ipynb) to see `aif360.sklearn`
+in action.
+
 To do:
 
 - [x] Reformat datasets as separate X and y (and sample_weight) DataFrame
@@ -15,8 +18,8 @@ objects with sample properties (protected attributes) as the index
   - [ ] MEPS
 - [ ] Implement metrics as individual functions instead of instance methods
   - [x] Make certain metrics compatible as sklearn scorers
-  - [ ] Use "groups" and "priv_group" keywords to specify protected attributes to
-  functions (partially done)
+  - [x] Use "groups" and "priv_group" keywords to specify protected attributes to
+  functions
   - [ ] Generalized confusion matrix
   - [ ] Sample distortion metrics
 - [ ] Make inprocessing algorithms compatible as sklearn `Estimator`s
diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index d6082840..ac8c32d1 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -198,9 +198,9 @@ def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
     df.deposit = df.deposit.cat.rename_categories({'1': 'no', '2': 'yes'})
     # df.deposit = df.deposit.cat.as_ordered()
     # replace 'unknown' marker with NaN
-    df.select_dtypes('category').apply(lambda s: s.cat.remove_categories('unknown')
-                                       if 'unknown' in s.cat.categories else s,
-                                       inplace=True)
+    df.select_dtypes('category').apply(lambda s:
+            s.cat.remove_categories('unknown', inplace=True)
+            if 'unknown' in s.cat.categories else s)
     return standarize_dataset(df, protected_attributes='age', target='deposit',
                               usecols=usecols, dropcols=dropcols,
                               numeric_only=numeric_only, dropna=dropna)
diff --git a/aif360/sklearn/examples/Getting Started.ipynb b/aif360/sklearn/examples/Getting Started.ipynb
new file mode 100644
index 00000000..58031a8d
--- /dev/null
+++ b/aif360/sklearn/examples/Getting Started.ipynb	
@@ -0,0 +1,468 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Getting Started"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.metrics import recall_score\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "from aif360.sklearn.datasets import fetch_adult\n",
+    "from aif360.sklearn.metrics import disparate_impact_ratio"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Loading data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Datasets are formatted as separate `X` (# samples x # features) and `y` (# samples x # labels) DataFrames. The index of each DataFrame contains protected attribute values per sample. Datasets may also load a `sample_weight` object to be used with certain algorithms/metrics. All of this makes it so that aif360 is compatible with scikit-learn objects.\n",
+    "\n",
+    "For example, we can easily load the Adult dataset from UCI with the following line:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>age</th>\n",
+       "      <th>workclass</th>\n",
+       "      <th>education</th>\n",
+       "      <th>education-num</th>\n",
+       "      <th>marital-status</th>\n",
+       "      <th>occupation</th>\n",
+       "      <th>relationship</th>\n",
+       "      <th>race</th>\n",
+       "      <th>sex</th>\n",
+       "      <th>capital-gain</th>\n",
+       "      <th>capital-loss</th>\n",
+       "      <th>hours-per-week</th>\n",
+       "      <th>native-country</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>race</th>\n",
+       "      <th>sex</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Non-white</th>\n",
+       "      <th>Male</th>\n",
+       "      <td>25.0</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>11th</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>Never-married</td>\n",
+       "      <td>Machine-op-inspct</td>\n",
+       "      <td>Own-child</td>\n",
+       "      <td>Non-white</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>40.0</td>\n",
+       "      <td>United-States</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">White</th>\n",
+       "      <th>Male</th>\n",
+       "      <td>38.0</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>HS-grad</td>\n",
+       "      <td>9.0</td>\n",
+       "      <td>Married-civ-spouse</td>\n",
+       "      <td>Farming-fishing</td>\n",
+       "      <td>Husband</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>50.0</td>\n",
+       "      <td>United-States</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Male</th>\n",
+       "      <td>28.0</td>\n",
+       "      <td>Local-gov</td>\n",
+       "      <td>Assoc-acdm</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>Married-civ-spouse</td>\n",
+       "      <td>Protective-serv</td>\n",
+       "      <td>Husband</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>40.0</td>\n",
+       "      <td>United-States</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Non-white</th>\n",
+       "      <th>Male</th>\n",
+       "      <td>44.0</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>Some-college</td>\n",
+       "      <td>10.0</td>\n",
+       "      <td>Married-civ-spouse</td>\n",
+       "      <td>Machine-op-inspct</td>\n",
+       "      <td>Husband</td>\n",
+       "      <td>Non-white</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>7688.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>40.0</td>\n",
+       "      <td>United-States</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>White</th>\n",
+       "      <th>Male</th>\n",
+       "      <td>34.0</td>\n",
+       "      <td>Private</td>\n",
+       "      <td>10th</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>Never-married</td>\n",
+       "      <td>Other-service</td>\n",
+       "      <td>Not-in-family</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Male</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>United-States</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                 age  workclass     education  education-num  \\\n",
+       "race      sex                                                  \n",
+       "Non-white Male  25.0    Private          11th            7.0   \n",
+       "White     Male  38.0    Private       HS-grad            9.0   \n",
+       "          Male  28.0  Local-gov    Assoc-acdm           12.0   \n",
+       "Non-white Male  44.0    Private  Some-college           10.0   \n",
+       "White     Male  34.0    Private          10th            6.0   \n",
+       "\n",
+       "                    marital-status         occupation   relationship  \\\n",
+       "race      sex                                                          \n",
+       "Non-white Male       Never-married  Machine-op-inspct      Own-child   \n",
+       "White     Male  Married-civ-spouse    Farming-fishing        Husband   \n",
+       "          Male  Married-civ-spouse    Protective-serv        Husband   \n",
+       "Non-white Male  Married-civ-spouse  Machine-op-inspct        Husband   \n",
+       "White     Male       Never-married      Other-service  Not-in-family   \n",
+       "\n",
+       "                     race   sex  capital-gain  capital-loss  hours-per-week  \\\n",
+       "race      sex                                                                 \n",
+       "Non-white Male  Non-white  Male           0.0           0.0            40.0   \n",
+       "White     Male      White  Male           0.0           0.0            50.0   \n",
+       "          Male      White  Male           0.0           0.0            40.0   \n",
+       "Non-white Male  Non-white  Male        7688.0           0.0            40.0   \n",
+       "White     Male      White  Male           0.0           0.0            30.0   \n",
+       "\n",
+       "               native-country  \n",
+       "race      sex                  \n",
+       "Non-white Male  United-States  \n",
+       "White     Male  United-States  \n",
+       "          Male  United-States  \n",
+       "Non-white Male  United-States  \n",
+       "White     Male  United-States  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X, y, sample_weight = fetch_adult()\n",
+    "X.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can also easily load a version of the dataset which only contains numeric or binary columns and split it with scikit-learn:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>age</th>\n",
+       "      <th>education-num</th>\n",
+       "      <th>race</th>\n",
+       "      <th>sex</th>\n",
+       "      <th>capital-gain</th>\n",
+       "      <th>capital-loss</th>\n",
+       "      <th>hours-per-week</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>race</th>\n",
+       "      <th>sex</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Non-white</th>\n",
+       "      <th>Female</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>20.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"4\" valign=\"top\">White</th>\n",
+       "      <th>Male</th>\n",
+       "      <td>55.0</td>\n",
+       "      <td>9.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>40.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Female</th>\n",
+       "      <td>43.0</td>\n",
+       "      <td>9.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>40.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Male</th>\n",
+       "      <td>44.0</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>4386.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>40.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Male</th>\n",
+       "      <td>41.0</td>\n",
+       "      <td>9.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>55.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                   age  education-num  race  sex  capital-gain  capital-loss  \\\n",
+       "race      sex                                                                  \n",
+       "Non-white Female  18.0            7.0   0.0  0.0           0.0           0.0   \n",
+       "White     Male    55.0            9.0   1.0  1.0           0.0           0.0   \n",
+       "          Female  43.0            9.0   1.0  0.0           0.0           0.0   \n",
+       "          Male    44.0           11.0   1.0  1.0        4386.0           0.0   \n",
+       "          Male    41.0            9.0   1.0  1.0           0.0           0.0   \n",
+       "\n",
+       "                  hours-per-week  \n",
+       "race      sex                     \n",
+       "Non-white Female            20.0  \n",
+       "White     Male              40.0  \n",
+       "          Female            40.0  \n",
+       "          Male              40.0  \n",
+       "          Male              55.0  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X, y, _ = fetch_adult(numeric_only=True)\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=123)\n",
+    "X_train.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Running metrics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "With the data in this format, we can easily train a scikit-learn model and get predictions for the test data:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred = LogisticRegression(solver='liblinear').fit(X_train, y_train).predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, we can analyze our predictions and quickly calculate the disparate impact for females vs. males:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.19176335549523604"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sex = y_test.index.get_level_values('sex')\n",
+    "disparate_impact_ratio(y_test, y_pred, groups=sex, priv_group='Male', pos_label='>50K')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Debiasing algorithms"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Not yet implemented."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index 79c91c19..a0e4f813 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -29,7 +29,7 @@ def difference(func, y, *args, groups, priv_group=1, sample_weight=None, **kwarg
         scalar: Difference in metric value for unprivileged and privileged groups.
 
     Examples:
-        >>> X, y = load_german(numeric_only=True)
+        >>> X, y = fetch_german(numeric_only=True)
         >>> y_pred = LogisticRegression().fit(X, y).predict(X)
         >>> sex = X.index.get_level_values('sex')
         >>> difference(precision_score, y, y_pred, groups=sex, priv_group='male')
diff --git a/aif360/sklearn/tests/test_datasets.py b/aif360/sklearn/tests/test_datasets.py
index 9b00e801..3e5c8a4a 100644
--- a/aif360/sklearn/tests/test_datasets.py
+++ b/aif360/sklearn/tests/test_datasets.py
@@ -30,11 +30,11 @@ def test_sample_weight_basic():
     assert len(with_weights) == 3
     assert with_weights.X.shape == (3, 2)
 
-def test_pos_label_basic():
-    assert (basic().y == [3, 7, 11]).all()
-    assert (basic(pos_label=3).y == [1, 0, 0]).all()
-    assert (basic(pos_label=[3, 7, 11]).y == 1).all()
-    assert (basic(pos_label=lambda y: 10 > y > 5).y == [0, 1, 0]).all()
+# def test_pos_label_basic():
+#     assert (basic().y == [3, 7, 11]).all()
+#     assert (basic(pos_label=3).y == [1, 0, 0]).all()
+#     assert (basic(pos_label=[3, 7, 11]).y == 1).all()
+#     assert (basic(pos_label=lambda y: 10 > y > 5).y == [0, 1, 0]).all()
 
 def test_usecols_dropcols_basic():
     assert basic(usecols='X1').X.columns.tolist() == ['X1']
@@ -59,26 +59,26 @@ def test_numeric_only_basic():
     assert basic(dropcols='Z', numeric_only=True).X.shape == (3, 2)
     assert (basic(dropcols='X1', numeric_only=True).X.dtypes == 'int').all()
 
-def test_fetch_and_format_openml():
-    df = fetch_and_format_openml('german')
-    assert df.equals(df.select_dtypes(['number', 'category']))
+# def test_fetch_and_format_openml():
+#     df = fetch_and_format_openml('german')
+#     assert df.equals(df.select_dtypes(['number', 'category']))
 
-def test_load_adult():
-    adult = load_adult()
+def test_fetch_adult():
+    adult = fetch_adult()
     assert len(adult) == 3
     assert adult.X.shape == (45222, 13)
-    assert load_adult(dropna=False).X.shape == (48842, 13)
-    assert load_adult(numeric_only=True).X.shape == (48842, 5)
+    assert fetch_adult(dropna=False).X.shape == (48842, 13)
+    assert fetch_adult(numeric_only=True).X.shape == (48842, 7)
 
-def test_load_german():
-    german = load_german()
+def test_fetch_german():
+    german = fetch_german()
     assert len(german) == 2
     assert german.X.shape == (1000, 21)
-    assert load_german(numeric_only=True).X.shape == (1000, 7)
+    assert fetch_german(numeric_only=True).X.shape == (1000, 8)
 
-def test_load_bank():
-    bank = load_bank()
+def test_fetch_bank():
+    bank = fetch_bank()
     assert len(bank) == 2
     assert bank.X.shape == (45211, 15)
-    assert load_bank(dropcols=[]).X.shape == (45211, 16)
-    assert load_bank(numeric_only=True).X.shape == (45211, 6)
+    assert fetch_bank(dropcols=[]).X.shape == (45211, 16)
+    assert fetch_bank(numeric_only=True).X.shape == (45211, 6)
diff --git a/aif360/sklearn/tests/test_metrics.py b/aif360/sklearn/tests/test_metrics.py
index ed3636f8..9edf9146 100644
--- a/aif360/sklearn/tests/test_metrics.py
+++ b/aif360/sklearn/tests/test_metrics.py
@@ -2,19 +2,16 @@
 from sklearn.linear_model import LogisticRegression
 
 from aif360.datasets import AdultDataset
-from aif360.sklearn.datasets import load_adult
+from aif360.sklearn.datasets import fetch_adult
 from aif360.metrics import ClassificationMetric
 from aif360.sklearn.metrics import *
 
 
-X, y, sample_weight = load_adult(numeric_only=True)
-X.insert(2, 'race', X.index.get_level_values('race').to_series(index=X.index).map(
-        lambda r: r == 'White').astype('float'))
-X.insert(3, 'sex', X.index.get_level_values('sex').to_series(index=X.index).map(
-        {'Male': 1, 'Female': 0}).astype('float'))
+X, y, sample_weight = fetch_adult(numeric_only=True)
+y = y.factorize(sort=True)[0]
 y_pred = LogisticRegression(solver='liblinear').fit(X, y,
         sample_weight=sample_weight).predict(X)
-priv = 'sex == "Male"'
+priv = X.index.get_level_values('sex')
 adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
         features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss',
                           'hours-per-week'], features_to_drop=[])
@@ -30,10 +27,10 @@ def test_dataset_equality():
     # print(adult.feature_names)
     # print(adult.features[:5])
     assert (adult.features == X.values).all()
-    assert (adult.labels == y.values).all()
+    assert (adult.labels.ravel() == y).all()
 
 def test_consistency():
-    assert consistency_score(X, y) == cm.consistency()
+    assert np.isclose(consistency_score(X, y), cm.consistency())
 
 def test_specificity():
     assert specificity_score(y, y_pred, sample_weight=sample_weight) == cm.specificity()
@@ -42,24 +39,24 @@ def test_selection_rate():
     assert selection_rate(y, y_pred, sample_weight=sample_weight) == cm.selection_rate()
 
 def test_disparate_impact():
-    assert disparate_impact_ratio(y, y_pred, priv_expr=priv,
+    assert disparate_impact_ratio(y, y_pred, groups=priv, priv_group='Male',
             sample_weight=sample_weight) == cm.disparate_impact()
 
 def test_statistical_parity():
-    assert statistical_parity_difference(y, y_pred, priv_expr=priv,
+    assert statistical_parity_difference(y, y_pred, groups=priv, priv_group='Male',
             sample_weight=sample_weight) == cm.statistical_parity_difference()
 
 def test_equal_opportunity():
-    assert equal_opportunity_difference(y, y_pred, priv_expr=priv,
+    assert equal_opportunity_difference(y, y_pred, groups=priv, priv_group='Male',
             sample_weight=sample_weight) == cm.equal_opportunity_difference()
 
 def test_average_odds_difference():
-    assert np.isclose(average_odds_difference(y, y_pred, priv_expr=priv,
+    assert np.isclose(average_odds_difference(y, y_pred, groups=priv, priv_group='Male',
                                               sample_weight=sample_weight),
                       cm.average_odds_difference())
 
 def test_average_odds_error():
-    assert np.isclose(average_odds_error(y, y_pred, priv_expr=priv,
+    assert np.isclose(average_odds_error(y, y_pred, groups=priv, priv_group='Male',
                                          sample_weight=sample_weight),
                       cm.average_abs_odds_difference())
 
@@ -68,5 +65,5 @@ def test_generalized_entropy_index():
                       cm.generalized_entropy_index())
 
 def test_between_group_generalized_entropy_index():
-    assert between_group_generalized_entropy_error(y, y_pred, priv_expr=priv) \
+    assert between_group_generalized_entropy_error(y, y_pred, groups=priv, priv_group='Male') \
         == cm.between_group_generalized_entropy_index()

From 15a8eb2de59e934ceff7db02eeb4408002d5f093 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Wed, 5 Jun 2019 22:49:46 -0400
Subject: [PATCH 09/61] added travis badge to README

---
 aif360/sklearn/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/aif360/sklearn/README.md b/aif360/sklearn/README.md
index fbaf9adc..59298f3f 100644
--- a/aif360/sklearn/README.md
+++ b/aif360/sklearn/README.md
@@ -1,5 +1,7 @@
 ## `aif360.sklearn`
 
+[![Build Status](https://travis-ci.org/IBM/AIF360.svg?branch=sklearn-compat)](https://travis-ci.org/IBM/AIF360)
+
 This is a wholly separate interface for interacting with data, viewing metrics,
 and running debiasing algorithms than the main AIF360 package. The purpose of
 this sub-package is to match scikit-learn paradigms/APIs for easier integration

From 3f594a42e5c04823a8ff6f1da9350783f03894bf Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 13 Jun 2019 10:06:25 -0400
Subject: [PATCH 10/61] updated todo with external blockers

---
 aif360/sklearn/README.md | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/aif360/sklearn/README.md b/aif360/sklearn/README.md
index 59298f3f..695b5eff 100644
--- a/aif360/sklearn/README.md
+++ b/aif360/sklearn/README.md
@@ -25,12 +25,15 @@ objects with sample properties (protected attributes) as the index
   - [ ] Generalized confusion matrix
   - [ ] Sample distortion metrics
 - [ ] Make inprocessing algorithms compatible as sklearn `Estimator`s
+  - [ ] **[External]** `get_feature_names()` from data preprocessing
+  steps that would remove DataFrame formatting
+    - [ ] SLEP008?
+  - [ ] Prejudice remover
   - [ ] Adversarial debiasing
   - [ ] Meta-fair classifier
-  - [ ] Prejudice remover
 - [ ] Make preprocessing algorithms compatible as sklearn `Transformer`s
-  - [ ] Add functionality to modify X and y (worst case: just `predict()` +
-  `transform()` separately)
+  - [ ] **[External]** Add functionality to modify X and y
+    - [ ] SLEP001
   - [ ] Disparate impact remover
   - [ ] Learning fair representations
   - [ ] Optimized preprocessing
@@ -38,7 +41,8 @@ objects with sample properties (protected attributes) as the index
     - [ ] Use dynamic object to pass sample_weight to estimator, etc. after they
     are fitted
 - [ ] Make postprocessing algorithms compatible
-  - [ ] Allow `fit()` on `y_true`,`y_pred`
+  - [ ] **[External]** Allow for `fit(y_true, y_pred)`
+    - [ ] New SLEP?
   - [ ] Calibrated equalized odds postprocessing
   - [ ] Equalized odds postprocessing
   - [ ] Reject option classification

From 7754b32e1bf4bb1e2c6700311efdf35b063e55e7 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 13 Jun 2019 11:37:40 -0400
Subject: [PATCH 11/61] added reweighing workaround to example

---
 aif360/sklearn/examples/Getting Started.ipynb | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/aif360/sklearn/examples/Getting Started.ipynb b/aif360/sklearn/examples/Getting Started.ipynb
index 58031a8d..0df0db33 100644
--- a/aif360/sklearn/examples/Getting Started.ipynb	
+++ b/aif360/sklearn/examples/Getting Started.ipynb	
@@ -15,10 +15,12 @@
    "source": [
     "import numpy as np\n",
     "import pandas as pd\n",
+    "from sklearn.pipeline import make_pipeline\n",
     "from sklearn.linear_model import LogisticRegression\n",
     "from sklearn.metrics import recall_score\n",
     "from sklearn.model_selection import train_test_split\n",
     "\n",
+    "from aif360.sklearn.algorithms.preprocessing import Reweighing\n",
     "from aif360.sklearn.datasets import fetch_adult\n",
     "from aif360.sklearn.metrics import disparate_impact_ratio"
    ]
@@ -441,7 +443,13 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "pipe = make_pipeline(Reweighing(), LinearRegression())\n",
+    "# sample_weight_ will be updated after it is fit\n",
+    "fit_params = {'linearregression__sample_weight':\n",
+    "              pipe.named_steps.reweighing.sample_weight_}\n",
+    "pipe.fit(X, y, **fit_params)"
+   ]
   }
  ],
  "metadata": {

From 17b0c952c3ac776d15c46946359824bf3cdf8d16 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 18 Jun 2019 16:55:49 -0400
Subject: [PATCH 12/61] added Reweighing algorithm

---
 aif360/sklearn/README.md                   |  11 +-
 aif360/sklearn/datasets/openml_datasets.py |  28 +++--
 aif360/sklearn/preprocessing/__init__.py   |   1 +
 aif360/sklearn/preprocessing/reweighing.py | 113 +++++++++++++++++++++
 aif360/sklearn/tests/test_reweighing.py    |  57 +++++++++++
 docs/source/modules/sklearn.rst            |   6 ++
 6 files changed, 203 insertions(+), 13 deletions(-)
 create mode 100644 aif360/sklearn/preprocessing/__init__.py
 create mode 100644 aif360/sklearn/preprocessing/reweighing.py
 create mode 100644 aif360/sklearn/tests/test_reweighing.py

diff --git a/aif360/sklearn/README.md b/aif360/sklearn/README.md
index 695b5eff..7912bb4c 100644
--- a/aif360/sklearn/README.md
+++ b/aif360/sklearn/README.md
@@ -27,19 +27,20 @@ objects with sample properties (protected attributes) as the index
 - [ ] Make inprocessing algorithms compatible as sklearn `Estimator`s
   - [ ] **[External]** `get_feature_names()` from data preprocessing
   steps that would remove DataFrame formatting
-    - [ ] SLEP008?
+    - [ ] SLEP008
   - [ ] Prejudice remover
   - [ ] Adversarial debiasing
   - [ ] Meta-fair classifier
 - [ ] Make preprocessing algorithms compatible as sklearn `Transformer`s
   - [ ] **[External]** Add functionality to modify X and y
-    - [ ] SLEP001
+    - [ ] SLEP005 - Resampling API
   - [ ] Disparate impact remover
   - [ ] Learning fair representations
   - [ ] Optimized preprocessing
-  - [ ] Reweighing
-    - [ ] Use dynamic object to pass sample_weight to estimator, etc. after they
-    are fitted
+  - [X] Reweighing
+    - [X] Use dynamic object to pass sample_weight to estimator, etc. after they
+    are fitted (NOTE: does not work with GridSearchCV)
+    - [ ] **[External]** SLEP006 - Sample properties
 - [ ] Make postprocessing algorithms compatible
   - [ ] **[External]** Allow for `fit(y_true, y_pred)`
     - [ ] New SLEP?
diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index ac8c32d1..1aac923a 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -93,18 +93,18 @@ def fetch_adult(subset='all', data_home=None, binary_race=True, usecols=[],
                               usecols=usecols, dropcols=dropcols,
                               numeric_only=numeric_only, dropna=dropna)
 
-def fetch_german(data_home=None, usecols=[], dropcols=[], numeric_only=False,
-                 dropna=True):
+def fetch_german(data_home=None, binary_age=False, usecols=[], dropcols=[],
+                 numeric_only=False, dropna=True):
     """Load the German Credit Dataset.
 
     Protected attributes are 'sex' ('male' is privileged and 'female' is
-    unprivileged) and 'age' (left as continuous but [1]_ recommends `age >= 25`
-    be considered privileged and `age < 25` be considered unprivileged; this can
-    be done at metric evaluation time). The outcome variable is 'good'
-    (favorable) or 'bad' (unfavorable).
+    unprivileged) and 'age' (left as continuous but [#kamiran09]_ recommends
+    `age >= 25` be considered privileged and `age < 25` be considered
+    unprivileged; this can be done at metric evaluation time). The outcome
+    variable is 'good' (favorable) or 'bad' (unfavorable).
 
     References:
-        .. [1] F. Kamiran and T. Calders, "Classifying without
+        .. [#kamiran09] F. Kamiran and T. Calders, "Classifying without
            discriminating," 2nd International Conference on Computer,
            Control and Communication, 2009.
 
@@ -131,7 +131,15 @@ def fetch_german(data_home=None, usecols=[], dropcols=[], numeric_only=False,
         >>> german_num.X.shape
         (1000, 7)
 
-        >>> DISPARATE IMPACT AGE EXAMPLE HERE
+
+
+        >>> X, y = fetch_german(numeric_only=True)
+        >>> y_pred = LogisticRegression().fit(X, y).predict(X)
+        >>> age = X.index.get_level_values('age') >= 25
+        >>> disparate_impact_ratio(y, y_pred, groups=age, priv_group=True,
+        ... pos_label='good')
+        0.9483094846144106
+
     """
     df = to_dataframe(fetch_openml(data_id=31, data_home=data_home or
                                    DATA_HOME_DEFAULT, target_column=None))
@@ -139,6 +147,10 @@ def fetch_german(data_home=None, usecols=[], dropcols=[], numeric_only=False,
     df = df.rename(columns={'class': 'credit-risk'})  # more descriptive name
     df['credit-risk'] = df['credit-risk'].cat.as_ordered()  # 'bad' < 'good'
 
+    # binarize protected attributes
+    if binary_age:
+        df.age = pd.cut(df.age, [0, 25, 100], right=False, labels=['young', 'aged'])
+
     # Note: marital_status directly implies sex. i.e. 'div/dep/mar' => 'female'
     # and all others => 'male'
     personal_status = df.pop('personal_status').str.split(expand=True)
diff --git a/aif360/sklearn/preprocessing/__init__.py b/aif360/sklearn/preprocessing/__init__.py
new file mode 100644
index 00000000..8cac812f
--- /dev/null
+++ b/aif360/sklearn/preprocessing/__init__.py
@@ -0,0 +1 @@
+from aif360.sklearn.preprocessing.reweighing import *
diff --git a/aif360/sklearn/preprocessing/reweighing.py b/aif360/sklearn/preprocessing/reweighing.py
new file mode 100644
index 00000000..f61d7643
--- /dev/null
+++ b/aif360/sklearn/preprocessing/reweighing.py
@@ -0,0 +1,113 @@
+from warnings import warn
+
+import numpy as np
+from sklearn.base import BaseEstimator, TransformerMixin, MetaEstimatorMixin
+from sklearn.base import clone
+from sklearn.utils.metaestimators import if_delegate_has_method
+
+class Reweighing(BaseEstimator, TransformerMixin):
+    """Reweighing is a preprocessing technique that weights the examples in each
+    (group, label) combination differently to ensure fairness before
+    classification [#kamiran12]_.
+
+    Attributes:
+        groups_ (array, shape (n_groups,)): A list of group labels known to the
+            transformer.
+        classes_ (array, shape (n_classes,)): A list of class labels known to
+            the transformer.
+        sample_weight_ (array, shape (n_samples,)): New sample weights after
+            transformation. See examples for details.
+        reweigh_factors_ (array, shape (n_groups, n_labels)): Reweighing factors
+            for each combination of group and class labels used to debias
+            samples. Existing sample weights are multiplied by the corresponding
+            factor for that sample's group and class.
+
+    Examples:
+        >>> pipe = make_pipeline(Reweighing(), LinearRegression())
+        >>> # sample_weight_ will be used after it is fit
+        >>> fit_params = {'linearregression__sample_weight':
+        ...               pipe['reweighing'].sample_weight_}
+        >>> pipe.fit(X, y, **fit_params)
+
+    References:
+        .. [#kamiran12] F. Kamiran and T. Calders,  "Data Preprocessing
+           Techniques for Classification without Discrimination," Knowledge and
+           Information Systems, 2012.
+    """
+    # TODO: binary option for groups/labels?
+    def __init__(self):
+        self.sample_weight_ = np.empty(0)  # dynamic object for use in Pipeline
+
+    def fit(self, X, y=None):
+        raise NotImplementedError("Only 'fit_transform' is allowed.")
+
+    def transform(self, X):
+        raise NotImplementedError("Only 'fit_transform' is allowed.")
+
+    def fit_transform(self, X, y, groups, sample_weight=None):
+        """Compute the factors for reweighing the dataset and transform the
+        sample weights.
+
+        Args:
+            X (array-like): Training samples.
+            y (array-like): Training labels.
+            groups (array-like): Protected attributes corresponding to samples.
+            sample_weight (array-like, optional): Sample weights.
+
+        Returns:
+            X: Unchanged samples. Only the sample weights are different after
+            transformation (see the `sample_weight_` attribute).
+        """
+        if sample_weight is None:
+            sample_weight = np.ones(y.shape)
+        # resize all references (might be part of a Pipeline)
+        self.sample_weight_.resize(sample_weight.shape, refcheck=False)
+        self.groups_ = np.unique(groups)
+        self.classes_ = np.unique(y)
+
+        def N_(i): return sample_weight[i].sum()
+
+        N = sample_weight.sum()
+        for g in self.groups_:
+            for c in self.classes_:
+                g_and_c = (groups == g) & (y == c)
+                if np.any(g_and_c):
+                    W_gc = N_(groups == g) * N_(y == c) / (N * N_(g_and_c))
+                    self.sample_weight_[g_and_c] = W_gc * sample_weight[g_and_c]
+        return X
+
+
+class ReweighingMeta(BaseEstimator, MetaEstimatorMixin):
+    def __init__(self, estimator):
+        self.reweigher = Reweighing()
+        self.estimator = estimator
+
+    def fit(self, X, y, pa_groups, sample_weight=None):
+        self.reweigher_ = clone(self.reweigher)
+        self.estimator_ = clone(self.estimator)
+
+        self.reweigher_.fit_transform(X, y, pa_groups, sample_weight=sample_weight)
+        try:
+            self.estimator_.fit(X, y, sample_weight=self.reweigher_.sample_weight_)
+        except TypeError:
+            raise ValueError("'estimator' ({}) does not incorporate "
+                             "'sample_weight' in 'fit()''.".format(
+                                     type(self.estimator_)))
+        return self
+
+    @if_delegate_has_method('estimator')
+    def predict(self, X):
+        return self.estimator_.predict(X)
+
+    @if_delegate_has_method('estimator')
+    def predict_proba(self, X):
+        return self.estimator_.predict_proba(X)
+
+    @if_delegate_has_method('estimator')
+    def predict_log_proba(self, X):
+        return self.estimator_.predict_log_proba(X)
+
+    # TODO: sample_weight isn't passed by GridSearchCV.score()
+    @if_delegate_has_method('estimator')
+    def score(self, X, y, sample_weight=None):
+        return self.estimator_.score(X, y, sample_weight=sample_weight)
diff --git a/aif360/sklearn/tests/test_reweighing.py b/aif360/sklearn/tests/test_reweighing.py
new file mode 100644
index 00000000..ec42c0e9
--- /dev/null
+++ b/aif360/sklearn/tests/test_reweighing.py
@@ -0,0 +1,57 @@
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import GridSearchCV
+from sklearn.pipeline import make_pipeline
+
+from aif360.datasets import GermanDataset
+from aif360.sklearn.datasets import fetch_german
+from aif360.algorithms.preprocessing import Reweighing as OrigReweighing
+from aif360.sklearn.preprocessing import Reweighing, ReweighingMeta
+
+
+X, y = fetch_german(numeric_only=True, binary_age=True, dropcols='duration')
+german = GermanDataset(categorical_features=[], features_to_keep=[
+        'credit_amount', 'investment_as_income_percentage', 'residence_since',
+        'age', 'number_of_credits', 'people_liable_for', 'sex'])
+
+def test_dataset_equality():
+    assert (german.features == X.values).all()
+
+def test_reweighing_sex():
+    orig_rew = OrigReweighing(unprivileged_groups=[{'sex': 0}],
+                              privileged_groups=[{'sex': 1}])
+    german_fair = orig_rew.fit_transform(german)
+    rew = Reweighing()
+    rew.fit_transform(X, y, groups=X.index.get_level_values('sex'))
+
+    # assert orig_rew.w_up_unfav == rew.reweigh_factors_[0, 0]
+    # assert orig_rew.w_up_fav == rew.reweigh_factors_[0, 1]
+    # assert np.isclose(orig_rew.w_p_unfav, rew.reweigh_factors_[1, 0])
+    # assert orig_rew.w_p_fav, rew.reweigh_factors_[1, 1]
+    assert np.allclose(german_fair.instance_weights, rew.sample_weight_)
+
+def test_reweighing_intersection():
+    rew = Reweighing()
+    rew.fit_transform(X, y, groups=X.index.to_flat_index())
+    # assert rew.reweigh_factors_.shape == (4, 2)
+    assert len(rew.groups_) == 4
+
+def test_pipeline():
+    logreg = LogisticRegression(solver='liblinear')
+    pipe = make_pipeline(Reweighing(), logreg)
+    fit_params = {'logisticregression__sample_weight': pipe[0].sample_weight_,
+                  'reweighing__groups': X.index.get_level_values('sex')}
+    pipe.fit(X, y, **fit_params)
+    assert (logreg.fit(X, y, sample_weight=pipe[0].sample_weight_).coef_
+         == pipe[-1].coef_).all()
+
+def test_gridsearch():
+    rew = ReweighingMeta(LogisticRegression(solver='liblinear'))
+    params = {'estimator__C': [1, 10]}
+    clf = GridSearchCV(rew, params, cv=5)
+    # TODO: 'groups' name clashes with CV splitter
+    fit_params = {'pa_groups': X.index.get_level_values('sex'),
+                  'sample_weight': np.random.random(y.shape)}
+    clf.fit(X, y, **fit_params)
+    # print(clf.score(X, y))
+    assert len(clf.best_estimator_.reweigher_.groups_) == 2
diff --git a/docs/source/modules/sklearn.rst b/docs/source/modules/sklearn.rst
index 273f8256..5a9fdb15 100644
--- a/docs/source/modules/sklearn.rst
+++ b/docs/source/modules/sklearn.rst
@@ -17,3 +17,9 @@ Metrics
 
 .. automodule:: aif360.sklearn.metrics.metrics
     :members:
+
+Preprocessing
+-------------
+
+.. autoclass:: aif360.sklearn.preprocessing.reweighing.Reweighing
+    :members:

From cc9246f9cb9e1a0845965a61cd546b045668bea1 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 18 Jun 2019 16:59:07 -0400
Subject: [PATCH 13/61] clean up comments

---
 aif360/sklearn/datasets/utils.py        | 12 ------------
 aif360/sklearn/metrics/metrics.py       |  4 ++--
 aif360/sklearn/tests/test_datasets.py   | 10 ----------
 aif360/sklearn/tests/test_metrics.py    |  4 ----
 aif360/sklearn/tests/test_reweighing.py |  6 +-----
 5 files changed, 3 insertions(+), 33 deletions(-)

diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index b5fff624..e9cce52f 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -17,11 +17,6 @@ def standarize_dataset(df, protected_attributes, target, sample_weight=None,
             are dropped from the features, they remain in the index.
         target (single label or list-like): Column label of the target (outcome)
             variable.
-        # pos_label (scalar, list-like, or function, optional): A value, list of
-        #     values, or boolean function (True if positive) designating the
-        #     positive binary label from the raw data. All others will be
-        #     considered negative. The resulting target array will have value 1 if
-        #     positive and 0 if negative.
         sample_weight (single label, optional): Name of the column containing
             sample weights.
         usecols (single label or list-like, optional): Column(s) to keep. All
@@ -65,13 +60,6 @@ def standarize_dataset(df, protected_attributes, target, sample_weight=None,
 
     # TODO: convert to 1/0 if numeric_only?
     y = df.pop(target)
-    # if not callable(pos_label):
-    #     if not is_list_like(pos_label):
-    #         pos_label = [pos_label]
-    #     # find all instances which match any of the favorable classes
-    #     y = y.isin(pos_label).astype('int')
-    # else:
-    #     y = y.apply(pos_label).astype('int')
 
     # Column-wise drops
     df = df.drop(dropcols, axis=1)
diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index a0e4f813..d4da5c81 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -16,7 +16,7 @@ def difference(func, y, *args, groups, priv_group=1, sample_weight=None, **kwarg
     Args:
         func (function): A metric function from `aif360.sklearn.metrics` or
             `sklearn.metrics`.
-        y (pandas.Series): Outcome vector with protected attributes as index.
+        y (array-like): Outcome vector with protected attributes as index.
         *args: Additional positional args to be passed through to `func`.
         groups (array-like, keyword-only): Group labels (protected attributes)
             for the samples.
@@ -55,7 +55,7 @@ def ratio(func, y, *args, groups, priv_group=1, sample_weight=None, **kwargs):
     Args:
         func (function): A metric function from `aif360.sklearn.metrics` or
             `sklearn.metrics`.
-        y (pandas.Series): Outcome vector with protected attributes as index.
+        y (array-like): Outcome vector with protected attributes as index.
         *args: Additional positional args to be passed through to `func`.
         groups (array-like, keyword-only): Group labels (protected attributes)
             for the samples.
diff --git a/aif360/sklearn/tests/test_datasets.py b/aif360/sklearn/tests/test_datasets.py
index 3e5c8a4a..eab905f3 100644
--- a/aif360/sklearn/tests/test_datasets.py
+++ b/aif360/sklearn/tests/test_datasets.py
@@ -30,12 +30,6 @@ def test_sample_weight_basic():
     assert len(with_weights) == 3
     assert with_weights.X.shape == (3, 2)
 
-# def test_pos_label_basic():
-#     assert (basic().y == [3, 7, 11]).all()
-#     assert (basic(pos_label=3).y == [1, 0, 0]).all()
-#     assert (basic(pos_label=[3, 7, 11]).y == 1).all()
-#     assert (basic(pos_label=lambda y: 10 > y > 5).y == [0, 1, 0]).all()
-
 def test_usecols_dropcols_basic():
     assert basic(usecols='X1').X.columns.tolist() == ['X1']
     assert basic(usecols=['X1', 'Z']).X.columns.tolist() == ['X1', 'Z']
@@ -59,10 +53,6 @@ def test_numeric_only_basic():
     assert basic(dropcols='Z', numeric_only=True).X.shape == (3, 2)
     assert (basic(dropcols='X1', numeric_only=True).X.dtypes == 'int').all()
 
-# def test_fetch_and_format_openml():
-#     df = fetch_and_format_openml('german')
-#     assert df.equals(df.select_dtypes(['number', 'category']))
-
 def test_fetch_adult():
     adult = fetch_adult()
     assert len(adult) == 3
diff --git a/aif360/sklearn/tests/test_metrics.py b/aif360/sklearn/tests/test_metrics.py
index 9edf9146..a2db21ec 100644
--- a/aif360/sklearn/tests/test_metrics.py
+++ b/aif360/sklearn/tests/test_metrics.py
@@ -22,10 +22,6 @@
                           privileged_groups=[{'sex': 1}])
 
 def test_dataset_equality():
-    # print(X.shape, adult.features.shape)
-    # print(X.head())
-    # print(adult.feature_names)
-    # print(adult.features[:5])
     assert (adult.features == X.values).all()
     assert (adult.labels.ravel() == y).all()
 
diff --git a/aif360/sklearn/tests/test_reweighing.py b/aif360/sklearn/tests/test_reweighing.py
index ec42c0e9..30e8f37a 100644
--- a/aif360/sklearn/tests/test_reweighing.py
+++ b/aif360/sklearn/tests/test_reweighing.py
@@ -24,17 +24,13 @@ def test_reweighing_sex():
     rew = Reweighing()
     rew.fit_transform(X, y, groups=X.index.get_level_values('sex'))
 
-    # assert orig_rew.w_up_unfav == rew.reweigh_factors_[0, 0]
-    # assert orig_rew.w_up_fav == rew.reweigh_factors_[0, 1]
-    # assert np.isclose(orig_rew.w_p_unfav, rew.reweigh_factors_[1, 0])
-    # assert orig_rew.w_p_fav, rew.reweigh_factors_[1, 1]
     assert np.allclose(german_fair.instance_weights, rew.sample_weight_)
 
 def test_reweighing_intersection():
     rew = Reweighing()
     rew.fit_transform(X, y, groups=X.index.to_flat_index())
-    # assert rew.reweigh_factors_.shape == (4, 2)
     assert len(rew.groups_) == 4
+    assert len(rew.classes_) == 2
 
 def test_pipeline():
     logreg = LogisticRegression(solver='liblinear')

From 8c58f650f12ceb04a2f066b791ec1b03c331ea85 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 18 Jun 2019 16:59:50 -0400
Subject: [PATCH 14/61] fixed package version in docs

---
 docs/source/conf.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 66493140..03058220 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -67,10 +67,10 @@
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
-# The short X.Y version.
-version = u'0.1'
 # The full version, including alpha/beta/rc tags.
-release = u'0.1.0'
+release = aif360.__version__
+# The short X.Y version.
+version = '.'.join(release.split('.')[:2])
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

From 1e7899c42498e30ccf2db3435e127014f5fe80a0 Mon Sep 17 00:00:00 2001
From: Animesh Singh <singhan@us.ibm.com>
Date: Wed, 19 Jun 2019 22:48:43 -0700
Subject: [PATCH 15/61] adding hyperlinks to SLEPs

---
 aif360/sklearn/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aif360/sklearn/README.md b/aif360/sklearn/README.md
index 7912bb4c..558ac562 100644
--- a/aif360/sklearn/README.md
+++ b/aif360/sklearn/README.md
@@ -27,13 +27,13 @@ objects with sample properties (protected attributes) as the index
 - [ ] Make inprocessing algorithms compatible as sklearn `Estimator`s
   - [ ] **[External]** `get_feature_names()` from data preprocessing
   steps that would remove DataFrame formatting
-    - [ ] SLEP008
+    - [ ] [SLEP008](https://github.com/scikit-learn/enhancement_proposals/pull/18)?
   - [ ] Prejudice remover
   - [ ] Adversarial debiasing
   - [ ] Meta-fair classifier
 - [ ] Make preprocessing algorithms compatible as sklearn `Transformer`s
   - [ ] **[External]** Add functionality to modify X and y
-    - [ ] SLEP005 - Resampling API
+    - [ ] [SLEP001](https://github.com/scikit-learn/enhancement_proposals/blob/master/slep001/proposal.rst)
   - [ ] Disparate impact remover
   - [ ] Learning fair representations
   - [ ] Optimized preprocessing

From c1c1e4052d6738b70d78c862f8935185bf24ecec Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Mon, 24 Jun 2019 10:46:03 -0400
Subject: [PATCH 16/61] added binary_age opt to german; fixed NAs in bank

---
 aif360/sklearn/datasets/openml_datasets.py | 37 +++++++++++-----------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 1aac923a..98d0aa31 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -72,8 +72,8 @@ def fetch_adult(subset='all', data_home=None, binary_race=True, usecols=[],
     if subset not in {'train', 'test', 'all'}:
         raise ValueError("subset must be either 'train', 'test', or 'all'; "
                          "cannot be {}".format(subset))
-    df = to_dataframe(fetch_openml(data_id=1590, data_home=data_home or
-                                   DATA_HOME_DEFAULT, target_column=None))
+    df = to_dataframe(fetch_openml(data_id=1590, target_column=None,
+                                   data_home=data_home or DATA_HOME_DEFAULT))
     if subset == 'train':
         df = df.iloc[16281:]
     elif subset == 'test':
@@ -93,15 +93,15 @@ def fetch_adult(subset='all', data_home=None, binary_race=True, usecols=[],
                               usecols=usecols, dropcols=dropcols,
                               numeric_only=numeric_only, dropna=dropna)
 
-def fetch_german(data_home=None, binary_age=False, usecols=[], dropcols=[],
+def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
                  numeric_only=False, dropna=True):
     """Load the German Credit Dataset.
 
     Protected attributes are 'sex' ('male' is privileged and 'female' is
-    unprivileged) and 'age' (left as continuous but [#kamiran09]_ recommends
-    `age >= 25` be considered privileged and `age < 25` be considered
-    unprivileged; this can be done at metric evaluation time). The outcome
-    variable is 'good' (favorable) or 'bad' (unfavorable).
+    unprivileged) and 'age' (binarized by default as recommended by
+    [#kamiran09]_: `age >= 25` is considered privileged and `age < 25` is
+    considered unprivileged; see the `binary_age` flag to keep this continuous).
+    The outcome variable is 'good' (favorable) or 'bad' (unfavorable).
 
     References:
         .. [#kamiran09] F. Kamiran and T. Calders, "Classifying without
@@ -112,6 +112,9 @@ def fetch_german(data_home=None, binary_age=False, usecols=[], dropcols=[],
         data_home (string, optional): Specify another download and cache folder
             for the datasets. By default all AIF360 datasets are stored in
             'aif360/sklearn/data/raw' subfolders.
+        binary_age (bool, optional): If `True`, split protected attribute,
+            `age`, into 'aged' (privileged) and 'young' (unprivileged). The
+            `age` feature remains continuous.
         usecols (single label or list-like, optional): Column name(s) to keep.
             All others are dropped.
         dropcols (single label or list-like, optional): Column name(s) to drop.
@@ -135,21 +138,20 @@ def fetch_german(data_home=None, binary_age=False, usecols=[], dropcols=[],
 
         >>> X, y = fetch_german(numeric_only=True)
         >>> y_pred = LogisticRegression().fit(X, y).predict(X)
-        >>> age = X.index.get_level_values('age') >= 25
-        >>> disparate_impact_ratio(y, y_pred, groups=age, priv_group=True,
+        >>> disparate_impact_ratio(y, y_pred, prot_attr='age', priv_group=True,
         ... pos_label='good')
         0.9483094846144106
 
     """
-    df = to_dataframe(fetch_openml(data_id=31, data_home=data_home or
-                                   DATA_HOME_DEFAULT, target_column=None))
+    df = to_dataframe(fetch_openml(data_id=31, target_column=None,
+                                   data_home=data_home or DATA_HOME_DEFAULT))
 
     df = df.rename(columns={'class': 'credit-risk'})  # more descriptive name
     df['credit-risk'] = df['credit-risk'].cat.as_ordered()  # 'bad' < 'good'
 
-    # binarize protected attributes
-    if binary_age:
-        df.age = pd.cut(df.age, [0, 25, 100], right=False, labels=['young', 'aged'])
+    # binarize protected attribute (but not corresponding feature)
+    age = (pd.cut(df.age, [0, 25, 100], right=False, labels=['young', 'aged'])
+           if binary_age else 'age')
 
     # Note: marital_status directly implies sex. i.e. 'div/dep/mar' => 'female'
     # and all others => 'male'
@@ -158,7 +160,7 @@ def fetch_german(data_home=None, binary_age=False, usecols=[], dropcols=[],
     df = df.join(personal_status.astype('category'))
     df.sex = df.sex.cat.as_ordered()  # 'female' < 'male'
 
-    return standarize_dataset(df, protected_attributes=['sex', 'age'],
+    return standarize_dataset(df, protected_attributes=['sex', age],
                               target='credit-risk', usecols=usecols,
                               dropcols=dropcols, numeric_only=numeric_only,
                               dropna=dropna)
@@ -210,9 +212,8 @@ def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
     df.deposit = df.deposit.cat.rename_categories({'1': 'no', '2': 'yes'})
     # df.deposit = df.deposit.cat.as_ordered()
     # replace 'unknown' marker with NaN
-    df.select_dtypes('category').apply(lambda s:
-            s.cat.remove_categories('unknown', inplace=True)
-            if 'unknown' in s.cat.categories else s)
+    df.apply(lambda s: s.cat.remove_categories('unknown', inplace=True)
+             if hasattr(s, 'cat') and 'unknown' in s.cat.categories else s)
     return standarize_dataset(df, protected_attributes='age', target='deposit',
                               usecols=usecols, dropcols=dropcols,
                               numeric_only=numeric_only, dropna=dropna)

From 93a7cdf00bb606ff91ff2f9bd0a02aec54f71765 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Mon, 24 Jun 2019 10:53:53 -0400
Subject: [PATCH 17/61] modified onehot_transformer to return DataFrame

---
 aif360/sklearn/datasets/utils.py      | 47 ++++++++++++++++++++++-----
 aif360/sklearn/tests/test_datasets.py |  7 +++-
 2 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index e9cce52f..4566c983 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -2,8 +2,9 @@
 
 import pandas as pd
 from pandas.core.dtypes.common import is_list_like
-from sklearn.compose import make_column_transformer
+from sklearn.compose import ColumnTransformer
 from sklearn.preprocessing import OneHotEncoder
+from sklearn.utils.validation import check_is_fitted
 
 def standarize_dataset(df, protected_attributes, target, sample_weight=None,
                        usecols=[], dropcols=[], numeric_only=False, dropna=True):
@@ -56,7 +57,9 @@ def standarize_dataset(df, protected_attributes, target, sample_weight=None,
         >>> X, y = standarize_dataset(df, protected_attributes=0, target=5)
         >>> X_tr, X_te, y_tr, y_te = train_test_split(X, y)
     """
-    df = df.set_index(protected_attributes, drop=False)  # TODO: append=True?
+    df = df.set_index(protected_attributes, drop=False, append=True)
+    # df = df.set_index(sample_weight or np.ones(df.shape[0]), append=True)
+    # df.index = df.index.set_names('sample_weight', level=-1)
 
     # TODO: convert to 1/0 if numeric_only?
     y = df.pop(target)
@@ -84,20 +87,48 @@ def standarize_dataset(df, protected_attributes, target, sample_weight=None,
         y = y.loc[notna]
 
     if sample_weight is not None:
-        sample_weight = df.pop(sample_weight)
         return namedtuple('WeightedDataset', ['X', 'y', 'sample_weight'])(
-                          df, y, sample_weight)
+                          df, y, df.pop(sample_weight).rename('sample_weight'))
     return namedtuple('Dataset', ['X', 'y'])(df, y)
 
-def make_onehot_transformer(X):
+def make_onehot_transformer():
     """Shortcut for encoding categorical features as one-hot vectors.
 
     Note:
-        This changes the column order as well as removes DataFrame formatting.
+        This changes the column order.
 
     Returns:
         sklearn.compose.ColumnTransformer: Class capable of transforming
         categorical features in X to one-hot features.
     """
-    return make_column_transformer((OneHotEncoder(), X.dtypes == 'category'),
-                                   remainder='passthrough')
+    class PandasOutOneHotTransformer(ColumnTransformer):
+        def __init__(self):
+            ohe = ('onehotencoder', OneHotEncoder(),
+                   lambda X: X.dtypes == 'category')
+            super().__init__([ohe], remainder='passthrough')
+
+        def get_feature_names(self):
+            check_is_fitted(self, 'transformers_')
+            dummies = self.named_transformers_.onehotencoder.get_feature_names(
+                    input_features=self.ohe_input_features_)
+            passthroughs = self.passthrough_features_
+            return list(dummies) + list(passthroughs)
+
+        def fit(self, X, y=None):
+            self.ohe_input_features_ = X.columns[X.dtypes == 'category']
+            self.passthrough_features_ = X.columns[X.dtypes != 'category']
+            return super().fit(X, y=y)
+
+        def fit_transform(self, X, y=None):
+            Xt = super().fit_transform(X, y=y)
+            self.ohe_input_features_ = X.columns[X.dtypes == 'category']
+            self.passthrough_features_ = X.columns[X.dtypes != 'category']
+            columns = self.get_feature_names()
+            return pd.DataFrame(Xt, columns=columns, index=X.index)
+
+        def transform(self, X):
+            Xt = super().transform(X)
+            columns = self.get_feature_names()
+            return pd.DataFrame(Xt, columns=columns, index=X.index)
+
+    return PandasOutOneHotTransformer()
diff --git a/aif360/sklearn/tests/test_datasets.py b/aif360/sklearn/tests/test_datasets.py
index eab905f3..4253bcd8 100644
--- a/aif360/sklearn/tests/test_datasets.py
+++ b/aif360/sklearn/tests/test_datasets.py
@@ -4,7 +4,8 @@
 import pandas as pd
 import pytest
 
-from aif360.sklearn.datasets import *
+from aif360.sklearn.datasets import fetch_adult, fetch_bank, fetch_german
+from aif360.sklearn.datasets import standarize_dataset, make_onehot_transformer
 
 
 df = pd.DataFrame([[1, 2, 3, 'a'], [5, 6, 7, 'b'], [np.NaN, 10, 11, 'c']],
@@ -72,3 +73,7 @@ def test_fetch_bank():
     assert bank.X.shape == (45211, 15)
     assert fetch_bank(dropcols=[]).X.shape == (45211, 16)
     assert fetch_bank(numeric_only=True).X.shape == (45211, 6)
+
+def test_onehot_transformer():
+    X, y = fetch_german()
+    assert len(make_onehot_transformer().fit_transform(X).columns) == 63

From 8e52268d69a0739a757f29a9a2975b65eae61e77 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Mon, 24 Jun 2019 10:58:46 -0400
Subject: [PATCH 18/61] tweaks to reweighing to conform with sklearn

---
 aif360/sklearn/preprocessing/__init__.py   |   2 +-
 aif360/sklearn/preprocessing/reweighing.py | 100 ++++++++++++++-------
 aif360/sklearn/tests/test_reweighing.py    |  71 +++++++--------
 3 files changed, 105 insertions(+), 68 deletions(-)

diff --git a/aif360/sklearn/preprocessing/__init__.py b/aif360/sklearn/preprocessing/__init__.py
index 8cac812f..f49b7673 100644
--- a/aif360/sklearn/preprocessing/__init__.py
+++ b/aif360/sklearn/preprocessing/__init__.py
@@ -1 +1 @@
-from aif360.sklearn.preprocessing.reweighing import *
+from aif360.sklearn.preprocessing.reweighing import Reweighing, ReweighingMeta
diff --git a/aif360/sklearn/preprocessing/reweighing.py b/aif360/sklearn/preprocessing/reweighing.py
index f61d7643..58cb13eb 100644
--- a/aif360/sklearn/preprocessing/reweighing.py
+++ b/aif360/sklearn/preprocessing/reweighing.py
@@ -1,11 +1,23 @@
-from warnings import warn
-
 import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, MetaEstimatorMixin
-from sklearn.base import clone
+from pandas.core.dtypes.common import is_list_like
+from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone
+from sklearn.utils import check_consistent_length
 from sklearn.utils.metaestimators import if_delegate_has_method
+from sklearn.utils.validation import column_or_1d, has_fit_parameter
+
+
+def check_inputs(X, y, sample_weight):
+    if not hasattr(X, 'index'):
+        raise TypeError("Expected `DataFrame`, got {} instead.".format(type(X)))
+    y = column_or_1d(y)
+    if sample_weight is not None:
+        sample_weight = column_or_1d(sample_weight)
+    else:
+        sample_weight = np.ones(X.shape[0])
+    check_consistent_length(X, y, sample_weight)
+    return X, y, sample_weight
 
-class Reweighing(BaseEstimator, TransformerMixin):
+class Reweighing(BaseEstimator):
     """Reweighing is a preprocessing technique that weights the examples in each
     (group, label) combination differently to ensure fairness before
     classification [#kamiran12]_.
@@ -34,65 +46,90 @@ class Reweighing(BaseEstimator, TransformerMixin):
            Techniques for Classification without Discrimination," Knowledge and
            Information Systems, 2012.
     """
-    # TODO: binary option for groups/labels?
-    def __init__(self):
-        self.sample_weight_ = np.empty(0)  # dynamic object for use in Pipeline
 
-    def fit(self, X, y=None):
-        raise NotImplementedError("Only 'fit_transform' is allowed.")
+    def __init__(self, prot_attr=None):
+        """
+        Args:
+            prot_attr (single label or list-like, optional): Protected
+                attribute(s) to use as sensitive attribute(s) in the reweighing
+                process. If more than one attribute, all combinations of values
+                (intersections) are considered. Default is `None` meaning all
+                protected attributes from the dataset are used.
+        """
+        self.prot_attr = prot_attr
 
-    def transform(self, X):
-        raise NotImplementedError("Only 'fit_transform' is allowed.")
+    def fit(self, X, y, sample_weight=None):
+        self.fit_transform(X, y, sample_weight=sample_weight)
+        return self
 
-    def fit_transform(self, X, y, groups, sample_weight=None):
+    def fit_transform(self, X, y, sample_weight=None):
         """Compute the factors for reweighing the dataset and transform the
         sample weights.
 
         Args:
             X (array-like): Training samples.
             y (array-like): Training labels.
-            groups (array-like): Protected attributes corresponding to samples.
             sample_weight (array-like, optional): Sample weights.
 
         Returns:
             X: Unchanged samples. Only the sample weights are different after
             transformation (see the `sample_weight_` attribute).
         """
-        if sample_weight is None:
-            sample_weight = np.ones(y.shape)
-        # resize all references (might be part of a Pipeline)
-        self.sample_weight_.resize(sample_weight.shape, refcheck=False)
+        X, y, sample_weight = check_inputs(X, y, sample_weight)
+
+        all_prot_attrs = X.index.names[1:]
+        if self.prot_attr is None:
+            self.prot_attr_ = all_prot_attrs
+        elif not is_list_like(self.prot_attr):
+            self.prot_attr_ = [self.prot_attr]
+        else:
+            self.prot_attr_ = self.prot_attr
+
+        if any(p not in X.index.names for p in self.prot_attr_):
+            raise ValueError("Some of the attributes provided are not present "
+                             "in the dataset. Expected a subset of:\n{}\nGot:\n"
+                             "{}".format(all_prot_attrs, self.prot_attr_))
+
+        self.sample_weight_ = np.empty_like(sample_weight)
+        groups = X.index.droplevel(list(set(X.index.names)
+                                      - set(self.prot_attr_))).to_flat_index()
+        # TODO: maintain categorical ordering
         self.groups_ = np.unique(groups)
         self.classes_ = np.unique(y)
+        n_groups = len(self.groups_)
+        n_classes = len(self.classes_)
+        self.reweigh_factors_ = np.full((n_groups, n_classes), np.nan)
 
         def N_(i): return sample_weight[i].sum()
-
         N = sample_weight.sum()
-        for g in self.groups_:
-            for c in self.classes_:
+        for i, g in enumerate(self.groups_):
+            for j, c in enumerate(self.classes_):
                 g_and_c = (groups == g) & (y == c)
                 if np.any(g_and_c):
                     W_gc = N_(groups == g) * N_(y == c) / (N * N_(g_and_c))
                     self.sample_weight_[g_and_c] = W_gc * sample_weight[g_and_c]
+                    self.reweigh_factors_[i, j] = W_gc
         return X
 
 
 class ReweighingMeta(BaseEstimator, MetaEstimatorMixin):
-    def __init__(self, estimator):
-        self.reweigher = Reweighing()
+    def __init__(self, estimator, reweigher=Reweighing()):
+        if not has_fit_parameter(estimator, 'sample_weight'):
+            raise TypeError("`estimator` (type: {}) does not have fit parameter"
+                            " `sample_weight`.".format(type(estimator)))
+        self.reweigher = reweigher
         self.estimator = estimator
 
-    def fit(self, X, y, pa_groups, sample_weight=None):
+    @property
+    def _estimator_type(self):
+        return self.estimator._estimator_type
+
+    def fit(self, X, y, sample_weight=None):
         self.reweigher_ = clone(self.reweigher)
         self.estimator_ = clone(self.estimator)
 
-        self.reweigher_.fit_transform(X, y, pa_groups, sample_weight=sample_weight)
-        try:
-            self.estimator_.fit(X, y, sample_weight=self.reweigher_.sample_weight_)
-        except TypeError:
-            raise ValueError("'estimator' ({}) does not incorporate "
-                             "'sample_weight' in 'fit()''.".format(
-                                     type(self.estimator_)))
+        self.reweigher_.fit_transform(X, y, sample_weight=sample_weight)
+        self.estimator_.fit(X, y, sample_weight=self.reweigher_.sample_weight_)
         return self
 
     @if_delegate_has_method('estimator')
@@ -107,7 +144,6 @@ def predict_proba(self, X):
     def predict_log_proba(self, X):
         return self.estimator_.predict_log_proba(X)
 
-    # TODO: sample_weight isn't passed by GridSearchCV.score()
     @if_delegate_has_method('estimator')
     def score(self, X, y, sample_weight=None):
         return self.estimator_.score(X, y, sample_weight=sample_weight)
diff --git a/aif360/sklearn/tests/test_reweighing.py b/aif360/sklearn/tests/test_reweighing.py
index 30e8f37a..f1e2a223 100644
--- a/aif360/sklearn/tests/test_reweighing.py
+++ b/aif360/sklearn/tests/test_reweighing.py
@@ -1,53 +1,54 @@
 import numpy as np
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import GridSearchCV
-from sklearn.pipeline import make_pipeline
+from sklearn.metrics import accuracy_score, make_scorer
 
-from aif360.datasets import GermanDataset
-from aif360.sklearn.datasets import fetch_german
+from aif360.datasets import AdultDataset
+from aif360.sklearn.datasets import fetch_adult
 from aif360.algorithms.preprocessing import Reweighing as OrigReweighing
 from aif360.sklearn.preprocessing import Reweighing, ReweighingMeta
 
 
-X, y = fetch_german(numeric_only=True, binary_age=True, dropcols='duration')
-german = GermanDataset(categorical_features=[], features_to_keep=[
-        'credit_amount', 'investment_as_income_percentage', 'residence_since',
-        'age', 'number_of_credits', 'people_liable_for', 'sex'])
-
-def test_dataset_equality():
-    assert (german.features == X.values).all()
+# X, y = fetch_german(numeric_only=True, dropcols='duration')
+# X.age = (X.age >= 25).astype('int')
+# german = GermanDataset(categorical_features=[], features_to_keep=[
+#         'credit_amount', 'investment_as_income_percentage', 'residence_since',
+#         'age', 'number_of_credits', 'people_liable_for', 'sex'])
+X, y, sample_weight = fetch_adult(numeric_only=True)
+adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
+        features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss',
+                          'hours-per-week'], features_to_drop=[])
 
 def test_reweighing_sex():
     orig_rew = OrigReweighing(unprivileged_groups=[{'sex': 0}],
                               privileged_groups=[{'sex': 1}])
-    german_fair = orig_rew.fit_transform(german)
-    rew = Reweighing()
-    rew.fit_transform(X, y, groups=X.index.get_level_values('sex'))
+    adult_fair = orig_rew.fit_transform(adult)
+    rew = Reweighing('sex')
+    rew.fit_transform(X, y, sample_weight=sample_weight)
 
-    assert np.allclose(german_fair.instance_weights, rew.sample_weight_)
+    # assert np.allclose([[orig_rew.w_up_unfav, orig_rew.w_up_fav],
+    #                     [orig_rew.w_p_unfav, orig_rew.w_p_fav]],
+    #                    rew.reweigh_factors_)
+    assert np.allclose(adult_fair.instance_weights, rew.sample_weight_)
 
 def test_reweighing_intersection():
     rew = Reweighing()
-    rew.fit_transform(X, y, groups=X.index.to_flat_index())
-    assert len(rew.groups_) == 4
-    assert len(rew.classes_) == 2
-
-def test_pipeline():
-    logreg = LogisticRegression(solver='liblinear')
-    pipe = make_pipeline(Reweighing(), logreg)
-    fit_params = {'logisticregression__sample_weight': pipe[0].sample_weight_,
-                  'reweighing__groups': X.index.get_level_values('sex')}
-    pipe.fit(X, y, **fit_params)
-    assert (logreg.fit(X, y, sample_weight=pipe[0].sample_weight_).coef_
-         == pipe[-1].coef_).all()
+    rew.fit_transform(X, y)
+    assert rew.reweigh_factors_.shape == (4, 2)
 
 def test_gridsearch():
-    rew = ReweighingMeta(LogisticRegression(solver='liblinear'))
-    params = {'estimator__C': [1, 10]}
-    clf = GridSearchCV(rew, params, cv=5)
-    # TODO: 'groups' name clashes with CV splitter
-    fit_params = {'pa_groups': X.index.get_level_values('sex'),
-                  'sample_weight': np.random.random(y.shape)}
-    clf.fit(X, y, **fit_params)
-    # print(clf.score(X, y))
-    assert len(clf.best_estimator_.reweigher_.groups_) == 2
+    # logreg = LogisticRegression(solver='lbfgs', max_iter=500)
+    # rew = ReweighingMeta(estimator=logreg, reweigher=Reweighing('sex'))
+    rew = ReweighingMeta(estimator=LogisticRegression(solver='liblinear'))
+
+    # UGLY workaround for sklearn issue: https://stackoverflow.com/a/49598597
+    def score_func(y_true, y_pred, sample_weight):
+        idx = y_true.index.to_flat_index()
+        return accuracy_score(y_true, y_pred, sample_weight=sample_weight[idx])
+    scoring = make_scorer(score_func, **{'sample_weight': sample_weight})
+
+    params = {'estimator__C': [1, 10], 'reweigher__prot_attr': ['sex']}
+
+    clf = GridSearchCV(rew, params, scoring=scoring, cv=5, iid=False)
+    clf.fit(X, y, **{'sample_weight': sample_weight})
+    # print(clf.best_score_)

From 0183449ddb87366d0c1cc8719078da8f8157e52b Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Mon, 24 Jun 2019 12:32:22 -0400
Subject: [PATCH 19/61] updated README

---
 aif360/sklearn/README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/aif360/sklearn/README.md b/aif360/sklearn/README.md
index 558ac562..da318ced 100644
--- a/aif360/sklearn/README.md
+++ b/aif360/sklearn/README.md
@@ -27,7 +27,7 @@ objects with sample properties (protected attributes) as the index
 - [ ] Make inprocessing algorithms compatible as sklearn `Estimator`s
   - [ ] **[External]** `get_feature_names()` from data preprocessing
   steps that would remove DataFrame formatting
-    - [ ] [SLEP008](https://github.com/scikit-learn/enhancement_proposals/pull/18 )?
+    - [ ] SLEP007/8
   - [ ] Prejudice remover
   - [ ] Adversarial debiasing
   - [ ] Meta-fair classifier
@@ -38,8 +38,7 @@ objects with sample properties (protected attributes) as the index
   - [ ] Learning fair representations
   - [ ] Optimized preprocessing
   - [X] Reweighing
-    - [X] Use dynamic object to pass sample_weight to estimator, etc. after they
-    are fitted (NOTE: does not work with GridSearchCV)
+    - [X] Meta-estimator workaround
     - [ ] **[External]** SLEP006 - Sample properties
 - [ ] Make postprocessing algorithms compatible
   - [ ] **[External]** Allow for `fit(y_true, y_pred)`

From 89b4a79253c5009b8a17bcb543369815db2887a0 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Mon, 24 Jun 2019 19:48:56 -0400
Subject: [PATCH 20/61] fixed docstring formatting

---
 aif360/sklearn/datasets/openml_datasets.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 98d0aa31..37122b17 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -15,9 +15,9 @@ def to_dataframe(data):
     if needed.
 
     Args:
-        data (Bunch): Dict-like object containing `data`, `feature_names` and,
-            optionally, `categories` attributes. Note: `data` should contain
-            both X and y data.
+        data (Bunch): Dict-like object containing ``data``, ``feature_names``
+            and, optionally, ``categories`` attributes. Note: ``data`` should
+            contain both X and y data.
 
     Returns:
         pandas.DataFrame: A DataFrame containing all data, including target,
@@ -99,9 +99,10 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
 
     Protected attributes are 'sex' ('male' is privileged and 'female' is
     unprivileged) and 'age' (binarized by default as recommended by
-    [#kamiran09]_: `age >= 25` is considered privileged and `age < 25` is
-    considered unprivileged; see the `binary_age` flag to keep this continuous).
-    The outcome variable is 'good' (favorable) or 'bad' (unfavorable).
+    [#kamiran09]_: ``age >= 25`` is considered privileged and ``age < 25`` is
+    considered unprivileged; see the ``binary_age`` flag to keep this
+    continuous). The outcome variable is 'good' (favorable) or 'bad'
+    (unfavorable).
 
     References:
         .. [#kamiran09] F. Kamiran and T. Calders, "Classifying without
@@ -113,8 +114,8 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
             for the datasets. By default all AIF360 datasets are stored in
             'aif360/sklearn/data/raw' subfolders.
         binary_age (bool, optional): If `True`, split protected attribute,
-            `age`, into 'aged' (privileged) and 'youth' (unprivileged). The
-            `age` feature remains continuous.
+            ``age``, into 'aged' (privileged) and 'youth' (unprivileged). The
+            ``age`` feature remains continuous.
         usecols (single label or list-like, optional): Column name(s) to keep.
             All others are dropped.
         dropcols (single label or list-like, optional): Column name(s) to drop.

From d57b6df1ec2bcc313b344d924d828f409c3429fb Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Mon, 24 Jun 2019 19:50:38 -0400
Subject: [PATCH 21/61] changed metrics to use prot_attr

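---
Review notes (ignored by git-am; not part of the commit message):
 * metrics.py: between_group_generalized_entropy_error() binds the whole
   (groups, prot_attr) tuple returned by check_groups() to `groups`, while
   difference() and ratio() unpack it with `groups, _ = ...`.
 * metrics.py: the ratio() docstring still documents the removed `groups`
   argument instead of `prot_attr`.
 * utils.py: check_inputs() calls np.ones() but the new module does not
   import numpy.
 * test_metrics.py: `y` is the plain array returned by factorize(), which
   has no `.index`; check_groups() presumably raises TypeError for it --
   please confirm.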
 aif360/sklearn/metrics/metrics.py          | 86 +++++++++++++---------
 aif360/sklearn/preprocessing/reweighing.py | 41 ++---------
 aif360/sklearn/tests/test_metrics.py       | 55 ++++++++------
 aif360/sklearn/utils.py                    | 53 +++++++++++++
 4 files changed, 145 insertions(+), 90 deletions(-)
 create mode 100644 aif360/sklearn/utils.py

diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index d4da5c81..4d87490e 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -2,39 +2,52 @@
 from sklearn.metrics import make_scorer, recall_score
 from sklearn.neighbors import NearestNeighbors
 
+from aif360.sklearn.utils import check_groups
+
+
+__all__ = [
+    'consistency_score', 'specificity_score', 'selection_rate',
+    'disparate_impact_ratio', 'statistical_parity_difference',
+    'equal_opportunity_difference', 'average_odds_difference',
+    'average_odds_error', 'generalized_entropy_error',
+    'between_group_generalized_entropy_error'
+]
 
 # ============================= META-METRICS ===================================
-def difference(func, y, *args, groups, priv_group=1, sample_weight=None, **kwargs):
+def difference(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
+               **kwargs):
     """Compute the difference between unprivileged and privileged subsets for an
     arbitrary metric.
 
     Note: The optimal value of a difference is 0. To make it a scorer, one must
-    take the absolute value and set `greater_is_better` to False.
+    take the absolute value and set ``greater_is_better`` to False.
 
     Unprivileged group is taken to be the inverse of the privileged group.
 
     Args:
-        func (function): A metric function from `aif360.sklearn.metrics` or
-            `sklearn.metrics`.
+        func (function): A metric function from :mod:`sklearn.metrics` or
+            :mod:`aif360.sklearn.metrics.metrics`.
         y (array-like): Outcome vector with protected attributes as index.
-        *args: Additional positional args to be passed through to `func`.
-        groups (array-like, keyword-only): Group labels (protected attributes)
-            for the samples.
+        *args: Additional positional args to be passed through to ``func``.
+        prot_attr (array-like, keyword-only): Protected attribute(s). If
+            ``None``, all protected attributes in ``y`` are used.
         priv_group (scalar, optional): Label value for the privileged group.
         sample_weight (array-like, optional): Sample weights passed through to
-            `func`.
-        **kwargs: Additional keyword args to be passed through to `func`.
+            ``func``.
+        **kwargs: Additional keyword args to be passed through to ``func``.
 
     Returns:
-        scalar: Difference in metric value for unprivileged and privileged groups.
+        scalar: Difference in metric value for unprivileged and privileged
+        groups.
 
     Examples:
         >>> X, y = fetch_german(numeric_only=True)
         >>> y_pred = LogisticRegression().fit(X, y).predict(X)
-        >>> sex = X.index.get_level_values('sex')
-        >>> difference(precision_score, y, y_pred, groups=sex, priv_group='male')
+        >>> difference(precision_score, y, y_pred, prot_attr='sex',
+        ... priv_group='male')
         -0.06955430006277463
     """
+    groups, _ = check_groups(y, prot_attr)
     idx = (groups == priv_group)
     unpriv = map(lambda a: a[~idx], (y,) + args)
     priv = map(lambda a: a[idx], (y,) + args)
@@ -43,30 +56,32 @@ def difference(func, y, *args, groups, priv_group=1, sample_weight=None, **kwarg
               - func(*priv, sample_weight=sample_weight[idx], **kwargs))
     return func(*unpriv, **kwargs) - func(*priv, **kwargs)
 
-def ratio(func, y, *args, groups, priv_group=1, sample_weight=None, **kwargs):
+def ratio(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
+          **kwargs):
     """Compute the ratio between unprivileged and privileged subsets for an
     arbitrary metric.
 
     Note: The optimal value of a ratio is 1. To make it a scorer, one must
-    subtract 1, take the absolute value, and set `greater_is_better` to False.
+    subtract 1, take the absolute value, and set ``greater_is_better`` to False.
 
     Unprivileged group is taken to be the inverse of the privileged group.
 
     Args:
-        func (function): A metric function from `aif360.sklearn.metrics` or
-            `sklearn.metrics`.
+        func (function): A metric function from :mod:`sklearn.metrics` or
+            :mod:`aif360.sklearn.metrics.metrics`.
         y (array-like): Outcome vector with protected attributes as index.
-        *args: Additional positional args to be passed through to `func`.
+        *args: Additional positional args to be passed through to ``func``.
         groups (array-like, keyword-only): Group labels (protected attributes)
             for the samples.
         priv_group (scalar, optional): Label value for the privileged group.
         sample_weight (array-like, optional): Sample weights passed through to
-            `func`.
-        **kwargs: Additional keyword args to be passed through to `func`.
+            ``func``.
+        **kwargs: Additional keyword args to be passed through to ``func``.
 
     Returns:
         scalar: Ratio of metric values for unprivileged and privileged groups.
     """
+    groups, _ = check_groups(y, prot_attr)
     idx = (groups == priv_group)
     unpriv = map(lambda a: a[~idx], (y,) + args)
     priv = map(lambda a: a[idx], (y,) + args)
@@ -107,40 +122,40 @@ def selection_rate(y_true, y_pred, pos_label=1, sample_weight=None):
 
 
 # ============================ GROUP FAIRNESS ==================================
-def statistical_parity_difference(*y, groups, priv_group=1, pos_label=1,
+def statistical_parity_difference(*y, prot_attr=None, priv_group=1, pos_label=1,
                                   sample_weight=None):
     rate = base_rate if len(y) == 1 or y[1] is None else selection_rate
-    return difference(rate, *y, groups=groups, priv_group=priv_group,
+    return difference(rate, *y, prot_attr=prot_attr, priv_group=priv_group,
                       pos_label=pos_label, sample_weight=sample_weight)
 
-def disparate_impact_ratio(*y, groups, priv_group=1, pos_label=1,
+def disparate_impact_ratio(*y, prot_attr=None, priv_group=1, pos_label=1,
                            sample_weight=None):
     rate = base_rate if len(y) == 1 or y[1] is None else selection_rate
-    return ratio(rate, *y, groups=groups, priv_group=priv_group,
+    return ratio(rate, *y, prot_attr=prot_attr, priv_group=priv_group,
                  pos_label=pos_label, sample_weight=sample_weight)
 
-def equal_opportunity_difference(y_true, y_pred, groups, priv_group=1,
+def equal_opportunity_difference(y_true, y_pred, prot_attr=None, priv_group=1,
                                  pos_label=1, sample_weight=None):
-    return difference(recall_score, y_true, y_pred, groups=groups,
+    return difference(recall_score, y_true, y_pred, prot_attr=prot_attr,
                       priv_group=priv_group, pos_label=pos_label,
                       sample_weight=sample_weight)
 
-def average_odds_difference(y_true, y_pred, groups, priv_group=1, pos_label=1,
+def average_odds_difference(y_true, y_pred, prot_attr=None, priv_group=1, pos_label=1,
                             neg_label=0, sample_weight=None):
-    tnr_diff = difference(specificity_score, y_true, y_pred, groups=groups,
+    tnr_diff = difference(specificity_score, y_true, y_pred, prot_attr=prot_attr,
                           priv_group=priv_group, neg_label=neg_label,
                           sample_weight=sample_weight)
-    tpr_diff = difference(recall_score, y_true, y_pred, groups=groups,
+    tpr_diff = difference(recall_score, y_true, y_pred, prot_attr=prot_attr,
                           priv_group=priv_group, pos_label=pos_label,
                           sample_weight=sample_weight)
     return (tpr_diff - tnr_diff) / 2
 
-def average_odds_error(y_true, y_pred, groups, priv_group=1, pos_label=1,
+def average_odds_error(y_true, y_pred, prot_attr=None, priv_group=1, pos_label=1,
                        neg_label=0, sample_weight=None):
-    tnr_diff = difference(specificity_score, y_true, y_pred, groups=groups,
+    tnr_diff = difference(specificity_score, y_true, y_pred, prot_attr=prot_attr,
                           priv_group=priv_group, neg_label=neg_label,
                           sample_weight=sample_weight)
-    tpr_diff = difference(recall_score, y_true, y_pred, groups=groups,
+    tpr_diff = difference(recall_score, y_true, y_pred, prot_attr=prot_attr,
                           priv_group=priv_group, pos_label=pos_label,
                           sample_weight=sample_weight)
     return (abs(tnr_diff) + abs(tpr_diff)) / 2
@@ -157,13 +172,14 @@ def generalized_entropy_index(b, alpha=2):
         return ((b / b.mean())**alpha - 1).mean() / (alpha * (alpha - 1))
 
 def generalized_entropy_error(y_true, y_pred, alpha=2, pos_label=1):
-                              # sample_weight=None):
+    #                           sample_weight=None):
     b = 1 + (y_pred == pos_label) - (y_true == pos_label)
     return generalized_entropy_index(b, alpha=alpha)
 
-def between_group_generalized_entropy_error(y_true, y_pred, groups,
+def between_group_generalized_entropy_error(y_true, y_pred, prot_attr=None,
                                             priv_group=None, alpha=2,
                                             pos_label=1):
+    groups = check_groups(y_true, prot_attr)
     b = np.empty_like(y_true, dtype='float')
     if priv_group is not None:
         groups = [1 if g == priv_group else 0 for g in groups]
@@ -205,7 +221,7 @@ def sensitivity_score(y_true, y_pred, pos_label=1, sample_weight=None):
 #     return 1 - specificity_score(y_true, y_pred, pos_label=pos_label,
 #                                  sample_weight=sample_weight)
 
-def mean_difference(*y, groups, priv_group=1, pos_label=1, sample_weight=None):
+def mean_difference(*y, prot_attr=None, priv_group=1, pos_label=1, sample_weight=None):
     """Alias of :func:`statistical_parity_difference`."""
-    return statistical_parity_difference(*y, groups=groups, priv_group=priv_group,
+    return statistical_parity_difference(*y, prot_attr=prot_attr, priv_group=priv_group,
             pos_label=pos_label, sample_weight=sample_weight)
diff --git a/aif360/sklearn/preprocessing/reweighing.py b/aif360/sklearn/preprocessing/reweighing.py
index 58cb13eb..5a80c457 100644
--- a/aif360/sklearn/preprocessing/reweighing.py
+++ b/aif360/sklearn/preprocessing/reweighing.py
@@ -1,21 +1,10 @@
 import numpy as np
-from pandas.core.dtypes.common import is_list_like
 from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone
-from sklearn.utils import check_consistent_length
 from sklearn.utils.metaestimators import if_delegate_has_method
-from sklearn.utils.validation import column_or_1d, has_fit_parameter
+from sklearn.utils.validation import has_fit_parameter
 
+from aif360.sklearn.utils import check_inputs, check_groups
 
-def check_inputs(X, y, sample_weight):
-    if not hasattr(X, 'index'):
-        raise TypeError("Expected `DataFrame`, got {} instead.".format(type(X)))
-    y = column_or_1d(y)
-    if sample_weight is not None:
-        sample_weight = column_or_1d(sample_weight)
-    else:
-        sample_weight = np.ones(X.shape[0])
-    check_consistent_length(X, y, sample_weight)
-    return X, y, sample_weight
 
 class Reweighing(BaseEstimator):
     """Reweighing is a preprocessing technique that weights the examples in each
@@ -53,7 +42,7 @@ def __init__(self, prot_attr=None):
             prot_attr (single label or list-like, optional): Protected
                 attribute(s) to use as sensitive attribute(s) in the reweighing
                 process. If more than one attribute, all combinations of values
-                (intersections) are considered. Default is `None` meaning all
+                (intersections) are considered. Default is ``None`` meaning all
                 protected attributes from the dataset are used.
         """
         self.prot_attr = prot_attr
@@ -77,22 +66,8 @@ def fit_transform(self, X, y, sample_weight=None):
         """
         X, y, sample_weight = check_inputs(X, y, sample_weight)
 
-        all_prot_attrs = X.index.names[1:]
-        if self.prot_attr is None:
-            self.prot_attr_ = all_prot_attrs
-        elif not is_list_like(self.prot_attr):
-            self.prot_attr_ = [self.prot_attr]
-        else:
-            self.prot_attr_ = self.prot_attr
-
-        if any(p not in X.index.names for p in self.prot_attr_):
-            raise ValueError("Some of the attributes provided are not present "
-                             "in the dataset. Expected a subset of:\n{}\nGot:\n"
-                             "{}".format(all_prot_attrs, self.prot_attr_))
-
         self.sample_weight_ = np.empty_like(sample_weight)
-        groups = X.index.droplevel(list(set(X.index.names)
-                                      - set(self.prot_attr_))).to_flat_index()
+        groups, self.prot_attr_ = check_groups(X, self.prot_attr)
         # TODO: maintain categorical ordering
         self.groups_ = np.unique(groups)
         self.classes_ = np.unique(y)
@@ -132,18 +107,18 @@ def fit(self, X, y, sample_weight=None):
         self.estimator_.fit(X, y, sample_weight=self.reweigher_.sample_weight_)
         return self
 
-    @if_delegate_has_method('estimator')
+    @if_delegate_has_method('estimator_')
     def predict(self, X):
         return self.estimator_.predict(X)
 
-    @if_delegate_has_method('estimator')
+    @if_delegate_has_method('estimator_')
     def predict_proba(self, X):
         return self.estimator_.predict_proba(X)
 
-    @if_delegate_has_method('estimator')
+    @if_delegate_has_method('estimator_')
     def predict_log_proba(self, X):
         return self.estimator_.predict_log_proba(X)
 
-    @if_delegate_has_method('estimator')
+    @if_delegate_has_method('estimator_')
     def score(self, X, y, sample_weight=None):
         return self.estimator_.score(X, y, sample_weight=sample_weight)
diff --git a/aif360/sklearn/tests/test_metrics.py b/aif360/sklearn/tests/test_metrics.py
index a2db21ec..e470f32e 100644
--- a/aif360/sklearn/tests/test_metrics.py
+++ b/aif360/sklearn/tests/test_metrics.py
@@ -4,17 +4,22 @@
 from aif360.datasets import AdultDataset
 from aif360.sklearn.datasets import fetch_adult
 from aif360.metrics import ClassificationMetric
-from aif360.sklearn.metrics import *
+from aif360.sklearn.metrics import (
+        consistency_score, specificity_score, selection_rate,
+        disparate_impact_ratio, statistical_parity_difference,
+        equal_opportunity_difference, average_odds_difference,
+        average_odds_error, generalized_entropy_error,
+        between_group_generalized_entropy_error)
 
 
 X, y, sample_weight = fetch_adult(numeric_only=True)
 y = y.factorize(sort=True)[0]
 y_pred = LogisticRegression(solver='liblinear').fit(X, y,
         sample_weight=sample_weight).predict(X)
-priv = X.index.get_level_values('sex')
 adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
-        features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss',
-                          'hours-per-week'], features_to_drop=[])
+        features_to_keep=['age', 'education-num', 'capital-gain',
+                          'capital-loss', 'hours-per-week'],
+        features_to_drop=[])
 adult_pred = adult.copy()
 adult_pred.labels = y_pred
 cm = ClassificationMetric(adult, adult_pred,
@@ -29,37 +34,43 @@ def test_consistency():
     assert np.isclose(consistency_score(X, y), cm.consistency())
 
 def test_specificity():
-    assert specificity_score(y, y_pred, sample_weight=sample_weight) == cm.specificity()
+    spec = specificity_score(y, y_pred, sample_weight=sample_weight)
+    assert spec == cm.specificity()
 
 def test_selection_rate():
-    assert selection_rate(y, y_pred, sample_weight=sample_weight) == cm.selection_rate()
+    select = selection_rate(y, y_pred, sample_weight=sample_weight)
+    assert select == cm.selection_rate()
 
 def test_disparate_impact():
-    assert disparate_impact_ratio(y, y_pred, groups=priv, priv_group='Male',
-            sample_weight=sample_weight) == cm.disparate_impact()
+    di = disparate_impact_ratio(y, y_pred, prot_attr='sex', priv_group='Male',
+                                sample_weight=sample_weight)
+    assert di == cm.disparate_impact()
 
 def test_statistical_parity():
-    assert statistical_parity_difference(y, y_pred, groups=priv, priv_group='Male',
-            sample_weight=sample_weight) == cm.statistical_parity_difference()
+    stat = statistical_parity_difference(y, y_pred, prot_attr='sex',
+            priv_group='Male', sample_weight=sample_weight)
+    assert stat == cm.statistical_parity_difference()
 
 def test_equal_opportunity():
-    assert equal_opportunity_difference(y, y_pred, groups=priv, priv_group='Male',
-            sample_weight=sample_weight) == cm.equal_opportunity_difference()
+    eopp = equal_opportunity_difference(y, y_pred, prot_attr='sex',
+            priv_group='Male', sample_weight=sample_weight)
+    assert eopp == cm.equal_opportunity_difference()
 
 def test_average_odds_difference():
-    assert np.isclose(average_odds_difference(y, y_pred, groups=priv, priv_group='Male',
-                                              sample_weight=sample_weight),
-                      cm.average_odds_difference())
+    aod = average_odds_difference(y, y_pred, prot_attr='sex', priv_group='Male',
+                                  sample_weight=sample_weight)
+    assert np.isclose(aod, cm.average_odds_difference())
 
 def test_average_odds_error():
-    assert np.isclose(average_odds_error(y, y_pred, groups=priv, priv_group='Male',
-                                         sample_weight=sample_weight),
-                      cm.average_abs_odds_difference())
+    aoe = average_odds_error(y, y_pred, prot_attr='sex', priv_group='Male',
+                             sample_weight=sample_weight)
+    assert np.isclose(aoe, cm.average_abs_odds_difference())
 
 def test_generalized_entropy_index():
-    assert np.isclose(generalized_entropy_error(y, y_pred),
-                      cm.generalized_entropy_index())
+    gei = generalized_entropy_error(y, y_pred)
+    assert np.isclose(gei, cm.generalized_entropy_index())
 
 def test_between_group_generalized_entropy_index():
-    assert between_group_generalized_entropy_error(y, y_pred, groups=priv, priv_group='Male') \
-        == cm.between_group_generalized_entropy_index()
+    bggei = between_group_generalized_entropy_error(y, y_pred, prot_attr='sex',
+                                                    priv_group='Male')
+    assert bggei == cm.between_group_generalized_entropy_index()
diff --git a/aif360/sklearn/utils.py b/aif360/sklearn/utils.py
new file mode 100644
index 00000000..e18646bf
--- /dev/null
+++ b/aif360/sklearn/utils.py
@@ -0,0 +1,53 @@
+from pandas.core.dtypes.common import is_list_like
+from sklearn.utils import check_consistent_length
+from sklearn.utils.validation import column_or_1d
+
+
+def check_inputs(X, y, sample_weight):
+    if not hasattr(X, 'index'):
+        raise TypeError("Expected `DataFrame`, got {} instead.".format(
+            type(X).__name__))
+    y = column_or_1d(y)
+    if sample_weight is not None:
+        sample_weight = column_or_1d(sample_weight)
+    else:
+        sample_weight = np.ones(X.shape[0])
+    check_consistent_length(X, y, sample_weight)
+    return X, y, sample_weight
+
+def check_groups(X, prot_attr):
+    """Validates ``X`` and returns ``groups`` and ``prot_attr``.
+
+    Args:
+        X (`pandas.Series` or `pandas.DataFrame`): .
+        prot_attr (single label or list-like): Protected attribute(s). If
+            ``None``, all protected attributes in ``X`` are used.
+
+    Returns:
+        (`pandas.Index`, list-like):
+
+            * **groups** (`pandas.Index`) -- Label (or tuple of labels) of
+              protected attribute for each sample in ``X``.
+            * **prot_attr** (list-like) -- Modified input. If input is a single
+              label, returns single-item list. If input is ``None`` returns list
+              of all protected attributes.
+    """
+    if not hasattr(X, 'index'):
+        raise TypeError(
+                "Expected `Series` or `DataFrame`, got {} instead.".format(
+                        type(X).__name__))
+
+    all_prot_attrs = X.index.names[1:]
+    if prot_attr is None:
+        prot_attr = all_prot_attrs
+    elif not is_list_like(prot_attr):
+        prot_attr = [prot_attr]
+
+    if any(p not in X.index.names for p in prot_attr):
+        raise ValueError("Some of the attributes provided are not present "
+                         "in the dataset. Expected a subset of:\n{}\nGot:\n"
+                         "{}".format(all_prot_attrs, prot_attr))
+
+    groups = X.index.droplevel(list(set(X.index.names) - set(prot_attr)))
+
+    return groups.to_flat_index(), prot_attr

From d8958bbd67f6f899d5e42adc76f346fab31f4d38 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Mon, 24 Jun 2019 19:51:44 -0400
Subject: [PATCH 22/61] added __all__ to __init__s

---
 aif360/sklearn/metrics/__init__.py       | 19 ++++++++++++++++++-
 aif360/sklearn/preprocessing/__init__.py |  4 ++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/aif360/sklearn/metrics/__init__.py b/aif360/sklearn/metrics/__init__.py
index ceaef288..84aa3f1e 100644
--- a/aif360/sklearn/metrics/__init__.py
+++ b/aif360/sklearn/metrics/__init__.py
@@ -1 +1,18 @@
-from aif360.sklearn.metrics.metrics import *
+from aif360.sklearn.metrics.metrics import consistency_score
+from aif360.sklearn.metrics.metrics import specificity_score
+from aif360.sklearn.metrics.metrics import selection_rate
+from aif360.sklearn.metrics.metrics import disparate_impact_ratio
+from aif360.sklearn.metrics.metrics import statistical_parity_difference
+from aif360.sklearn.metrics.metrics import equal_opportunity_difference
+from aif360.sklearn.metrics.metrics import average_odds_difference
+from aif360.sklearn.metrics.metrics import average_odds_error
+from aif360.sklearn.metrics.metrics import generalized_entropy_error
+from aif360.sklearn.metrics.metrics import between_group_generalized_entropy_error
+
+__all__ = [
+    'consistency_score', 'specificity_score', 'selection_rate',
+    'disparate_impact_ratio', 'statistical_parity_difference',
+    'equal_opportunity_difference', 'average_odds_difference',
+    'average_odds_error', 'generalized_entropy_error',
+    'between_group_generalized_entropy_error'
+]
diff --git a/aif360/sklearn/preprocessing/__init__.py b/aif360/sklearn/preprocessing/__init__.py
index f49b7673..61a0431d 100644
--- a/aif360/sklearn/preprocessing/__init__.py
+++ b/aif360/sklearn/preprocessing/__init__.py
@@ -1 +1,5 @@
 from aif360.sklearn.preprocessing.reweighing import Reweighing, ReweighingMeta
+
+__all__ = [
+    'Reweighing', 'ReweighingMeta'
+]

From 0bd3837bfd8ac6f4ca9fc6f4d2876affd8fdadc1 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 27 Jun 2019 12:35:31 -0400
Subject: [PATCH 23/61] updated notebook with reweighing example

---
 aif360/sklearn/examples/Getting Started.ipynb | 208 ++++++++++++------
 1 file changed, 146 insertions(+), 62 deletions(-)

diff --git a/aif360/sklearn/examples/Getting Started.ipynb b/aif360/sklearn/examples/Getting Started.ipynb
index 0df0db33..b65f8f78 100644
--- a/aif360/sklearn/examples/Getting Started.ipynb	
+++ b/aif360/sklearn/examples/Getting Started.ipynb	
@@ -15,12 +15,11 @@
    "source": [
     "import numpy as np\n",
     "import pandas as pd\n",
-    "from sklearn.pipeline import make_pipeline\n",
     "from sklearn.linear_model import LogisticRegression\n",
-    "from sklearn.metrics import recall_score\n",
-    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import accuracy_score, recall_score, make_scorer\n",
+    "from sklearn.model_selection import GridSearchCV, train_test_split\n",
     "\n",
-    "from aif360.sklearn.algorithms.preprocessing import Reweighing\n",
+    "from aif360.sklearn.preprocessing import ReweighingMeta\n",
     "from aif360.sklearn.datasets import fetch_adult\n",
     "from aif360.sklearn.metrics import disparate_impact_ratio"
    ]
@@ -68,6 +67,7 @@
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th></th>\n",
+       "      <th></th>\n",
        "      <th>age</th>\n",
        "      <th>workclass</th>\n",
        "      <th>education</th>\n",
@@ -83,6 +83,7 @@
        "      <th>native-country</th>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th></th>\n",
        "      <th>race</th>\n",
        "      <th>sex</th>\n",
        "      <th></th>\n",
@@ -102,6 +103,7 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
+       "      <th>0</th>\n",
        "      <th>Non-white</th>\n",
        "      <th>Male</th>\n",
        "      <td>25.0</td>\n",
@@ -119,7 +121,8 @@
        "      <td>United-States</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th rowspan=\"2\" valign=\"top\">White</th>\n",
+       "      <th>1</th>\n",
+       "      <th>White</th>\n",
        "      <th>Male</th>\n",
        "      <td>38.0</td>\n",
        "      <td>Private</td>\n",
@@ -136,6 +139,8 @@
        "      <td>United-States</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <th>White</th>\n",
        "      <th>Male</th>\n",
        "      <td>28.0</td>\n",
        "      <td>Local-gov</td>\n",
@@ -152,6 +157,7 @@
        "      <td>United-States</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>3</th>\n",
        "      <th>Non-white</th>\n",
        "      <th>Male</th>\n",
        "      <td>44.0</td>\n",
@@ -169,6 +175,7 @@
        "      <td>United-States</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>5</th>\n",
        "      <th>White</th>\n",
        "      <th>Male</th>\n",
        "      <td>34.0</td>\n",
@@ -190,37 +197,37 @@
        "</div>"
       ],
       "text/plain": [
-       "                 age  workclass     education  education-num  \\\n",
-       "race      sex                                                  \n",
-       "Non-white Male  25.0    Private          11th            7.0   \n",
-       "White     Male  38.0    Private       HS-grad            9.0   \n",
-       "          Male  28.0  Local-gov    Assoc-acdm           12.0   \n",
-       "Non-white Male  44.0    Private  Some-college           10.0   \n",
-       "White     Male  34.0    Private          10th            6.0   \n",
+       "                   age  workclass     education  education-num  \\\n",
+       "  race      sex                                                  \n",
+       "0 Non-white Male  25.0    Private          11th            7.0   \n",
+       "1 White     Male  38.0    Private       HS-grad            9.0   \n",
+       "2 White     Male  28.0  Local-gov    Assoc-acdm           12.0   \n",
+       "3 Non-white Male  44.0    Private  Some-college           10.0   \n",
+       "5 White     Male  34.0    Private          10th            6.0   \n",
        "\n",
-       "                    marital-status         occupation   relationship  \\\n",
-       "race      sex                                                          \n",
-       "Non-white Male       Never-married  Machine-op-inspct      Own-child   \n",
-       "White     Male  Married-civ-spouse    Farming-fishing        Husband   \n",
-       "          Male  Married-civ-spouse    Protective-serv        Husband   \n",
-       "Non-white Male  Married-civ-spouse  Machine-op-inspct        Husband   \n",
-       "White     Male       Never-married      Other-service  Not-in-family   \n",
+       "                      marital-status         occupation   relationship  \\\n",
+       "  race      sex                                                          \n",
+       "0 Non-white Male       Never-married  Machine-op-inspct      Own-child   \n",
+       "1 White     Male  Married-civ-spouse    Farming-fishing        Husband   \n",
+       "2 White     Male  Married-civ-spouse    Protective-serv        Husband   \n",
+       "3 Non-white Male  Married-civ-spouse  Machine-op-inspct        Husband   \n",
+       "5 White     Male       Never-married      Other-service  Not-in-family   \n",
        "\n",
-       "                     race   sex  capital-gain  capital-loss  hours-per-week  \\\n",
-       "race      sex                                                                 \n",
-       "Non-white Male  Non-white  Male           0.0           0.0            40.0   \n",
-       "White     Male      White  Male           0.0           0.0            50.0   \n",
-       "          Male      White  Male           0.0           0.0            40.0   \n",
-       "Non-white Male  Non-white  Male        7688.0           0.0            40.0   \n",
-       "White     Male      White  Male           0.0           0.0            30.0   \n",
+       "                       race   sex  capital-gain  capital-loss  hours-per-week  \\\n",
+       "  race      sex                                                                 \n",
+       "0 Non-white Male  Non-white  Male           0.0           0.0            40.0   \n",
+       "1 White     Male      White  Male           0.0           0.0            50.0   \n",
+       "2 White     Male      White  Male           0.0           0.0            40.0   \n",
+       "3 Non-white Male  Non-white  Male        7688.0           0.0            40.0   \n",
+       "5 White     Male      White  Male           0.0           0.0            30.0   \n",
        "\n",
-       "               native-country  \n",
-       "race      sex                  \n",
-       "Non-white Male  United-States  \n",
-       "White     Male  United-States  \n",
-       "          Male  United-States  \n",
-       "Non-white Male  United-States  \n",
-       "White     Male  United-States  "
+       "                 native-country  \n",
+       "  race      sex                  \n",
+       "0 Non-white Male  United-States  \n",
+       "1 White     Male  United-States  \n",
+       "2 White     Male  United-States  \n",
+       "3 Non-white Male  United-States  \n",
+       "5 White     Male  United-States  "
       ]
      },
      "execution_count": 2,
@@ -242,7 +249,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -267,6 +274,7 @@
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th></th>\n",
+       "      <th></th>\n",
        "      <th>age</th>\n",
        "      <th>education-num</th>\n",
        "      <th>race</th>\n",
@@ -276,6 +284,7 @@
        "      <th>hours-per-week</th>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th></th>\n",
        "      <th>race</th>\n",
        "      <th>sex</th>\n",
        "      <th></th>\n",
@@ -289,6 +298,7 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
+       "      <th>7916</th>\n",
        "      <th>Non-white</th>\n",
        "      <th>Female</th>\n",
        "      <td>18.0</td>\n",
@@ -300,7 +310,8 @@
        "      <td>20.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th rowspan=\"4\" valign=\"top\">White</th>\n",
+       "      <th>26447</th>\n",
+       "      <th>White</th>\n",
        "      <th>Male</th>\n",
        "      <td>55.0</td>\n",
        "      <td>9.0</td>\n",
@@ -311,6 +322,8 @@
        "      <td>40.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>20889</th>\n",
+       "      <th>White</th>\n",
        "      <th>Female</th>\n",
        "      <td>43.0</td>\n",
        "      <td>9.0</td>\n",
@@ -321,6 +334,8 @@
        "      <td>40.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>30145</th>\n",
+       "      <th>White</th>\n",
        "      <th>Male</th>\n",
        "      <td>44.0</td>\n",
        "      <td>11.0</td>\n",
@@ -331,6 +346,8 @@
        "      <td>40.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
+       "      <th>7473</th>\n",
+       "      <th>White</th>\n",
        "      <th>Male</th>\n",
        "      <td>41.0</td>\n",
        "      <td>9.0</td>\n",
@@ -345,31 +362,33 @@
        "</div>"
       ],
       "text/plain": [
-       "                   age  education-num  race  sex  capital-gain  capital-loss  \\\n",
-       "race      sex                                                                  \n",
-       "Non-white Female  18.0            7.0   0.0  0.0           0.0           0.0   \n",
-       "White     Male    55.0            9.0   1.0  1.0           0.0           0.0   \n",
-       "          Female  43.0            9.0   1.0  0.0           0.0           0.0   \n",
-       "          Male    44.0           11.0   1.0  1.0        4386.0           0.0   \n",
-       "          Male    41.0            9.0   1.0  1.0           0.0           0.0   \n",
+       "                         age  education-num  race  sex  capital-gain  \\\n",
+       "      race      sex                                                    \n",
+       "7916  Non-white Female  18.0            7.0   0.0  0.0           0.0   \n",
+       "26447 White     Male    55.0            9.0   1.0  1.0           0.0   \n",
+       "20889 White     Female  43.0            9.0   1.0  0.0           0.0   \n",
+       "30145 White     Male    44.0           11.0   1.0  1.0        4386.0   \n",
+       "7473  White     Male    41.0            9.0   1.0  1.0           0.0   \n",
        "\n",
-       "                  hours-per-week  \n",
-       "race      sex                     \n",
-       "Non-white Female            20.0  \n",
-       "White     Male              40.0  \n",
-       "          Female            40.0  \n",
-       "          Male              40.0  \n",
-       "          Male              55.0  "
+       "                        capital-loss  hours-per-week  \n",
+       "      race      sex                                   \n",
+       "7916  Non-white Female           0.0            20.0  \n",
+       "26447 White     Male             0.0            40.0  \n",
+       "20889 White     Female           0.0            40.0  \n",
+       "30145 White     Male             0.0            40.0  \n",
+       "7473  White     Male             0.0            55.0  "
       ]
      },
-     "execution_count": 3,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "X, y, _ = fetch_adult(numeric_only=True)\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=123)\n",
+    "X, y, sample_weight = fetch_adult(numeric_only=True)\n",
+    "(X_train, X_test,\n",
+    " y_train, y_test,\n",
+    " sw_train, sw_test) = train_test_split(X, y, sample_weight, train_size=0.7, random_state=123)\n",
     "X_train.head()"
    ]
   },
@@ -421,7 +440,7 @@
    ],
    "source": [
     "sex = y_test.index.get_level_values('sex')\n",
-    "disparate_impact_ratio(y_test, y_pred, groups=sex, priv_group='Male', pos_label='>50K')"
+    "disparate_impact_ratio(y_test, y_pred, prot_attr='sex', priv_group='Male', pos_label='>50K')"
    ]
   },
   {
@@ -435,21 +454,86 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Not yet implemented."
+    "`ReweighingMeta` is a workaround until changing sample weights can be handled properly in `Pipeline`/`GridSearchCV`."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 10,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Index([(7916, 'Non-white', 'Female'),      (26447, 'White', 'Male'),\n",
+      "          (20889, 'White', 'Female'),      (30145, 'White', 'Male'),\n",
+      "             (7473, 'White', 'Male'),      (29361, 'White', 'Male'),\n",
+      "            (12277, 'White', 'Male'),      (44372, 'White', 'Male'),\n",
+      "          (32291, 'White', 'Female'),    (44411, 'White', 'Female'),\n",
+      "       ...\n",
+      "            (38298, 'White', 'Male'),       (4173, 'White', 'Male'),\n",
+      "             (7854, 'White', 'Male'),    (16424, 'White', 'Female'),\n",
+      "             (2087, 'White', 'Male'),      (16120, 'White', 'Male'),\n",
+      "            (24476, 'White', 'Male'),     (8295, 'White', 'Female'),\n",
+      "             (1449, 'White', 'Male'),      (33323, 'White', 'Male')],\n",
+      "      dtype='object', length=6838)\n"
+     ]
+    },
+    {
+     "ename": "NameError",
+     "evalue": "name 'accuracy_score' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-10-b8e2e0cd7a17>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     12\u001b[0m \u001b[0mclf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGridSearchCV\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrew\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mscoring\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'sample_weight'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0msw_train\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     14\u001b[0m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscore\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, groups, **fit_params)\u001b[0m\n\u001b[1;32m    685\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    686\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 687\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_run_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    688\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    689\u001b[0m         \u001b[0;31m# For multi-metric evaluation, store the best_index_, best_params_ and\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36m_run_search\u001b[0;34m(self, evaluate_candidates)\u001b[0m\n\u001b[1;32m   1146\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_run_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1147\u001b[0m         \u001b[0;34m\"\"\"Search all candidates in param_grid\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1148\u001b[0;31m         \u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mParameterGrid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparam_grid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1149\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mevaluate_candidates\u001b[0;34m(candidate_params)\u001b[0m\n\u001b[1;32m    664\u001b[0m                                \u001b[0;32mfor\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    665\u001b[0m                                in product(candidate_params,\n\u001b[0;32m--> 666\u001b[0;31m                                           cv.split(X, y, groups)))\n\u001b[0m\u001b[1;32m    667\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    668\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m    919\u001b[0m             \u001b[0;31m# remaining jobs.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    920\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 921\u001b[0;31m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_one_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    922\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_original_iterator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    923\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36mdispatch_one_batch\u001b[0;34m(self, iterator)\u001b[0m\n\u001b[1;32m    757\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    758\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 759\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dispatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtasks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    760\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    761\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m_dispatch\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m    714\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    715\u001b[0m             \u001b[0mjob_idx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 716\u001b[0;31m             \u001b[0mjob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    717\u001b[0m             \u001b[0;31m# A job can complete so quickly than its callback is\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    718\u001b[0m             \u001b[0;31m# called before we get here, causing self._jobs to\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mapply_async\u001b[0;34m(self, func, callback)\u001b[0m\n\u001b[1;32m    180\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    181\u001b[0m         \u001b[0;34m\"\"\"Schedule a func to be run\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 182\u001b[0;31m         \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mImmediateResult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    183\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    184\u001b[0m             \u001b[0mcallback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m    547\u001b[0m         \u001b[0;31m# Don't delay the application, to avoid keeping the input\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    548\u001b[0m         \u001b[0;31m# arguments in memory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 549\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    550\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    551\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    223\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    224\u001b[0m             return [func(*args, **kwargs)\n\u001b[0;32m--> 225\u001b[0;31m                     for func, args, kwargs in self.items]\n\u001b[0m\u001b[1;32m    226\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    227\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    223\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    224\u001b[0m             return [func(*args, **kwargs)\n\u001b[0;32m--> 225\u001b[0;31m                     for func, args, kwargs in self.items]\n\u001b[0m\u001b[1;32m    226\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    227\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36m_fit_and_score\u001b[0;34m(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)\u001b[0m\n\u001b[1;32m    552\u001b[0m         \u001b[0mfit_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    553\u001b[0m         \u001b[0;31m# _score will return dict if is_multimetric is True\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 554\u001b[0;31m         \u001b[0mtest_scores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscorer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_multimetric\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    555\u001b[0m         \u001b[0mscore_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mfit_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    556\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mreturn_train_score\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36m_score\u001b[0;34m(estimator, X_test, y_test, scorer, is_multimetric)\u001b[0m\n\u001b[1;32m    595\u001b[0m     \"\"\"\n\u001b[1;32m    596\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mis_multimetric\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 597\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0m_multimetric_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscorer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    598\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    599\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0my_test\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36m_multimetric_score\u001b[0;34m(estimator, X_test, y_test, scorers)\u001b[0m\n\u001b[1;32m    625\u001b[0m             \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscorer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    626\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 627\u001b[0;31m             \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscorer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    628\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    629\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscore\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'item'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/metrics/scorer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, estimator, X, y_true, sample_weight)\u001b[0m\n\u001b[1;32m     95\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     96\u001b[0m             return self._sign * self._score_func(y_true, y_pred,\n\u001b[0;32m---> 97\u001b[0;31m                                                  **self._kwargs)\n\u001b[0m\u001b[1;32m     98\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m<ipython-input-10-b8e2e0cd7a17>\u001b[0m in \u001b[0;36mscore_func\u001b[0;34m(y_true, y_pred, sample_weight)\u001b[0m\n\u001b[1;32m      5\u001b[0m     \u001b[0midx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my_true\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_flat_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      8\u001b[0m \u001b[0mscoring\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmake_scorer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscore_func\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'sample_weight'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'accuracy_score' is not defined"
+     ]
+    }
+   ],
    "source": [
-    "pipe = make_pipeline(Reweighing(), LinearRegression())\n",
-    "# sample_weight_ will be updated after it is fit\n",
-    "fit_params = {'linearregression__sample_weight':\n",
-    "              pipe.named_steps.reweighing.sample_weight_}\n",
-    "pipe.fit(X, y, **fit_params)"
+    "rew = ReweighingMeta(estimator=LogisticRegression(solver='liblinear'))\n",
+    "\n",
+    "# UGLY workaround for sklearn issue: https://stackoverflow.com/a/49598597\n",
+    "def score_func(y_true, y_pred, sample_weight):\n",
+    "    idx = y_true.index.to_flat_index()\n",
+    "    print(idx)\n",
+    "    return accuracy_score(y_true, y_pred, sample_weight=sample_weight[idx])\n",
+    "scoring = make_scorer(score_func, **{'sample_weight': sample_weight})\n",
+    "\n",
+    "params = {'estimator__C': [1, 10], 'reweigher__prot_attr': ['sex']}\n",
+    "\n",
+    "clf = GridSearchCV(rew, params, scoring=scoring, cv=5)\n",
+    "clf.fit(X_train, y_train, **{'sample_weight': sw_train})\n",
+    "clf.score(X_test, y_test)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From 4107dd71c514700f72efe97ef0a93c4321e2237b Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 11 Jul 2019 16:13:51 -0400
Subject: [PATCH 24/61] initial adversarial debiasing port

---
 aif360/sklearn/inprocessing/__init__.py       |   5 +
 .../inprocessing/adversarial_debiasing.py     | 228 ++++++++++++++++++
 aif360/sklearn/utils.py                       |   1 +
 3 files changed, 234 insertions(+)
 create mode 100644 aif360/sklearn/inprocessing/__init__.py
 create mode 100644 aif360/sklearn/inprocessing/adversarial_debiasing.py

diff --git a/aif360/sklearn/inprocessing/__init__.py b/aif360/sklearn/inprocessing/__init__.py
new file mode 100644
index 00000000..863d3676
--- /dev/null
+++ b/aif360/sklearn/inprocessing/__init__.py
@@ -0,0 +1,5 @@
+from aif360.sklearn.inprocessing.adversarial_debiasing import AdversarialDebiasing
+
+__all__ = [
+    'AdversarialDebiasing'
+]
diff --git a/aif360/sklearn/inprocessing/adversarial_debiasing.py b/aif360/sklearn/inprocessing/adversarial_debiasing.py
new file mode 100644
index 00000000..e82e287d
--- /dev/null
+++ b/aif360/sklearn/inprocessing/adversarial_debiasing.py
@@ -0,0 +1,228 @@
+import numpy as np
+from scipy.special import softmax
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.preprocessing import LabelEncoder
+from sklearn.utils import check_is_fitted, check_random_state
+import tensorflow as tf
+
+from aif360.sklearn.utils import check_inputs, check_groups
+
+
+class AdversarialDebiasing(BaseEstimator, ClassifierMixin):
+    """Adversarial debiasing is an in-processing technique that learns a
+    classifier to maximize prediction accuracy and simultaneously reduce an
+    adversary's ability to determine the protected attribute from the
+    predictions [#zhang18]_. This approach leads to a fair classifier as the
+    predictions cannot carry any group discrimination information that the
+    adversary can exploit.
+
+    References:
+        .. [#zhang18] B. H. Zhang, B. Lemoine, and M. Mitchell, "Mitigating
+           Unwanted Biases with Adversarial Learning," AAAI/ACM Conference on
+           Artificial Intelligence, Ethics, and Society, 2018.
+    """
+
+    def __init__(self, prot_attr=None, adversary_loss_weight=0.1, num_epochs=50,
+                 batch_size=128, classifier_num_hidden_units=200, debias=True,
+                 verbose=True, random_state=None):
+
+        self.prot_attr = prot_attr
+        self.adversary_loss_weight = adversary_loss_weight
+        self.num_epochs = num_epochs
+        self.batch_size = batch_size
+        self.classifier_num_hidden_units = classifier_num_hidden_units
+        self.debias = debias
+        self.verbose = verbose
+        self.random_state = random_state
+
+    @property
+    def classifier_logits_(self):
+        check_is_fitted(self, ['input_ph', 'keep_prob'])
+        with tf.variable_scope('classifier_model'):
+            W1 = tf.get_variable(
+                    'W1', [self.input_ph.shape[1], self.classifier_num_hidden_units],
+                    initializer=tf.contrib.layers.xavier_initializer())
+            b1 = tf.Variable(tf.zeros(shape=[self.classifier_num_hidden_units]),
+                    name='b1')
+
+            h1 = tf.nn.relu(tf.matmul(self.input_ph, W1) + b1)
+            h1 = tf.nn.dropout(h1, keep_prob=self.keep_prob)
+
+            W2 = tf.get_variable(
+                    'W2', [self.classifier_num_hidden_units, 1],
+                    initializer=tf.contrib.layers.xavier_initializer())
+            b2 = tf.Variable(tf.zeros(shape=[1]), name='b2')
+
+            pred_logits = tf.matmul(h1, W2) + b2
+
+        return pred_logits
+
+    @property
+    def adversary_logits_(self):
+        """Compute the adversary predictions for the protected attribute."""
+        check_is_fitted(self, ['classifier_logits_', 'true_labels_ph'])
+        with tf.variable_scope("adversary_model"):
+            c = tf.get_variable('c', initializer=tf.constant(1.0))
+            s = tf.sigmoid((1 + tf.abs(c)) * self.classifier_logits)
+
+            W2 = tf.get_variable('W2', [3, 1],
+                                 initializer=tf.contrib.layers.xavier_initializer())
+            b2 = tf.Variable(tf.zeros(shape=[1]), name='b2')
+
+            pred_prot_attr_logits = tf.matmul(
+                    tf.concat([s, s * self.true_labels_ph, s * (1.0 - self.true_labels_ph)], axis=1),
+                    W2) + b2
+            # pred_prot_attr_labels = tf.sigmoid(pred_prot_attr_logit)
+
+        return pred_prot_attr_logits
+
+    def _train(self, X, y, groups):
+
+
+    def fit(self, X, y):
+        rng = check_random_state(self.random_state)
+        # tf.random.seed(random_state)
+
+        groups, self.prot_attr_ = check_groups(X, self.prot_attr)
+        lb = LabelBinarizer()
+        y = lb.fit_transform(y)
+        # TODO: LabelEncoder for groups
+        self.groups_ = np.unique(groups)
+        self.classes_ = lb.classes_
+        self.sess_ = tf.Session()
+
+        n_samples, n_features = X.shape
+        n_classes = len(self.classes_)
+        n_groups = len(self.groups_)
+
+        with tf.variable_scope('adversarial_debiasing'):
+            n_samples, n_features = X.shape
+            n_classes = len(self.classes_)
+            n_groups = len(self.groups_)
+
+            # Setup placeholders
+            self.input_ph = tf.placeholder(tf.float32, shape=[None, n_features])
+            self.prot_attr_ph = tf.placeholder(tf.float32, shape=[None, n_groups])
+            self.true_labels_ph = tf.placeholder(tf.float32, shape=[None, n_classes])
+            self.keep_prob = tf.placeholder(tf.float32)
+
+            # Obtain classifier loss
+            loss_fn = (tf.nn.sigmoid_cross_entropy_with_logits if n_classes == 1
+                       else tf.nn.softmax_cross_entropy_with_logits)
+            # clf_loss = loss_fn(labels=self.true_labels_ph, logits=self.classifier_logits_)
+            # clf_loss = tf.reduce_sum(sample_weight * clf_loss) / tf.reduce_sum(sample_weight)
+            clf_loss = tf.reduce_mean(loss_fn(labels=self.true_labels_ph,
+                                              logits=self.classifier_logits_))
+
+            if self.debias:
+                # Obtain adversary loss
+                loss_fn = (tf.nn.sigmoid_cross_entropy_with_logits if n_groups == 1
+                           else tf.nn.softmax_cross_entropy_with_logits)
+                adv_loss = tf.reduce_mean(loss_fn(labels=self.prot_attr_ph,
+                                                  logits=self.adversary_logits_))
+
+            # Setup optimizers with learning rates
+            global_step = tf.Variable(0, trainable=False)
+            starter_learning_rate = 0.001
+            learning_rate = tf.train.exponential_decay(
+                    starter_learning_rate, global_step, 1000, 0.96, staircase=True)
+            clf_opt = tf.train.AdamOptimizer(learning_rate)
+            if self.debias:
+                adv_opt = tf.train.AdamOptimizer(learning_rate)
+
+            clf_vars = [var for var in tf.trainable_variables()
+                        if 'classifier_model' in var.name]
+            if self.debias:
+                adv_vars = [var for var in tf.trainable_variables()
+                            if 'adversary_model' in var.name]
+                # Update classifier parameters
+                adv_grads = {var: grad for (grad, var) in
+                        adv_opt.compute_gradients(adv_loss, var_list=clf_vars)}
+
+            normalize = lambda x: x / (tf.norm(x) + np.finfo(np.float32).tiny)
+
+            clf_grads = []
+            for (grad, var) in clf_opt.compute_gradients(clf_loss, var_list=clf_vars):
+                if self.debias:
+                    unit_adv_grad = normalize(adv_grads[var])
+                    # proj_{adv_grad} clf_grad:
+                    grad -= tf.reduce_sum(grad * unit_adv_grad) * unit_adv_grad
+                    grad -= self.adversary_loss_weight * adv_grads[var]
+                clf_grads.append((grad, var))
+            clf_minimizer = clf_opt.apply_gradients(clf_grads, global_step=global_step)
+
+            if self.debias:
+                # Update adversary parameters
+                adv_minimizer = adv_opt.minimize(adv_loss, var_list=adv_vars,
+                                                 global_step=global_step)
+
+            self.sess_.run(tf.global_variables_initializer())
+            self.sess_.run(tf.local_variables_initializer())
+
+            # Begin training
+            for epoch in range(self.num_epochs):
+                # TODO: why rng.choice(n_samples, n_samples)?
+                shuffled_ids = rng.shuffle(np.arange(n_samples))
+                for i in range(n_samples // self.batch_size):
+                    batch_ids = shuffled_ids[self.batch_size * i:self.batch_size * (i+1)]
+                    batch_features = X[batch_ids]
+                    batch_labels = y[batch_ids]
+                    batch_prot_attr = groups[batch_ids]
+                    batch_feed_dict = {self.input_ph: batch_features,
+                                       self.true_labels_ph: batch_labels,
+                                       self.prot_attr_ph: batch_prot_attr,
+                                       self.keep_prob: 0.8}
+                    if self.debias:
+                        _, _, clf_loss_value, adv_loss_value = (
+                                self.sess_.run([clf_minimizer, adv_minimizer,
+                                                clf_loss, adv_loss],
+                                               feed_dict=batch_feed_dict))
+                        if i % 200 == 0 and self.verbose:
+                            print("epoch {}; iter: {}; batch classifier loss: "
+                                  "{}; batch adversarial loss: {}".format(
+                                          epoch, i, clf_loss_value,
+                                          adv_loss_value))
+                    else:
+                        _, clf_loss_value = self.sess_.run(
+                                [clf_minimizer, clf_loss],
+                                feed_dict=batch_feed_dict)
+                        if i % 200 == 0 and self.verbose:
+                            print("epoch {}; iter: {}; batch classifier loss: "
+                                  "{}".format(epoch, i, clf_loss_value))
+
+        return self
+
+    def decision_function(self, X):
+        check_is_fitted(self, ['classes_', 'input_ph', 'keep_prob', 'classifier_logits_'])
+        n_samples = X.shape[0]
+        groups, _ = check_groups(X, self.prot_attr_)
+
+        samples_covered = 0
+        scores = np.empty((n_samples, len(self.classes_)))
+        while samples_covered < n_samples:
+            start = samples_covered
+            end = samples_covered + self.batch_size
+            if end > n_samples:
+                end = n_samples
+
+            batch_ids = np.arange(start, end)
+            batch_features = X[batch_ids]
+            batch_prot_attr = groups[batch_ids]
+
+            batch_feed_dict = {self.input_ph: batch_features,
+                               self.keep_prob: 1.0}
+
+            # batch_logits = self.sess_.run(self.classifier_logits_, feed_dict=batch_feed_dict)
+            scores[batch_ids] = self.sess_.run(self.classifier_logits_,
+                                               feed_dict=batch_feed_dict)
+            samples_covered += len(batch_features)
+
+        return scores
+
+    def predict_proba(self, X):
+        decision = self.decision_function(X)
+        return softmax(decision, axis=1)
+
+    def predict(self, X):
+        indices = self.decision_function(X).argmax(axis=1)
+        return self.classes_[indices]
diff --git a/aif360/sklearn/utils.py b/aif360/sklearn/utils.py
index e18646bf..6c850e70 100644
--- a/aif360/sklearn/utils.py
+++ b/aif360/sklearn/utils.py
@@ -1,3 +1,4 @@
+import numpy as np
 from pandas.core.dtypes.common import is_list_like
 from sklearn.utils import check_consistent_length
 from sklearn.utils.validation import column_or_1d

From df85e42318a11b6d84ea8b264c6173f03acf5ed5 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Mon, 15 Jul 2019 22:27:44 -0400
Subject: [PATCH 25/61] multiclass/multigroup support for adv debiasing

---
 .../inprocessing/adversarial_debiasing.py     | 120 ++++++++----------
 1 file changed, 56 insertions(+), 64 deletions(-)

diff --git a/aif360/sklearn/inprocessing/adversarial_debiasing.py b/aif360/sklearn/inprocessing/adversarial_debiasing.py
index e82e287d..2d4bc7a0 100644
--- a/aif360/sklearn/inprocessing/adversarial_debiasing.py
+++ b/aif360/sklearn/inprocessing/adversarial_debiasing.py
@@ -22,10 +22,11 @@ class AdversarialDebiasing(BaseEstimator, ClassifierMixin):
            Artificial Intelligence, Ethics, and Society, 2018.
     """
 
-    def __init__(self, prot_attr=None, adversary_loss_weight=0.1, num_epochs=50,
-                 batch_size=128, classifier_num_hidden_units=200, debias=True,
-                 verbose=True, random_state=None):
+    def __init__(self, sess, prot_attr=None, adversary_loss_weight=0.1,
+                 num_epochs=50, batch_size=128, classifier_num_hidden_units=200,
+                 debias=True, verbose=True, random_state=None):
 
+        self.sess = sess
         self.prot_attr = prot_attr
         self.adversary_loss_weight = adversary_loss_weight
         self.num_epochs = num_epochs
@@ -37,10 +38,12 @@ def __init__(self, prot_attr=None, adversary_loss_weight=0.1, num_epochs=50,
 
     @property
     def classifier_logits_(self):
-        check_is_fitted(self, ['input_ph', 'keep_prob'])
+        check_is_fitted(self, ['input_ph', 'keep_prob', 'classes_'])
+        n_features = self.input_ph.shape[1]
+        n_classes = len(self.classes_)
         with tf.variable_scope('classifier_model'):
             W1 = tf.get_variable(
-                    'W1', [self.input_ph.shape[1], self.classifier_num_hidden_units],
+                    'W1', [n_features, self.classifier_num_hidden_units],
                     initializer=tf.contrib.layers.xavier_initializer())
             b1 = tf.Variable(tf.zeros(shape=[self.classifier_num_hidden_units]),
                     name='b1')
@@ -49,9 +52,9 @@ def classifier_logits_(self):
             h1 = tf.nn.dropout(h1, keep_prob=self.keep_prob)
 
             W2 = tf.get_variable(
-                    'W2', [self.classifier_num_hidden_units, 1],
+                    'W2', [self.classifier_num_hidden_units, n_classes],
                     initializer=tf.contrib.layers.xavier_initializer())
-            b2 = tf.Variable(tf.zeros(shape=[1]), name='b2')
+            b2 = tf.Variable(tf.zeros(shape=[n_classes]), name='b2')
 
             pred_logits = tf.matmul(h1, W2) + b2
 
@@ -60,72 +63,62 @@ def classifier_logits_(self):
     @property
     def adversary_logits_(self):
         """Compute the adversary predictions for the protected attribute."""
-        check_is_fitted(self, ['classifier_logits_', 'true_labels_ph'])
+        check_is_fitted(self, ['classifier_logits_', 'true_labels_ph', 'groups_'])
+        n_groups = len(self.groups_)
         with tf.variable_scope("adversary_model"):
             c = tf.get_variable('c', initializer=tf.constant(1.0))
-            s = tf.sigmoid((1 + tf.abs(c)) * self.classifier_logits)
+            s = tf.sigmoid((1 + tf.abs(c)) * self.classifier_logits_)
 
-            W2 = tf.get_variable('W2', [3, 1],
-                                 initializer=tf.contrib.layers.xavier_initializer())
-            b2 = tf.Variable(tf.zeros(shape=[1]), name='b2')
+            W2 = tf.get_variable('W2', [3, n_groups],
+                    initializer=tf.contrib.layers.xavier_initializer())
+            b2 = tf.Variable(tf.zeros(shape=[n_groups]), name='b2')
 
             pred_prot_attr_logits = tf.matmul(
-                    tf.concat([s, s * self.true_labels_ph, s * (1.0 - self.true_labels_ph)], axis=1),
+                    tf.concat([s, s * self.true_labels_ph,
+                               s * (1.0 - self.true_labels_ph)], axis=1),
                     W2) + b2
-            # pred_prot_attr_labels = tf.sigmoid(pred_prot_attr_logit)
 
         return pred_prot_attr_logits
 
-    def _train(self, X, y, groups):
-
-
     def fit(self, X, y):
         rng = check_random_state(self.random_state)
         # tf.random.seed(random_state)
 
         groups, self.prot_attr_ = check_groups(X, self.prot_attr)
-        lb = LabelBinarizer()
-        y = lb.fit_transform(y)
-        # TODO: LabelEncoder for groups
-        self.groups_ = np.unique(groups)
-        self.classes_ = lb.classes_
-        self.sess_ = tf.Session()
+        le = LabelEncoder()
+        y = le.fit_transform(y)
+        self.classes_ = le.classes_
+        groups = le.fit_transform(groups)
+        self.groups_ = le.classes_
 
         n_samples, n_features = X.shape
-        n_classes = len(self.classes_)
-        n_groups = len(self.groups_)
 
         with tf.variable_scope('adversarial_debiasing'):
-            n_samples, n_features = X.shape
-            n_classes = len(self.classes_)
-            n_groups = len(self.groups_)
-
             # Setup placeholders
             self.input_ph = tf.placeholder(tf.float32, shape=[None, n_features])
-            self.prot_attr_ph = tf.placeholder(tf.float32, shape=[None, n_groups])
-            self.true_labels_ph = tf.placeholder(tf.float32, shape=[None, n_classes])
+            self.prot_attr_ph = tf.placeholder(tf.float32, shape=[None, 1])
+            self.true_labels_ph = tf.placeholder(tf.float32, shape=[None, 1])
             self.keep_prob = tf.placeholder(tf.float32)
 
+            global_step = tf.train.get_or_create_global_step()
+            starter_learning_rate = 0.001
+            learning_rate = tf.train.exponential_decay(starter_learning_rate,
+                    global_step, 1000, 0.96, staircase=True)
+
             # Obtain classifier loss
-            loss_fn = (tf.nn.sigmoid_cross_entropy_with_logits if n_classes == 1
-                       else tf.nn.softmax_cross_entropy_with_logits)
-            # clf_loss = loss_fn(labels=self.true_labels_ph, logits=self.classifier_logits_)
-            # clf_loss = tf.reduce_sum(sample_weight * clf_loss) / tf.reduce_sum(sample_weight)
-            clf_loss = tf.reduce_mean(loss_fn(labels=self.true_labels_ph,
-                                              logits=self.classifier_logits_))
+            clf_loss = tf.reduce_mean(
+                    tf.nn.sparse_softmax_cross_entropy_with_logits(
+                            labels=self.true_labels_ph,
+                            logits=self.classifier_logits_))
 
             if self.debias:
                 # Obtain adversary loss
-                loss_fn = (tf.nn.sigmoid_cross_entropy_with_logits if n_groups == 1
-                           else tf.nn.softmax_cross_entropy_with_logits)
-                adv_loss = tf.reduce_mean(loss_fn(labels=self.prot_attr_ph,
-                                                  logits=self.adversary_logits_))
+                adv_loss = tf.reduce_mean(
+                        tf.nn.sparse_softmax_cross_entropy_with_logits(
+                                labels=self.prot_attr_ph,
+                                logits=self.adversary_logits_))
 
-            # Setup optimizers with learning rates
-            global_step = tf.Variable(0, trainable=False)
-            starter_learning_rate = 0.001
-            learning_rate = tf.train.exponential_decay(
-                    starter_learning_rate, global_step, 1000, 0.96, staircase=True)
+            # Setup optimizers
             clf_opt = tf.train.AdamOptimizer(learning_rate)
             if self.debias:
                 adv_opt = tf.train.AdamOptimizer(learning_rate)
@@ -135,9 +128,11 @@ def fit(self, X, y):
             if self.debias:
                 adv_vars = [var for var in tf.trainable_variables()
                             if 'adversary_model' in var.name]
-                # Update classifier parameters
+                # Compute grad wrt classifier parameters
                 adv_grads = {var: grad for (grad, var) in
                         adv_opt.compute_gradients(adv_loss, var_list=clf_vars)}
+                # Update adversary parameters (don't increment global step yet)
+                adv_min = adv_opt.minimize(adv_loss, var_list=adv_vars)
 
             normalize = lambda x: x / (tf.norm(x) + np.finfo(np.float32).tiny)
 
@@ -149,22 +144,17 @@ def fit(self, X, y):
                     grad -= tf.reduce_sum(grad * unit_adv_grad) * unit_adv_grad
                     grad -= self.adversary_loss_weight * adv_grads[var]
                 clf_grads.append((grad, var))
-            clf_minimizer = clf_opt.apply_gradients(clf_grads, global_step=global_step)
-
-            if self.debias:
-                # Update adversary parameters
-                adv_minimizer = adv_opt.minimize(adv_loss, var_list=adv_vars,
-                                                 global_step=global_step)
+            clf_min = clf_opt.apply_gradients(clf_grads, global_step=global_step)
 
-            self.sess_.run(tf.global_variables_initializer())
-            self.sess_.run(tf.local_variables_initializer())
+            self.sess.run(tf.global_variables_initializer())
 
             # Begin training
             for epoch in range(self.num_epochs):
                 # TODO: why rng.choice(n_samples, n_samples)?
                 shuffled_ids = rng.shuffle(np.arange(n_samples))
                 for i in range(n_samples // self.batch_size):
-                    batch_ids = shuffled_ids[self.batch_size * i:self.batch_size * (i+1)]
+                    batch_ids = shuffled_ids[self.batch_size * i:
+                                             self.batch_size * (i+1)]
                     batch_features = X[batch_ids]
                     batch_labels = y[batch_ids]
                     batch_prot_attr = groups[batch_ids]
@@ -174,8 +164,8 @@ def fit(self, X, y):
                                        self.keep_prob: 0.8}
                     if self.debias:
                         _, _, clf_loss_value, adv_loss_value = (
-                                self.sess_.run([clf_minimizer, adv_minimizer,
-                                                clf_loss, adv_loss],
+                                self.sess.run([clf_min, adv_min,
+                                               clf_loss, adv_loss],
                                                feed_dict=batch_feed_dict))
                         if i % 200 == 0 and self.verbose:
                             print("epoch {}; iter: {}; batch classifier loss: "
@@ -183,8 +173,8 @@ def fit(self, X, y):
                                           epoch, i, clf_loss_value,
                                           adv_loss_value))
                     else:
-                        _, clf_loss_value = self.sess_.run(
-                                [clf_minimizer, clf_loss],
+                        _, clf_loss_value = self.sess.run(
+                                [clf_min, clf_loss],
                                 feed_dict=batch_feed_dict)
                         if i % 200 == 0 and self.verbose:
                             print("epoch {}; iter: {}; batch classifier loss: "
@@ -193,9 +183,12 @@ def fit(self, X, y):
         return self
 
     def decision_function(self, X):
-        check_is_fitted(self, ['classes_', 'input_ph', 'keep_prob', 'classifier_logits_'])
+        check_is_fitted(self, ['classes_', 'input_ph', 'keep_prob',
+                               'classifier_logits_'])
         n_samples = X.shape[0]
         groups, _ = check_groups(X, self.prot_attr_)
+        le = LabelEncoder().fit(self.groups_)
+        groups = le.transform(groups)
 
         samples_covered = 0
         scores = np.empty((n_samples, len(self.classes_)))
@@ -212,9 +205,8 @@ def decision_function(self, X):
             batch_feed_dict = {self.input_ph: batch_features,
                                self.keep_prob: 1.0}
 
-            # batch_logits = self.sess_.run(self.classifier_logits_, feed_dict=batch_feed_dict)
-            scores[batch_ids] = self.sess_.run(self.classifier_logits_,
-                                               feed_dict=batch_feed_dict)
+            scores[batch_ids] = self.sess.run(self.classifier_logits_,
+                                              feed_dict=batch_feed_dict)
             samples_covered += len(batch_features)
 
         return scores

From d2d0ddcdc2fa9cfba108315d742f0430907b82a3 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 30 Jul 2019 14:15:28 -0400
Subject: [PATCH 26/61] fix build errors

---
 aif360/sklearn/datasets/utils.py     |  5 +----
 aif360/sklearn/metrics/metrics.py    | 16 +++++++++++-----
 aif360/sklearn/tests/test_metrics.py |  4 +++-
 aif360/sklearn/utils.py              |  2 +-
 4 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index 4566c983..f3e10117 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -23,7 +23,7 @@ def standarize_dataset(df, protected_attributes, target, sample_weight=None,
         usecols (single label or list-like, optional): Column(s) to keep. All
             others are dropped.
         dropcols (single label or list-like, optional): Column(s) to drop.
-        numeric_only (bool): Drop all non-numeric feature columns.
+        numeric_only (bool): Drop all non-numeric, non-binary feature columns.
         dropna (bool): Drop rows with NAs.
 
     Returns:
@@ -58,10 +58,7 @@ def standarize_dataset(df, protected_attributes, target, sample_weight=None,
         >>> X_tr, X_te, y_tr, y_te = train_test_split(X, y)
     """
     df = df.set_index(protected_attributes, drop=False, append=True)
-    # df = df.set_index(sample_weight or np.ones(df.shape[0]), append=True)
-    # df.index = df.index.set_names('sample_weight', level=-1)
 
-    # TODO: convert to 1/0 if numeric_only?
     y = df.pop(target)
 
     # Column-wise drops
diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index 4d87490e..eca1bf95 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -1,6 +1,7 @@
 import numpy as np
 from sklearn.metrics import make_scorer, recall_score
 from sklearn.neighbors import NearestNeighbors
+from sklearn.utils import check_X_y
 
 from aif360.sklearn.utils import check_groups
 
@@ -62,7 +63,8 @@ def ratio(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
     arbitrary metric.
 
     Note: The optimal value of a ratio is 1. To make it a scorer, one must
-    subtract 1, take the absolute value, and set ``greater_is_better`` to False.
+    take the minimum of the ratio and its inverse, subtract it from 1, and set
+    ``greater_is_better`` to False.
 
     Unprivileged group is taken to be the inverse of the privileged group.
 
@@ -97,8 +99,10 @@ def make_difference_scorer(func):
                        greater_is_better=False)
 
 def make_ratio_scorer(func):
-    return make_scorer(lambda y, y_pred, **kw: abs(func(y, y_pred, **kw) - 1),
-                       greater_is_better=False)
+    def score_fn(y, y_pred, **kwargs):
+        ratio = func(y, y_pred, **kwargs)
+        return 1 - min(ratio, 1/ratio)
+    return make_scorer(score_fn, greater_is_better=False)
 
 
 # ================================ HELPERS =====================================
@@ -179,7 +183,7 @@ def generalized_entropy_error(y_true, y_pred, alpha=2, pos_label=1):
 def between_group_generalized_entropy_error(y_true, y_pred, prot_attr=None,
                                             priv_group=None, alpha=2,
                                             pos_label=1):
-    groups = check_groups(y_true, prot_attr)
+    groups, _ = check_groups(y_true, prot_attr)
     b = np.empty_like(y_true, dtype='float')
     if priv_group is not None:
         groups = [1 if g == priv_group else 0 for g in groups]
@@ -199,9 +203,11 @@ def coefficient_of_variation(b):
 # Is consistency_difference posible?
 # use sample_weight?
 def consistency_score(X, y, n_neighbors=5):
+    # cast as ndarrays
+    X, y = check_X_y(X, y)
     # learn a KNN on the features
     nbrs = NearestNeighbors(n_neighbors, algorithm='ball_tree').fit(X)
-    _, indices = nbrs.kneighbors(X)
+    indices = nbrs.kneighbors(X, return_distance=False)
 
     # compute consistency score
     return 1 - abs(y - y[indices].mean(axis=1)).mean()
diff --git a/aif360/sklearn/tests/test_metrics.py b/aif360/sklearn/tests/test_metrics.py
index e470f32e..0c040edd 100644
--- a/aif360/sklearn/tests/test_metrics.py
+++ b/aif360/sklearn/tests/test_metrics.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pandas as pd
 from sklearn.linear_model import LogisticRegression
 
 from aif360.datasets import AdultDataset
@@ -13,7 +14,8 @@
 
 
 X, y, sample_weight = fetch_adult(numeric_only=True)
-y = y.factorize(sort=True)[0]
+# y = y.cat.rename_categories(range(len(y.cat.categories)))
+y = pd.Series(y.factorize(sort=True)[0], name=y.name, index=y.index)
 y_pred = LogisticRegression(solver='liblinear').fit(X, y,
         sample_weight=sample_weight).predict(X)
 adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
diff --git a/aif360/sklearn/utils.py b/aif360/sklearn/utils.py
index 6c850e70..bfec0351 100644
--- a/aif360/sklearn/utils.py
+++ b/aif360/sklearn/utils.py
@@ -38,7 +38,7 @@ def check_groups(X, prot_attr):
                 "Expected `Series` or `DataFrame`, got {} instead.".format(
                         type(X).__name__))
 
-    all_prot_attrs = X.index.names[1:]
+    all_prot_attrs = [name for name in X.index.names if name]  # not None or ''
     if prot_attr is None:
         prot_attr = all_prot_attrs
     elif not is_list_like(prot_attr):

From 7a2414a3f1c048094c780aff33e253b5d487eed9 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Mon, 12 Aug 2019 14:44:16 -0400
Subject: [PATCH 27/61] Add ensure_binary option to check_groups

---
 aif360/sklearn/utils.py | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/aif360/sklearn/utils.py b/aif360/sklearn/utils.py
index bfec0351..1fb75d3f 100644
--- a/aif360/sklearn/utils.py
+++ b/aif360/sklearn/utils.py
@@ -16,39 +16,48 @@ def check_inputs(X, y, sample_weight):
     check_consistent_length(X, y, sample_weight)
     return X, y, sample_weight
 
-def check_groups(X, prot_attr):
-    """Validates ``X`` and returns ``groups`` and ``prot_attr``.
+def check_groups(arr, prot_attr, ensure_binary=False):
+    """Validates ``arr`` and returns ``groups`` and ``prot_attr``.
 
     Args:
-        X (`pandas.Series` or `pandas.DataFrame`): .
+        arr (`pandas.Series` or `pandas.DataFrame`): A Pandas object containing
+            protected attribute information in the index.
         prot_attr (single label or list-like): Protected attribute(s). If
-            ``None``, all protected attributes in ``X`` are used.
+            ``None``, all protected attributes in ``arr`` are used.
+        ensure_binary (bool): Raise an error if the resultant groups are not
+            binary.
 
     Returns:
-        (`pandas.Index`, list-like):
+        tuple:
 
             * **groups** (`pandas.Index`) -- Label (or tuple of labels) of
-              protected attribute for each sample in ``X``.
+              protected attribute for each sample in ``arr``.
             * **prot_attr** (list-like) -- Modified input. If input is a single
               label, returns single-item list. If input is ``None`` returns list
               of all protected attributes.
     """
-    if not hasattr(X, 'index'):
+    if not hasattr(arr, 'index'):
         raise TypeError(
                 "Expected `Series` or `DataFrame`, got {} instead.".format(
-                        type(X).__name__))
+                        type(arr).__name__))
 
-    all_prot_attrs = [name for name in X.index.names if name]  # not None or ''
+    all_prot_attrs = [name for name in arr.index.names if name]  # not None or ''
     if prot_attr is None:
         prot_attr = all_prot_attrs
     elif not is_list_like(prot_attr):
         prot_attr = [prot_attr]
 
-    if any(p not in X.index.names for p in prot_attr):
+    if any(p not in arr.index.names for p in prot_attr):
         raise ValueError("Some of the attributes provided are not present "
                          "in the dataset. Expected a subset of:\n{}\nGot:\n"
                          "{}".format(all_prot_attrs, prot_attr))
 
-    groups = X.index.droplevel(list(set(X.index.names) - set(prot_attr)))
+    groups = arr.index.droplevel(list(set(arr.index.names) - set(prot_attr)))
+    groups = groups.to_flat_index()
+
+    n_unique = groups.nunique()
+    if ensure_binary and n_unique != 2:
+        raise ValueError("Expected 2 protected attribute groups, got {}".format(
+                groups.unique() if n_unique > 5 else n_unique))
 
-    return groups.to_flat_index(), prot_attr
+    return groups, prot_attr

From aac9954e87f3666af7f60a540648d52d71d560c0 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 29 Oct 2019 15:02:48 -0400
Subject: [PATCH 28/61] `numeric_only` converts index and label as well

---
 aif360/sklearn/datasets/openml_datasets.py | 11 ++-
 aif360/sklearn/datasets/utils.py           | 81 ++++------------------
 2 files changed, 20 insertions(+), 72 deletions(-)

diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 37122b17..562cd734 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -88,7 +88,7 @@ def fetch_adult(subset='all', data_home=None, binary_race=True, usecols=[],
                                              ordered=True).fillna('Non-white')
     df.sex = df.sex.cat.as_ordered()  # 'Female' < 'Male'
 
-    return standarize_dataset(df, protected_attributes=['race', 'sex'],
+    return standarize_dataset(df, prot_attr=['race', 'sex'],
                               target='annual-income', sample_weight='fnlwgt',
                               usecols=usecols, dropcols=dropcols,
                               numeric_only=numeric_only, dropna=dropna)
@@ -161,10 +161,9 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
     df = df.join(personal_status.astype('category'))
     df.sex = df.sex.cat.as_ordered()  # 'female' < 'male'
 
-    return standarize_dataset(df, protected_attributes=['sex', age],
-                              target='credit-risk', usecols=usecols,
-                              dropcols=dropcols, numeric_only=numeric_only,
-                              dropna=dropna)
+    return standarize_dataset(df, prot_attr=['sex', age], target='credit-risk',
+                              usecols=usecols, dropcols=dropcols,
+                              numeric_only=numeric_only, dropna=dropna)
 
 def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
                numeric_only=False, dropna=False):
@@ -215,6 +214,6 @@ def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
     # replace 'unknown' marker with NaN
     df.apply(lambda s: s.cat.remove_categories('unknown', inplace=True)
              if hasattr(s, 'cat') and 'unknown' in s.cat.categories else s)
-    return standarize_dataset(df, protected_attributes='age', target='deposit',
+    return standarize_dataset(df, prot_attr='age', target='deposit',
                               usecols=usecols, dropcols=dropcols,
                               numeric_only=numeric_only, dropna=dropna)
diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index f3e10117..964f34d9 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -1,21 +1,17 @@
 from collections import namedtuple
 
-import pandas as pd
 from pandas.core.dtypes.common import is_list_like
-from sklearn.compose import ColumnTransformer
-from sklearn.preprocessing import OneHotEncoder
-from sklearn.utils.validation import check_is_fitted
 
-def standarize_dataset(df, protected_attributes, target, sample_weight=None,
-                       usecols=[], dropcols=[], numeric_only=False, dropna=True):
+def standarize_dataset(df, prot_attr, target, sample_weight=None, usecols=[],
+                       dropcols=[], numeric_only=False, dropna=True):
     """Separate data, targets, and possibly sample weights and populate
     protected attributes as sample properties.
 
     Args:
         df (pandas.DataFrame): DataFrame with features and target together.
-        protected_attributes (single label or list-like): Label or list of
-            labels corresponding to protected attribute columns. Even if these
-            are dropped from the features, they remain in the index.
+        prot_attr (single label or list-like): Label or list of labels
+            corresponding to protected attribute columns. Even if these are
+            dropped from the features, they remain in the index.
         target (single label or list-like): Column label of the target (outcome)
             variable.
         sample_weight (single label, optional): Name of the column containing
@@ -47,85 +43,38 @@ def standarize_dataset(df, protected_attributes, target, sample_weight=None,
         >>> from sklearn.linear_model import LinearRegression
 
         >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['X', 'y', 'Z'])
-        >>> train = standarize_dataset(df, protected_attributes='Z', target='y')
+        >>> train = standarize_dataset(df, prot_attr='Z', target='y')
         >>> reg = LinearRegression().fit(*train)
 
         >>> import numpy as np
         >>> from sklearn.datasets import make_classification
         >>> from sklearn.model_selection import train_test_split
         >>> df = pd.DataFrame(np.hstack(make_classification(n_features=5)))
-        >>> X, y = standarize_dataset(df, protected_attributes=0, target=5)
+        >>> X, y = standarize_dataset(df, prot_attr=0, target=5)
         >>> X_tr, X_te, y_tr, y_te = train_test_split(X, y)
     """
-    df = df.set_index(protected_attributes, drop=False, append=True)
-
-    y = df.pop(target)
-
     # Column-wise drops
-    df = df.drop(dropcols, axis=1)
+    df = df.drop(columns=dropcols)
     if usecols:
         if not is_list_like(usecols):
             # make sure we don't return a Series instead of a DataFrame
             usecols = [usecols]
         df = df[usecols]
+
     if numeric_only:
-        # binary categorical columns -> 1/0
         for col in df.select_dtypes('category'):
-            # TODO: allow any size ordered categorical?
-            if len(df[col].cat.categories) == 2 and df[col].cat.ordered:
+            if df[col].cat.ordered:
                 df[col] = df[col].factorize(sort=True)[0]
         df = df.select_dtypes(['number', 'bool'])
-        # upcast all feature dimensions to a consistent numerical dtype
-        df = df.apply(pd.to_numeric, axis=1)
+
     # Index-wise drops
     if dropna:
-        notna = df.notna().all(axis=1) & y.notna()
-        df = df.loc[notna]
-        y = y.loc[notna]
+        df.dropna()
+
+    df = df.set_index(prot_attr, drop=False, append=True)
+    y = df.pop(target)
 
     if sample_weight is not None:
         return namedtuple('WeightedDataset', ['X', 'y', 'sample_weight'])(
                           df, y, df.pop(sample_weight).rename('sample_weight'))
     return namedtuple('Dataset', ['X', 'y'])(df, y)
-
-def make_onehot_transformer():
-    """Shortcut for encoding categorical features as one-hot vectors.
-
-    Note:
-        This changes the column order.
-
-    Returns:
-        sklearn.compose.ColumnTransformer: Class capable of transforming
-        categorical features in X to one-hot features.
-    """
-    class PandasOutOneHotTransformer(ColumnTransformer):
-        def __init__(self):
-            ohe = ('onehotencoder', OneHotEncoder(),
-                   lambda X: X.dtypes == 'category')
-            super().__init__([ohe], remainder='passthrough')
-
-        def get_feature_names(self):
-            check_is_fitted(self, 'transformers_')
-            dummies = self.named_transformers_.onehotencoder.get_feature_names(
-                    input_features=self.ohe_input_features_)
-            passthroughs = self.passthrough_features_
-            return list(dummies) + list(passthroughs)
-
-        def fit(self, X, y=None):
-            self.ohe_input_features_ = X.columns[X.dtypes == 'category']
-            self.passthrough_features_ = X.columns[X.dtypes != 'category']
-            return super().fit(X, y=y)
-
-        def fit_transform(self, X, y=None):
-            Xt = super().fit_transform(X, y=y)
-            self.ohe_input_features_ = X.columns[X.dtypes == 'category']
-            self.passthrough_features_ = X.columns[X.dtypes != 'category']
-            columns = self.get_feature_names()
-            return pd.DataFrame(Xt, columns=columns, index=X.index)
-
-        def transform(self, X):
-            Xt = super().transform(X)
-            columns = self.get_feature_names()
-            return pd.DataFrame(Xt, columns=columns, index=X.index)
-
-    return PandasOutOneHotTransformer()

From dc317cf496b767a0e41dee4a8ad8a2c990993642 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 29 Oct 2019 15:06:35 -0400
Subject: [PATCH 29/61] changed Reweighing to return X, sample_weight

removed Reweighing.sample_weight_ attribute
---
 aif360/sklearn/preprocessing/reweighing.py | 26 ++++++++++++----------
 aif360/sklearn/tests/test_reweighing.py    |  4 ++--
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/aif360/sklearn/preprocessing/reweighing.py b/aif360/sklearn/preprocessing/reweighing.py
index 5a80c457..c73b96fe 100644
--- a/aif360/sklearn/preprocessing/reweighing.py
+++ b/aif360/sklearn/preprocessing/reweighing.py
@@ -16,8 +16,6 @@ class Reweighing(BaseEstimator):
             transformer.
         classes_ (array, shape (n_classes,)): A list of class labels known to
             the transformer.
-        sample_weight_ (array, shape (n_samples,)): New sample weights after
-            transformation. See examples for details.
         reweigh_factors_ (array, shape (n_groups, n_labels)): Reweighing factors
             for each combination of group and class labels used to debias
             samples. Existing sample weights are multiplied by the corresponding
@@ -61,12 +59,14 @@ def fit_transform(self, X, y, sample_weight=None):
             sample_weight (array-like, optional): Sample weights.
 
         Returns:
-            X: Unchanged samples. Only the sample weights are different after
-            transformation (see the `sample_weight_` attribute).
+            tuple:
+
+                **X** -- Unchanged samples.
+                **sample_weight** -- Transformed sample weights.
         """
         X, y, sample_weight = check_inputs(X, y, sample_weight)
 
-        self.sample_weight_ = np.empty_like(sample_weight)
+        sample_weight_t = np.empty_like(sample_weight)
         groups, self.prot_attr_ = check_groups(X, self.prot_attr)
         # TODO: maintain categorical ordering
         self.groups_ = np.unique(groups)
@@ -82,16 +82,13 @@ def N_(i): return sample_weight[i].sum()
                 g_and_c = (groups == g) & (y == c)
                 if np.any(g_and_c):
                     W_gc = N_(groups == g) * N_(y == c) / (N * N_(g_and_c))
-                    self.sample_weight_[g_and_c] = W_gc * sample_weight[g_and_c]
+                    sample_weight_t[g_and_c] = W_gc * sample_weight[g_and_c]
                     self.reweigh_factors_[i, j] = W_gc
-        return X
+        return X, sample_weight_t
 
 
 class ReweighingMeta(BaseEstimator, MetaEstimatorMixin):
     def __init__(self, estimator, reweigher=Reweighing()):
-        if not has_fit_parameter(estimator, 'sample_weight'):
-            raise TypeError("`estimator` (type: {}) does not have fit parameter"
-                            " `sample_weight`.".format(type(estimator)))
         self.reweigher = reweigher
         self.estimator = estimator
 
@@ -100,11 +97,16 @@ def _estimator_type(self):
         return self.estimator._estimator_type
 
     def fit(self, X, y, sample_weight=None):
+        if not has_fit_parameter(self.estimator, 'sample_weight'):
+            raise TypeError("`estimator` (type: {}) does not have fit parameter"
+                            " `sample_weight`.".format(type(self.estimator)))
+
         self.reweigher_ = clone(self.reweigher)
         self.estimator_ = clone(self.estimator)
 
-        self.reweigher_.fit_transform(X, y, sample_weight=sample_weight)
-        self.estimator_.fit(X, y, sample_weight=self.reweigher_.sample_weight_)
+        X, sample_weight = self.reweigher_.fit_transform(X, y,
+                sample_weight=sample_weight)
+        self.estimator_.fit(X, y, sample_weight=sample_weight)
         return self
 
     @if_delegate_has_method('estimator_')
diff --git a/aif360/sklearn/tests/test_reweighing.py b/aif360/sklearn/tests/test_reweighing.py
index f1e2a223..97631043 100644
--- a/aif360/sklearn/tests/test_reweighing.py
+++ b/aif360/sklearn/tests/test_reweighing.py
@@ -24,12 +24,12 @@ def test_reweighing_sex():
                               privileged_groups=[{'sex': 1}])
     adult_fair = orig_rew.fit_transform(adult)
     rew = Reweighing('sex')
-    rew.fit_transform(X, y, sample_weight=sample_weight)
+    _, new_sample_weight = rew.fit_transform(X, y, sample_weight=sample_weight)
 
     # assert np.allclose([[orig_rew.w_up_unfav, orig_rew.w_up_fav],
     #                     [orig_rew.w_p_unfav, orig_rew.w_p_fav]],
     #                    rew.reweigh_factors_)
-    assert np.allclose(adult_fair.instance_weights, rew.sample_weight_)
+    assert np.allclose(adult_fair.instance_weights, new_sample_weight)
 
 def test_reweighing_intersection():
     rew = Reweighing()

From 0f184c3b5e71bd59e9dd291eb5f4b410a60f9ce5 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 29 Oct 2019 15:08:34 -0400
Subject: [PATCH 30/61] made sample_weight optional in check_inputs

---
 aif360/sklearn/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aif360/sklearn/utils.py b/aif360/sklearn/utils.py
index 1fb75d3f..28db1e61 100644
--- a/aif360/sklearn/utils.py
+++ b/aif360/sklearn/utils.py
@@ -4,7 +4,7 @@
 from sklearn.utils.validation import column_or_1d
 
 
-def check_inputs(X, y, sample_weight):
+def check_inputs(X, y, sample_weight=None):
     if not hasattr(X, 'index'):
         raise TypeError("Expected `DataFrame`, got {} instead.".format(
             type(X).__name__))

From ec4a1de138adb74c08fc7f773358ddd1e1fef722 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 29 Oct 2019 15:11:46 -0400
Subject: [PATCH 31/61] matched tests to new numeric dataset format

---
 aif360/sklearn/tests/test_metrics.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/aif360/sklearn/tests/test_metrics.py b/aif360/sklearn/tests/test_metrics.py
index 0c040edd..c0a1c6e9 100644
--- a/aif360/sklearn/tests/test_metrics.py
+++ b/aif360/sklearn/tests/test_metrics.py
@@ -1,5 +1,4 @@
 import numpy as np
-import pandas as pd
 from sklearn.linear_model import LogisticRegression
 
 from aif360.datasets import AdultDataset
@@ -14,8 +13,6 @@
 
 
 X, y, sample_weight = fetch_adult(numeric_only=True)
-# y = y.cat.rename_categories(range(len(y.cat.categories)))
-y = pd.Series(y.factorize(sort=True)[0], name=y.name, index=y.index)
 y_pred = LogisticRegression(solver='liblinear').fit(X, y,
         sample_weight=sample_weight).predict(X)
 adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
@@ -44,27 +41,27 @@ def test_selection_rate():
     assert select == cm.selection_rate()
 
 def test_disparate_impact():
-    di = disparate_impact_ratio(y, y_pred, prot_attr='sex', priv_group='Male',
+    di = disparate_impact_ratio(y, y_pred, prot_attr='sex',
                                 sample_weight=sample_weight)
     assert di == cm.disparate_impact()
 
 def test_statistical_parity():
     stat = statistical_parity_difference(y, y_pred, prot_attr='sex',
-            priv_group='Male', sample_weight=sample_weight)
+                                         sample_weight=sample_weight)
     assert stat == cm.statistical_parity_difference()
 
 def test_equal_opportunity():
     eopp = equal_opportunity_difference(y, y_pred, prot_attr='sex',
-            priv_group='Male', sample_weight=sample_weight)
+                                        sample_weight=sample_weight)
     assert eopp == cm.equal_opportunity_difference()
 
 def test_average_odds_difference():
-    aod = average_odds_difference(y, y_pred, prot_attr='sex', priv_group='Male',
+    aod = average_odds_difference(y, y_pred, prot_attr='sex',
                                   sample_weight=sample_weight)
     assert np.isclose(aod, cm.average_odds_difference())
 
 def test_average_odds_error():
-    aoe = average_odds_error(y, y_pred, prot_attr='sex', priv_group='Male',
+    aoe = average_odds_error(y, y_pred, prot_attr='sex',
                              sample_weight=sample_weight)
     assert np.isclose(aoe, cm.average_abs_odds_difference())
 
@@ -73,6 +70,5 @@ def test_generalized_entropy_index():
     assert np.isclose(gei, cm.generalized_entropy_index())
 
 def test_between_group_generalized_entropy_index():
-    bggei = between_group_generalized_entropy_error(y, y_pred, prot_attr='sex',
-                                                    priv_group='Male')
+    bggei = between_group_generalized_entropy_error(y, y_pred, prot_attr='sex')
     assert bggei == cm.between_group_generalized_entropy_index()

From f8c4fc5cda17ad79fb194a29219a39e6853047c4 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 29 Oct 2019 17:20:07 -0400
Subject: [PATCH 32/61] added generalized_fnr/fpr metrics

also added warning when dividing by zero in ratio
---
 aif360/sklearn/metrics/__init__.py | 19 +-------
 aif360/sklearn/metrics/metrics.py  | 77 +++++++++++++++++++++---------
 2 files changed, 56 insertions(+), 40 deletions(-)

diff --git a/aif360/sklearn/metrics/__init__.py b/aif360/sklearn/metrics/__init__.py
index 84aa3f1e..ceaef288 100644
--- a/aif360/sklearn/metrics/__init__.py
+++ b/aif360/sklearn/metrics/__init__.py
@@ -1,18 +1 @@
-from aif360.sklearn.metrics.metrics import consistency_score
-from aif360.sklearn.metrics.metrics import specificity_score
-from aif360.sklearn.metrics.metrics import selection_rate
-from aif360.sklearn.metrics.metrics import disparate_impact_ratio
-from aif360.sklearn.metrics.metrics import statistical_parity_difference
-from aif360.sklearn.metrics.metrics import equal_opportunity_difference
-from aif360.sklearn.metrics.metrics import average_odds_difference
-from aif360.sklearn.metrics.metrics import average_odds_error
-from aif360.sklearn.metrics.metrics import generalized_entropy_error
-from aif360.sklearn.metrics.metrics import between_group_generalized_entropy_error
-
-__all__ = [
-    'consistency_score', 'specificity_score', 'selection_rate',
-    'disparate_impact_ratio', 'statistical_parity_difference',
-    'equal_opportunity_difference', 'average_odds_difference',
-    'average_odds_error', 'generalized_entropy_error',
-    'between_group_generalized_entropy_error'
-]
+from aif360.sklearn.metrics.metrics import *
diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index eca1bf95..4adadda0 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -1,17 +1,20 @@
+import warnings
+
 import numpy as np
 from sklearn.metrics import make_scorer, recall_score
 from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_X_y
+from sklearn.exceptions import UndefinedMetricWarning
 
 from aif360.sklearn.utils import check_groups
 
 
 __all__ = [
-    'consistency_score', 'specificity_score', 'selection_rate',
+    'base_rate', 'consistency_score', 'specificity_score', 'selection_rate',
     'disparate_impact_ratio', 'statistical_parity_difference',
     'equal_opportunity_difference', 'average_odds_difference',
-    'average_odds_error', 'generalized_entropy_error',
-    'between_group_generalized_entropy_error'
+    'average_odds_error', 'generalized_entropy_error', 'generalized_fnr',
+    'between_group_generalized_entropy_error', 'generalized_fpr'
 ]
 
 # ============================= META-METRICS ===================================
@@ -88,9 +91,18 @@ def ratio(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
     unpriv = map(lambda a: a[~idx], (y,) + args)
     priv = map(lambda a: a[idx], (y,) + args)
     if sample_weight is not None:
-        return (func(*unpriv, sample_weight=sample_weight[~idx], **kwargs)
-              / func(*priv, sample_weight=sample_weight[idx], **kwargs))
-    return func(*unpriv, **kwargs) / func(*priv, **kwargs)
+        numerator = func(*unpriv, sample_weight=sample_weight[~idx], **kwargs)
+        denominator = func(*priv, sample_weight=sample_weight[idx], **kwargs)
+    else:
+        numerator = func(*unpriv, **kwargs)
+        denominator = func(*priv, **kwargs)
+
+    if denominator == 0:
+        warnings.warn("The ratio is ill-defined and being set to 0.0 because "
+                      "the {} for privileged samples is 0.".format(func.__name__),
+                      UndefinedMetricWarning)
+
+    return numerator / denominator
 
 
 # =========================== SCORER FACTORIES =================================
@@ -106,6 +118,7 @@ def score_fn(y, y_pred, **kwargs):
 
 
 # ================================ HELPERS =====================================
+# TODO: make this more general
 def specificity_score(y_true, y_pred, neg_label=0, sample_weight=None):
     """Compute the specificity or true negative rate.
 
@@ -118,12 +131,32 @@ def specificity_score(y_true, y_pred, neg_label=0, sample_weight=None):
     return recall_score(y_true, y_pred, pos_label=neg_label,
                         sample_weight=sample_weight)
 
-def base_rate(y, y_pred=None, pos_label=1, sample_weight=None):
-    return np.average(y == pos_label, weights=sample_weight)
+def base_rate(y_true, y_pred=None, pos_label=1, sample_weight=None):
+    return np.average(y_true == pos_label, weights=sample_weight)
 
 def selection_rate(y_true, y_pred, pos_label=1, sample_weight=None):
     return base_rate(y_pred, pos_label=pos_label, sample_weight=sample_weight)
 
+def generalized_fpr(y_true, y_pred, pos_label=1, sample_weight=None):
+    idx = (y_true != pos_label)
+    if not np.any(idx):
+        warnings.warn("generalized_fpr is ill-defined because there are no true"
+                      " negatives in y_true.", UndefinedMetricWarning)
+        return 0.
+    if sample_weight is None:
+        return y_pred[idx].mean()
+    return np.average(y_pred[idx], weights=sample_weight[idx])
+
+def generalized_fnr(y_true, y_pred, pos_label=1, sample_weight=None):
+    idx = (y_true == pos_label)
+    if not np.any(idx):
+        warnings.warn("generalized_fnr is ill-defined because there are no true"
+                      " positives in y_true.", UndefinedMetricWarning)
+        return 0.
+    if sample_weight is None:
+        return 1 - y_pred[idx].mean()
+    return 1 - np.average(y_pred[idx], weights=sample_weight[idx])
+
 
 # ============================ GROUP FAIRNESS ==================================
 def statistical_parity_difference(*y, prot_attr=None, priv_group=1, pos_label=1,
@@ -144,25 +177,25 @@ def equal_opportunity_difference(y_true, y_pred, prot_attr=None, priv_group=1,
                       priv_group=priv_group, pos_label=pos_label,
                       sample_weight=sample_weight)
 
-def average_odds_difference(y_true, y_pred, prot_attr=None, priv_group=1, pos_label=1,
-                            neg_label=0, sample_weight=None):
-    tnr_diff = difference(specificity_score, y_true, y_pred, prot_attr=prot_attr,
-                          priv_group=priv_group, neg_label=neg_label,
-                          sample_weight=sample_weight)
+def average_odds_difference(y_true, y_pred, prot_attr=None, priv_group=1,
+                            pos_label=1, neg_label=0, sample_weight=None):
+    fpr_diff = -difference(specificity_score, y_true, y_pred,
+                           prot_attr=prot_attr, priv_group=priv_group,
+                           neg_label=neg_label, sample_weight=sample_weight)
     tpr_diff = difference(recall_score, y_true, y_pred, prot_attr=prot_attr,
                           priv_group=priv_group, pos_label=pos_label,
                           sample_weight=sample_weight)
-    return (tpr_diff - tnr_diff) / 2
+    return (tpr_diff + fpr_diff) / 2
 
-def average_odds_error(y_true, y_pred, prot_attr=None, priv_group=1, pos_label=1,
-                       neg_label=0, sample_weight=None):
-    tnr_diff = difference(specificity_score, y_true, y_pred, prot_attr=prot_attr,
-                          priv_group=priv_group, neg_label=neg_label,
-                          sample_weight=sample_weight)
+def average_odds_error(y_true, y_pred, prot_attr=None, priv_group=1,
+                       pos_label=1, neg_label=0, sample_weight=None):
+    fpr_diff = -difference(specificity_score, y_true, y_pred,
+                           prot_attr=prot_attr, priv_group=priv_group,
+                           neg_label=neg_label, sample_weight=sample_weight)
     tpr_diff = difference(recall_score, y_true, y_pred, prot_attr=prot_attr,
                           priv_group=priv_group, pos_label=pos_label,
                           sample_weight=sample_weight)
-    return (abs(tnr_diff) + abs(tpr_diff)) / 2
+    return (abs(tpr_diff) + abs(fpr_diff)) / 2
 
 
 # ========================== INDIVIDUAL FAIRNESS ===============================
@@ -223,8 +256,8 @@ def sensitivity_score(y_true, y_pred, pos_label=1, sample_weight=None):
 #     return 1 - recall_score(y_true, y_pred, pos_label=pos_label,
 #                             sample_weight=sample_weight)
 
-# def false_positive_rate_error(y_true, y_pred, pos_label=1, sample_weight=None):
-#     return 1 - specificity_score(y_true, y_pred, pos_label=pos_label,
+# def false_positive_rate_error(y_true, y_pred, neg_label=0, sample_weight=None):
+#     return 1 - specificity_score(y_true, y_pred, neg_label=neg_label,
 #                                  sample_weight=sample_weight)
 
 def mean_difference(*y, prot_attr=None, priv_group=1, pos_label=1, sample_weight=None):

From 7ce2f42f0e7cd713e9d5100a01f1dc72a53143eb Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 29 Oct 2019 17:27:12 -0400
Subject: [PATCH 33/61] fixed dataset_processing

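changed bank dataset target to return bool

standarize_dataset: restored NaN-row filtering (the bare `df.dropna()` call
discarded its result) and moved the numeric_only conversion and index/target
extraction ahead of the dropcols/usecols column drops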
---
 aif360/sklearn/datasets/openml_datasets.py |  3 +--
 aif360/sklearn/datasets/utils.py           | 28 ++++++++++++----------
 aif360/sklearn/tests/test_datasets.py      | 16 ++++++-------
 3 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 562cd734..45d8cd7f 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -209,8 +209,7 @@ def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
                   'housing', 'loan', 'contact', 'day', 'month', 'duration',
                   'campaign', 'pdays', 'previous', 'poutcome', 'deposit']
     # remap target
-    df.deposit = df.deposit.cat.rename_categories({'1': 'no', '2': 'yes'})
-    # df.deposit = df.deposit.cat.as_ordered()
+    df.deposit = df.deposit.map({'1': False, '2': True})
     # replace 'unknown' marker with NaN
     df.apply(lambda s: s.cat.remove_categories('unknown', inplace=True)
              if hasattr(s, 'cat') and 'unknown' in s.cat.categories else s)
diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index 964f34d9..e714026b 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -35,8 +35,8 @@ def standarize_dataset(df, prot_attr, target, sample_weight=None, usecols=[],
             * **sample_weight** (`pandas.Series`, optional) -- Sample weights.
 
     Note:
-        The order of execution for the dropping parameters is: dropcols ->
-        usecols -> numeric_only -> dropna.
+        The order of execution for the dropping parameters is: numeric_only ->
+        dropcols -> usecols -> dropna.
 
     Examples:
         >>> import pandas as pd
@@ -53,6 +53,17 @@ def standarize_dataset(df, prot_attr, target, sample_weight=None, usecols=[],
         >>> X, y = standarize_dataset(df, prot_attr=0, target=5)
         >>> X_tr, X_te, y_tr, y_te = train_test_split(X, y)
     """
+    # TODO: warn user if label in prot_attr, target, or dropcols is already dropped
+    # TODO: error message if label in usecols is already dropped
+    if numeric_only:
+        for col in df.select_dtypes('category'):
+            if df[col].cat.ordered:
+                df[col] = df[col].factorize(sort=True)[0]
+        df = df.select_dtypes(['number', 'bool'])
+
+    df = df.set_index(prot_attr, drop=False, append=True)
+    y = df.pop(target)
+
     # Column-wise drops
     df = df.drop(columns=dropcols)
     if usecols:
@@ -61,18 +72,11 @@ def standarize_dataset(df, prot_attr, target, sample_weight=None, usecols=[],
             usecols = [usecols]
         df = df[usecols]
 
-    if numeric_only:
-        for col in df.select_dtypes('category'):
-            if df[col].cat.ordered:
-                df[col] = df[col].factorize(sort=True)[0]
-        df = df.select_dtypes(['number', 'bool'])
-
     # Index-wise drops
     if dropna:
-        df.dropna()
-
-    df = df.set_index(prot_attr, drop=False, append=True)
-    y = df.pop(target)
+        notna = df.notna().all(axis=1) & y.notna()
+        df = df.loc[notna]
+        y = y.loc[notna]
 
     if sample_weight is not None:
         return namedtuple('WeightedDataset', ['X', 'y', 'sample_weight'])(
diff --git a/aif360/sklearn/tests/test_datasets.py b/aif360/sklearn/tests/test_datasets.py
index 4253bcd8..05974f1e 100644
--- a/aif360/sklearn/tests/test_datasets.py
+++ b/aif360/sklearn/tests/test_datasets.py
@@ -5,12 +5,12 @@
 import pytest
 
 from aif360.sklearn.datasets import fetch_adult, fetch_bank, fetch_german
-from aif360.sklearn.datasets import standarize_dataset, make_onehot_transformer
+from aif360.sklearn.datasets import standarize_dataset
 
 
 df = pd.DataFrame([[1, 2, 3, 'a'], [5, 6, 7, 'b'], [np.NaN, 10, 11, 'c']],
                   columns=['X1', 'X2', 'y', 'Z'])
-basic = partial(standarize_dataset, df=df, protected_attributes='Z', target='y',
+basic = partial(standarize_dataset, df=df, prot_attr='Z', target='y',
                 dropna=False)
 
 def test_standardize_dataset_basic():
@@ -43,16 +43,16 @@ def test_usecols_dropcols_basic():
         basic(usecols=['X1', 'X2'], dropcols='X2')
 
 def test_dropna_basic():
-    basic_dropna = partial(standarize_dataset, df=df, protected_attributes='Z',
+    basic_dropna = partial(standarize_dataset, df=df, prot_attr='Z',
                            target='y', dropna=True)
     assert basic_dropna().X.shape == (2, 3)
     assert basic(dropcols='X1').X.shape == (3, 2)
 
 def test_numeric_only_basic():
-    assert basic(numeric_only=True).X.shape == (3, 2)
-    assert (basic(numeric_only=True).X.dtypes == 'float').all()
-    assert basic(dropcols='Z', numeric_only=True).X.shape == (3, 2)
-    assert (basic(dropcols='X1', numeric_only=True).X.dtypes == 'int').all()
+    assert basic(prot_attr='X2', numeric_only=True).X.shape == (3, 2)
+    with pytest.raises(KeyError):
+        assert (basic(prot_attr='X2', dropcols='Z', numeric_only=True).X.shape
+                == (3, 2))
 
 def test_fetch_adult():
     adult = fetch_adult()
@@ -76,4 +76,4 @@ def test_fetch_bank():
 
 def test_onehot_transformer():
     X, y = fetch_german()
-    assert len(make_onehot_transformer().fit_transform(X).columns) == 63
+    assert len(pd.get_dummies(X).columns) == 63

From 973a7741f04790334b3673412103b6678576bf09 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 29 Oct 2019 17:32:24 -0400
Subject: [PATCH 34/61] initial calibrated equalized odds port

bug fix in old implementation (weighted cost was calculated incorrectly)
---
 .../calibrated_eq_odds_postprocessing.py      |   2 +-
 aif360/sklearn/postprocessing/__init__.py     | 123 ++++++++++++++++++
 .../calibrated_equalized_odds.py              | 114 ++++++++++++++++
 .../tests/test_calibrated_equalized_odds.py   |  47 +++++++
 4 files changed, 285 insertions(+), 1 deletion(-)
 create mode 100644 aif360/sklearn/postprocessing/__init__.py
 create mode 100644 aif360/sklearn/postprocessing/calibrated_equalized_odds.py
 create mode 100644 aif360/sklearn/tests/test_calibrated_equalized_odds.py

diff --git a/aif360/algorithms/postprocessing/calibrated_eq_odds_postprocessing.py b/aif360/algorithms/postprocessing/calibrated_eq_odds_postprocessing.py
index 471e2b66..4bae2ed9 100644
--- a/aif360/algorithms/postprocessing/calibrated_eq_odds_postprocessing.py
+++ b/aif360/algorithms/postprocessing/calibrated_eq_odds_postprocessing.py
@@ -208,4 +208,4 @@ def weighted_cost(fp_rate, fn_rate, cm, privileged):
             * (1 - cm.base_rate(privileged=privileged))) +
            (fn_rate / norm_const
             * cm.generalized_false_negative_rate(privileged=privileged)
-            * (1 - cm.base_rate(privileged=privileged))))
+            * cm.base_rate(privileged=privileged)))
diff --git a/aif360/sklearn/postprocessing/__init__.py b/aif360/sklearn/postprocessing/__init__.py
new file mode 100644
index 00000000..49e89d42
--- /dev/null
+++ b/aif360/sklearn/postprocessing/__init__.py
@@ -0,0 +1,123 @@
+from logging import warning
+
+import numpy as np
+import pandas as pd
+from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone
+from sklearn.model_selection import train_test_split
+from sklearn.utils.metaestimators import if_delegate_has_method
+from sklearn.utils.validation import check_is_fitted
+
+from aif360.sklearn.postprocessing.calibrated_equalized_odds import CalibratedEqualizedOdds
+
+
+class PostProcessingMeta(BaseEstimator, MetaEstimatorMixin):
+    """Meta-estimator which feeds an estimator's output to a postprocessor.
+
+    Attributes:
+        estimator_: Cloned ``estimator``.
+        postprocessor_: Cloned ``postprocessor``.
+        use_proba_ (bool): Determined depending on the postprocessor type if
+            `use_proba` is None.
+    """
+
+    def __init__(self, estimator, postprocessor=CalibratedEqualizedOdds(),
+                 use_proba=None, val_size=0.25, **options):
+        """
+        Args:
+            estimator (sklearn.BaseEstimator): Original estimator.
+            postprocessor: Post-processing algorithm.
+            use_proba (bool): Use ``self.estimator_.predict_proba()`` instead of
+                ``self.estimator_.predict()`` as input to postprocessor. If
+                ``None``, defaults to ``True`` if the postprocessor supports it.
+            val_size (int or float): Size of validation set used to fit the
+                postprocessor. The estimator fits on the remainder of the
+                training set. See
+                :func:`~sklearn.model_selection.train_test_split` for details.
+            **options: Keyword options passed through to
+                :func:`~sklearn.model_selection.train_test_split`.
+                Note: 'train_size' and 'test_size' will be ignored in favor of
+                ``val_size``.
+        """
+        self.estimator = estimator
+        self.postprocessor = postprocessor
+        self.use_proba = use_proba  # must be stored for get_params()/clone()
+        self.val_size = val_size
+        self.options = options
+
+    @property
+    def _estimator_type(self):
+        return self.postprocessor._estimator_type
+
+    def fit(self, X, y, pos_label=1, sample_weight=None):
+        """Fit estimator on one split and postprocessor on its held-out output."""
+        self.pos_label_ = pos_label
+        self.use_proba_ = (isinstance(self.postprocessor, CalibratedEqualizedOdds)
+                           if self.use_proba is None else self.use_proba)
+        if self.use_proba_ and not hasattr(self.estimator, 'predict_proba'):
+            raise TypeError("`estimator` (type: {}) does not implement method "
+                            "`predict_proba()`.".format(type(self.estimator)))
+
+        if 'train_size' in self.options or 'test_size' in self.options:
+            warning("'train_size' and 'test_size' are ignored in favor of 'val_size'")
+        options_ = self.options.copy()
+        options_['test_size'] = self.val_size
+        options_.pop('train_size', None)
+
+        self.estimator_ = clone(self.estimator)
+        self.postprocessor_ = clone(self.postprocessor)
+
+        if sample_weight is not None:
+            X_est, X_post, y_est, y_post, sw_est, sw_post = train_test_split(
+                    X, y, sample_weight, **options_)
+            self.estimator_.fit(X_est, y_est, sample_weight=sw_est)
+        else:
+            X_est, X_post, y_est, y_post = train_test_split(X, y, **options_)
+            self.estimator_.fit(X_est, y_est)
+
+        # postprocessor is fit on estimator output for the held-out split
+        y_pred = self._estimator_output(X_post)
+        self.postprocessor_.fit(y_post, y_pred, pos_label=pos_label,
+                sample_weight=None if sample_weight is None else sw_post)
+        return self
+
+    def _estimator_output(self, X):
+        """Return what ``estimator_`` passes on to ``postprocessor_`` for ``X``.
+
+        Positive-class probabilities if ``use_proba_`` else predicted labels.
+        """
+        if not self.use_proba_:
+            return self.estimator_.predict(X)
+        pos_idx = np.nonzero(self.estimator_.classes_ == self.pos_label_)[0][0]
+        return self.estimator_.predict_proba(X)[:, pos_idx]
+
+    @property
+    def classes_(self):
+        # order of postprocessor.classes_ may differ from estimator_.classes_
+        check_is_fitted(self.postprocessor_, 'classes_')
+        return self.postprocessor_.classes_
+
+    @if_delegate_has_method('postprocessor_')
+    def predict(self, X):
+        y_pred = pd.Series(self._estimator_output(X), index=X.index)
+        return self.postprocessor_.predict(y_pred)
+
+    @if_delegate_has_method('postprocessor_')
+    def predict_proba(self, X):
+        y_pred = pd.Series(self._estimator_output(X), index=X.index)
+        return self.postprocessor_.predict_proba(y_pred)
+
+    @if_delegate_has_method('postprocessor_')
+    def predict_log_proba(self, X):
+        y_pred = pd.Series(self._estimator_output(X), index=X.index)
+        return self.postprocessor_.predict_log_proba(y_pred)
+
+    @if_delegate_has_method('postprocessor_')
+    def score(self, X, y, sample_weight=None):
+        """Score via ``postprocessor_.score`` on the estimator's output."""
+        y_pred = pd.Series(self._estimator_output(X), index=X.index)
+        return self.postprocessor_.score(y_pred, y, sample_weight=sample_weight)
+
+
+__all__ = [
+    'CalibratedEqualizedOdds', 'PostProcessingMeta'
+]
diff --git a/aif360/sklearn/postprocessing/calibrated_equalized_odds.py b/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
new file mode 100644
index 00000000..322d331a
--- /dev/null
+++ b/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
@@ -0,0 +1,114 @@
+import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.preprocessing import LabelEncoder
+from sklearn.utils import check_random_state
+
+from aif360.sklearn.metrics import base_rate, generalized_fnr, generalized_fpr
+from aif360.sklearn.utils import check_groups
+
+
+class CalibratedEqualizedOdds(BaseEstimator, ClassifierMixin):
+    """Calibrated equalized odds postprocessing is a post-processing technique
+    that optimizes over calibrated classifier score outputs to find
+    probabilities with which to change output labels with an equalized odds
+    objective [#pleiss17]_.
+
+    References:
+        .. [#pleiss17] `G. Pleiss, M. Raghavan, F. Wu, J. Kleinberg, and
+           K. Q. Weinberger, "On Fairness and Calibration," Conference on Neural
+           Information Processing Systems, 2017.
+           <https://arxiv.org/pdf/1709.02012.pdf>`_
+
+    Adapted from:
+    https://github.com/gpleiss/equalized_odds_and_calibration/blob/master/calib_eq_odds.py
+    """
+    def __init__(self, prot_attr=None, cost_constraint='weighted',
+                 random_state=None):
+        """
+        Args:
+            prot_attr (single label or list-like, optional): Protected
+                attribute(s) to use as sensitive attribute(s) in the post-
+                processing. If more than one attribute, all combinations of
+                values (intersections) are considered. Default is ``None``
+                meaning all protected attributes from the dataset are used.
+                Note: This algorithm requires there be exactly 2 groups
+                (privileged and unprivileged).
+            cost_constraint ('fpr', 'fnr', or 'weighted'): Group cost used to
+                compute the mix rates: generalized FPR, FNR, or a weighted sum.
+            random_state (int or numpy.RandomState, optional): Seed or RNG for
+                the random replacement of scores in :meth:`predict_proba`.
+        """
+        self.prot_attr = prot_attr
+        self.cost_constraint = cost_constraint
+        self.random_state = random_state
+
+    def fit(self, y_true, y_pred, pos_label=1, sample_weight=None):
+        """Compute per-group base rates and mix rates from labels and scores."""
+        groups, self.prot_attr_ = check_groups(y_true, self.prot_attr)
+        self.classes_ = np.unique(y_true)
+        self.groups_ = np.unique(groups)
+
+        if pos_label not in self.classes_:
+            raise ValueError('pos_label={} is not present in y_true. The valid '
+                             'values are:\n{}'.format(pos_label, self.classes_))
+
+        if len(self.groups_) != 2:
+            raise ValueError('prot_attr={}\nyielded {} groups:\n{}\nbut this '
+                             'algorithm requires a binary division of the '
+                             'data.'.format(self.prot_attr_, len(self.groups_),
+                                            self.groups_))
+
+        neg = self.classes_[self.classes_ != pos_label]  # by value, not index
+        self.classes_ = np.append(neg, pos_label)  # [neg_label, pos_label]
+
+        def args(grp_idx, triv=False):
+            i = (groups == self.groups_[grp_idx])
+            pred = (np.full_like(y_pred, self.base_rates_[grp_idx]) if triv else
+                    y_pred)
+            return dict(y_true=y_true[i], y_pred=pred[i], pos_label=pos_label,
+                        sample_weight=sample_weight[i] if sample_weight is not None else None)
+
+        self.base_rates_ = [base_rate(**args(i)) for i in range(2)]
+
+        def weighted_cost(grp_idx, triv=False):
+            fpr = generalized_fpr(**args(grp_idx, triv=triv))
+            fnr = generalized_fnr(**args(grp_idx, triv=triv))
+            rate = self.base_rates_[grp_idx]
+            if self.cost_constraint == 'fpr':
+                return fpr
+            elif self.cost_constraint == 'fnr':
+                return fnr
+            elif self.cost_constraint == 'weighted':
+                return fpr * (1 - rate) + fnr * rate
+            else:
+                raise ValueError("`cost_constraint` must be one of: 'fpr', "
+                                 "'fnr', or 'weighted'")
+
+        costs = [weighted_cost(i) for i in range(2)]
+        trivial = [weighted_cost(i, triv=True) for i in range(2)]
+        self.mix_rates_ = [(costs[1 - i] - costs[i]) / (trivial[i] - costs[i])
+                           for i in range(2)]
+        self.mix_rates_[np.argmax(costs)] = 0  # worse-off group: no mixing
+        return self
+
+    def predict_proba(self, y_pred):
+        rng = check_random_state(self.random_state)
+
+        groups, _ = check_groups(y_pred, self.prot_attr_)
+        if not set(np.unique(groups)) <= set(self.groups_):
+            raise ValueError('The protected groups from y_pred:\n{}\ndo not '
+                             'match those from the training set:\n{}'.format(
+                                     np.unique(groups), self.groups_))
+
+        yt = np.empty_like(y_pred)
+        for grp_idx in range(2):
+            i = (groups == self.groups_[grp_idx])
+            to_replace = (rng.rand(sum(i)) < self.mix_rates_[grp_idx])
+            new_preds = y_pred[i].copy()
+            new_preds[to_replace] = self.base_rates_[grp_idx]
+            yt[i] = new_preds
+
+        return np.stack([1 - yt, yt], axis=-1)
+
+    def predict(self, y_pred):
+        return self.classes_[self.predict_proba(y_pred).argmax(axis=1)]
diff --git a/aif360/sklearn/tests/test_calibrated_equalized_odds.py b/aif360/sklearn/tests/test_calibrated_equalized_odds.py
new file mode 100644
index 00000000..247ba4c8
--- /dev/null
+++ b/aif360/sklearn/tests/test_calibrated_equalized_odds.py
@@ -0,0 +1,47 @@
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+
+from aif360.datasets import AdultDataset
+from aif360.sklearn.datasets import fetch_adult
+from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing
+from aif360.sklearn.postprocessing import CalibratedEqualizedOdds, PostProcessingMeta
+
+
+X, y, sample_weight = fetch_adult(numeric_only=True)
+adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
+        features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss',
+                          'hours-per-week'], features_to_drop=[])
+
+def test_calib_eq_odds_sex():
+    logreg = LogisticRegression(solver='lbfgs', max_iter=500)
+    y_pred = logreg.fit(X, y, sample_weight=sample_weight).predict_proba(X)[:, 1]
+    adult_pred = adult.copy()
+    adult_pred.scores = y_pred
+    orig_cal_eq_odds = CalibratedEqOddsPostprocessing(
+            unprivileged_groups=[{'sex': 0}], privileged_groups=[{'sex': 1}])
+    orig_cal_eq_odds.fit(adult, adult_pred)
+    cal_eq_odds = CalibratedEqualizedOdds('sex')
+    cal_eq_odds.fit(y, y_pred, sample_weight=sample_weight)
+
+    assert np.isclose(orig_cal_eq_odds.priv_mix_rate, cal_eq_odds.mix_rates_[1])
+    assert np.isclose(orig_cal_eq_odds.unpriv_mix_rate, cal_eq_odds.mix_rates_[0])
+
+def test_postprocessingmeta():
+    logreg = LogisticRegression(solver='lbfgs', max_iter=500)
+
+    adult_est, adult_post = adult.split([0.75], shuffle=False)
+    logreg.fit(adult_est.features, adult_est.labels.ravel())
+    y_pred = logreg.predict_proba(adult_post.features)[:, 1]
+    adult_pred = adult_post.copy()
+    adult_pred.scores = y_pred
+    orig_cal_eq_odds = CalibratedEqOddsPostprocessing(
+            unprivileged_groups=[{'sex': 0}], privileged_groups=[{'sex': 1}])
+    orig_cal_eq_odds.fit(adult_post, adult_pred)
+
+    cal_eq_odds = PostProcessingMeta(estimator=logreg,
+            postprocessor=CalibratedEqualizedOdds('sex'), shuffle=False)
+    cal_eq_odds.fit(X, y, sample_weight=sample_weight)
+
+    assert np.allclose([orig_cal_eq_odds.unpriv_mix_rate,
+                        orig_cal_eq_odds.priv_mix_rate],
+                       cal_eq_odds.postprocessor_.mix_rates_)

From 40cad96cc4c7340c5d3380483aedc78f1f0cb2f6 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Wed, 30 Oct 2019 16:46:25 -0400
Subject: [PATCH 35/61] fixed adversarial debiasing reproducibility

---
 .../inprocessing/adversarial_debiasing.py     |  15 +-
 .../inprocessing/adversarial_debiasing.py     | 195 ++++++++++--------
 .../tests/test_adversarial_debiasing.py       |  77 +++++++
 requirements.txt                              |   2 +-
 4 files changed, 198 insertions(+), 91 deletions(-)
 create mode 100644 aif360/sklearn/tests/test_adversarial_debiasing.py

diff --git a/aif360/algorithms/inprocessing/adversarial_debiasing.py b/aif360/algorithms/inprocessing/adversarial_debiasing.py
index 02da1217..3297a96b 100644
--- a/aif360/algorithms/inprocessing/adversarial_debiasing.py
+++ b/aif360/algorithms/inprocessing/adversarial_debiasing.py
@@ -80,14 +80,14 @@ def _classifier_model(self, features, features_dim, keep_prob):
         """
         with tf.variable_scope("classifier_model"):
             W1 = tf.get_variable('W1', [features_dim, self.classifier_num_hidden_units],
-                                  initializer=tf.contrib.layers.xavier_initializer())
+                                  initializer=tf.contrib.layers.xavier_initializer(seed=self.seed1))
             b1 = tf.Variable(tf.zeros(shape=[self.classifier_num_hidden_units]), name='b1')
 
             h1 = tf.nn.relu(tf.matmul(features, W1) + b1)
-            h1 = tf.nn.dropout(h1, keep_prob=keep_prob)
+            h1 = tf.nn.dropout(h1, keep_prob=keep_prob, seed=self.seed2)
 
             W2 = tf.get_variable('W2', [self.classifier_num_hidden_units, 1],
-                                 initializer=tf.contrib.layers.xavier_initializer())
+                                 initializer=tf.contrib.layers.xavier_initializer(seed=self.seed3))
             b2 = tf.Variable(tf.zeros(shape=[1]), name='b2')
 
             pred_logit = tf.matmul(h1, W2) + b2
@@ -103,7 +103,7 @@ def _adversary_model(self, pred_logits, true_labels):
             s = tf.sigmoid((1 + tf.abs(c)) * pred_logits)
 
             W2 = tf.get_variable('W2', [3, 1],
-                                 initializer=tf.contrib.layers.xavier_initializer())
+                                 initializer=tf.contrib.layers.xavier_initializer(seed=self.seed4))
             b2 = tf.Variable(tf.zeros(shape=[1]), name='b2')
 
             pred_protected_attribute_logit = tf.matmul(tf.concat([s, s * true_labels, s * (1.0 - true_labels)], axis=1), W2) + b2
@@ -123,6 +123,8 @@ def fit(self, dataset):
         """
         if self.seed is not None:
             np.random.seed(self.seed)
+        ii32 = np.iinfo(np.int32)
+        self.seed1, self.seed2, self.seed3, self.seed4 = np.random.randint(ii32.min, ii32.max, size=4)
 
         # Map the dataset labels to 0 and 1.
         temp_labels = dataset.labels.copy()
@@ -177,14 +179,15 @@ def fit(self, dataset):
 
             if self.debias:
                 # Update adversary parameters
-                adversary_minimizer = adversary_opt.minimize(pred_protected_attributes_loss, var_list=adversary_vars, global_step=global_step)
+                with tf.control_dependencies([classifier_minimizer]):
+                    adversary_minimizer = adversary_opt.minimize(pred_protected_attributes_loss, var_list=adversary_vars)#, global_step=global_step)
 
             self.sess.run(tf.global_variables_initializer())
             self.sess.run(tf.local_variables_initializer())
 
             # Begin training
             for epoch in range(self.num_epochs):
-                shuffled_ids = np.random.choice(num_train_samples, num_train_samples)
+                shuffled_ids = np.random.choice(num_train_samples, num_train_samples, replace=False)
                 for i in range(num_train_samples//self.batch_size):
                     batch_ids = shuffled_ids[self.batch_size*i: self.batch_size*(i+1)]
                     batch_features = dataset.features[batch_ids]
diff --git a/aif360/sklearn/inprocessing/adversarial_debiasing.py b/aif360/sklearn/inprocessing/adversarial_debiasing.py
index 2d4bc7a0..1ba8a248 100644
--- a/aif360/sklearn/inprocessing/adversarial_debiasing.py
+++ b/aif360/sklearn/inprocessing/adversarial_debiasing.py
@@ -1,8 +1,9 @@
 import numpy as np
-from scipy.special import softmax
+import scipy.special
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.preprocessing import LabelEncoder
-from sklearn.utils import check_is_fitted, check_random_state
+from sklearn.utils import check_random_state
+from sklearn.utils.validation import check_is_fitted
 import tensorflow as tf
 
 from aif360.sklearn.utils import check_inputs, check_groups
@@ -22,12 +23,13 @@ class AdversarialDebiasing(BaseEstimator, ClassifierMixin):
            Artificial Intelligence, Ethics, and Society, 2018.
     """
 
-    def __init__(self, sess, prot_attr=None, adversary_loss_weight=0.1,
-                 num_epochs=50, batch_size=128, classifier_num_hidden_units=200,
-                 debias=True, verbose=True, random_state=None):
+    def __init__(self, prot_attr=None, scope_name='classifier',
+                 adversary_loss_weight=0.1, num_epochs=50, batch_size=128,
+                 classifier_num_hidden_units=200, debias=True, verbose=False,
+                 random_state=None):
 
-        self.sess = sess
         self.prot_attr = prot_attr
+        self.scope_name = scope_name
         self.adversary_loss_weight = adversary_loss_weight
         self.num_epochs = num_epochs
         self.batch_size = batch_size
@@ -36,87 +38,103 @@ def __init__(self, sess, prot_attr=None, adversary_loss_weight=0.1,
         self.verbose = verbose
         self.random_state = random_state
 
-    @property
-    def classifier_logits_(self):
-        check_is_fitted(self, ['input_ph', 'keep_prob', 'classes_'])
-        n_features = self.input_ph.shape[1]
-        n_classes = len(self.classes_)
-        with tf.variable_scope('classifier_model'):
-            W1 = tf.get_variable(
-                    'W1', [n_features, self.classifier_num_hidden_units],
-                    initializer=tf.contrib.layers.xavier_initializer())
-            b1 = tf.Variable(tf.zeros(shape=[self.classifier_num_hidden_units]),
-                    name='b1')
-
-            h1 = tf.nn.relu(tf.matmul(self.input_ph, W1) + b1)
-            h1 = tf.nn.dropout(h1, keep_prob=self.keep_prob)
-
-            W2 = tf.get_variable(
-                    'W2', [self.classifier_num_hidden_units, n_classes],
-                    initializer=tf.contrib.layers.xavier_initializer())
-            b2 = tf.Variable(tf.zeros(shape=[n_classes]), name='b2')
-
-            pred_logits = tf.matmul(h1, W2) + b2
-
-        return pred_logits
-
-    @property
-    def adversary_logits_(self):
-        """Compute the adversary predictions for the protected attribute."""
-        check_is_fitted(self, ['classifier_logits_', 'true_labels_ph', 'groups_'])
-        n_groups = len(self.groups_)
-        with tf.variable_scope("adversary_model"):
-            c = tf.get_variable('c', initializer=tf.constant(1.0))
-            s = tf.sigmoid((1 + tf.abs(c)) * self.classifier_logits_)
-
-            W2 = tf.get_variable('W2', [3, n_groups],
-                    initializer=tf.contrib.layers.xavier_initializer())
-            b2 = tf.Variable(tf.zeros(shape=[n_groups]), name='b2')
-
-            pred_prot_attr_logits = tf.matmul(
-                    tf.concat([s, s * self.true_labels_ph,
-                               s * (1.0 - self.true_labels_ph)], axis=1),
-                    W2) + b2
-
-        return pred_prot_attr_logits
-
     def fit(self, X, y):
+        X, y, _ = check_inputs(X, y)
         rng = check_random_state(self.random_state)
-        # tf.random.seed(random_state)
+        ii32 = np.iinfo(np.int32)
+        seed1, seed2, seed3, seed4 = rng.randint(ii32.min, ii32.max, size=4)
+
+        tf.reset_default_graph()
+        self.sess_ = tf.Session()
 
         groups, self.prot_attr_ = check_groups(X, self.prot_attr)
         le = LabelEncoder()
         y = le.fit_transform(y)
         self.classes_ = le.classes_
+        groups = groups.map(str)  # BUG: LabelEncoder converts to ndarray which removes tuple formatting
         groups = le.fit_transform(groups)
         self.groups_ = le.classes_
 
+        n_classes = len(self.classes_)
+        n_groups = len(self.groups_)
+        # use sigmoid for binary case
+        if n_classes == 2:
+            n_classes = 1
+        if n_groups == 2:
+            n_groups = 1
+
         n_samples, n_features = X.shape
 
-        with tf.variable_scope('adversarial_debiasing'):
+        with tf.variable_scope(self.scope_name):
             # Setup placeholders
             self.input_ph = tf.placeholder(tf.float32, shape=[None, n_features])
             self.prot_attr_ph = tf.placeholder(tf.float32, shape=[None, 1])
             self.true_labels_ph = tf.placeholder(tf.float32, shape=[None, 1])
             self.keep_prob = tf.placeholder(tf.float32)
 
-            global_step = tf.train.get_or_create_global_step()
-            starter_learning_rate = 0.001
-            learning_rate = tf.train.exponential_decay(starter_learning_rate,
-                    global_step, 1000, 0.96, staircase=True)
+            # Create classifier
+            with tf.variable_scope('classifier_model'):
+                W1 = tf.get_variable(
+                        'W1', [n_features, self.classifier_num_hidden_units],
+                        initializer=tf.initializers.glorot_uniform(seed=seed1))
+                b1 = tf.Variable(tf.zeros(shape=[self.classifier_num_hidden_units]),
+                        name='b1')
+
+                h1 = tf.nn.relu(tf.matmul(self.input_ph, W1) + b1)
+                h1 = tf.nn.dropout(h1, rate=1-self.keep_prob, seed=seed2)
+
+                W2 = tf.get_variable(
+                        'W2', [self.classifier_num_hidden_units, n_classes],
+                        initializer=tf.initializers.glorot_uniform(seed=seed3))
+                b2 = tf.Variable(tf.zeros(shape=[n_classes]), name='b2')
+
+                self.classifier_logits_ = tf.matmul(h1, W2) + b2
 
             # Obtain classifier loss
-            clf_loss = tf.reduce_mean(
-                    tf.nn.sparse_softmax_cross_entropy_with_logits(
-                            labels=self.true_labels_ph,
-                            logits=self.classifier_logits_))
+            if self.classifier_logits_.shape[1] == 1:
+                clf_loss = tf.reduce_mean(
+                        tf.nn.sigmoid_cross_entropy_with_logits(
+                                labels=self.true_labels_ph,
+                                logits=self.classifier_logits_))
+            else:
+                clf_loss = tf.reduce_mean(
+                        tf.nn.sparse_softmax_cross_entropy_with_logits(
+                                labels=tf.squeeze(tf.cast(self.true_labels_ph,
+                                                          tf.int32)),
+                                logits=self.classifier_logits_))
 
             if self.debias:
+                # Create adversary
+                with tf.variable_scope("adversary_model"):
+                    c = tf.get_variable('c', initializer=tf.constant(1.0))
+                    s = tf.sigmoid((1 + tf.abs(c)) * self.classifier_logits_)
+
+                    W2 = tf.get_variable('W2', [3, n_groups],
+                            initializer=tf.initializers.glorot_uniform(seed=seed4))
+                    b2 = tf.Variable(tf.zeros(shape=[n_groups]), name='b2')
+
+                    self.adversary_logits_ = tf.matmul(
+                            tf.concat([s, s * self.true_labels_ph,
+                                       s * (1.0 - self.true_labels_ph)], axis=1),
+                            W2) + b2
+
                 # Obtain adversary loss
-                adv_loss = tf.reduce_mean(
-                        tf.nn.sparse_softmax_cross_entropy_with_logits(
-                                labels=self.prot_attr_ph,
-                                logits=self.adversary_logits_))
+                if self.adversary_logits_.shape[1] == 1:
+                    adv_loss = tf.reduce_mean(
+                            tf.nn.sigmoid_cross_entropy_with_logits(
+                                    labels=self.prot_attr_ph,
+                                    logits=self.adversary_logits_))
+                else:
+                    adv_loss = tf.reduce_mean(
+                            tf.nn.sparse_softmax_cross_entropy_with_logits(
+                                    labels=tf.squeeze(tf.cast(self.prot_attr_ph,
+                                                              tf.int32)),
+                                    logits=self.adversary_logits_))
+
+            global_step = tf.train.get_or_create_global_step()
+            starter_learning_rate = 0.001
+            learning_rate = tf.train.exponential_decay(starter_learning_rate,
+                    global_step, 1000, 0.96, staircase=True)
 
             # Setup optimizers
             clf_opt = tf.train.AdamOptimizer(learning_rate)
@@ -131,8 +149,6 @@ def fit(self, X, y):
                 # Compute grad wrt classifier parameters
                 adv_grads = {var: grad for (grad, var) in
                         adv_opt.compute_gradients(adv_loss, var_list=clf_vars)}
-                # Update adversary parameters (don't increment global step yet)
-                adv_min = adv_opt.minimize(adv_loss, var_list=adv_vars)
 
             normalize = lambda x: x / (tf.norm(x) + np.finfo(np.float32).tiny)
 
@@ -144,27 +160,30 @@ def fit(self, X, y):
                     grad -= tf.reduce_sum(grad * unit_adv_grad) * unit_adv_grad
                     grad -= self.adversary_loss_weight * adv_grads[var]
                 clf_grads.append((grad, var))
+
             clf_min = clf_opt.apply_gradients(clf_grads, global_step=global_step)
+            if self.debias:
+                with tf.control_dependencies([clf_min]):
+                    adv_min = adv_opt.minimize(adv_loss, var_list=adv_vars)
 
-            self.sess.run(tf.global_variables_initializer())
+            self.sess_.run(tf.global_variables_initializer())
 
             # Begin training
             for epoch in range(self.num_epochs):
-                # TODO: why rng.choice(n_samples, n_samples)?
-                shuffled_ids = rng.shuffle(np.arange(n_samples))
+                shuffled_ids = rng.permutation(n_samples)
                 for i in range(n_samples // self.batch_size):
                     batch_ids = shuffled_ids[self.batch_size * i:
                                              self.batch_size * (i+1)]
-                    batch_features = X[batch_ids]
-                    batch_labels = y[batch_ids]
-                    batch_prot_attr = groups[batch_ids]
+                    batch_features = X.iloc[batch_ids]
+                    batch_labels = y[batch_ids][:, np.newaxis]
+                    batch_prot_attr = groups[batch_ids][:, np.newaxis]
                     batch_feed_dict = {self.input_ph: batch_features,
                                        self.true_labels_ph: batch_labels,
                                        self.prot_attr_ph: batch_prot_attr,
                                        self.keep_prob: 0.8}
                     if self.debias:
                         _, _, clf_loss_value, adv_loss_value = (
-                                self.sess.run([clf_min, adv_min,
+                                self.sess_.run([clf_min, adv_min,
                                                clf_loss, adv_loss],
                                                feed_dict=batch_feed_dict))
                         if i % 200 == 0 and self.verbose:
@@ -173,7 +192,7 @@ def fit(self, X, y):
                                           epoch, i, clf_loss_value,
                                           adv_loss_value))
                     else:
-                        _, clf_loss_value = self.sess.run(
+                        _, clf_loss_value = self.sess_.run(
                                 [clf_min, clf_loss],
                                 feed_dict=batch_feed_dict)
                         if i % 200 == 0 and self.verbose:
@@ -186,12 +205,12 @@ def decision_function(self, X):
         check_is_fitted(self, ['classes_', 'input_ph', 'keep_prob',
                                'classifier_logits_'])
         n_samples = X.shape[0]
-        groups, _ = check_groups(X, self.prot_attr_)
-        le = LabelEncoder().fit(self.groups_)
-        groups = le.transform(groups)
+        n_classes = len(self.classes_)
+        if n_classes == 2:
+            n_classes = 1
 
         samples_covered = 0
-        scores = np.empty((n_samples, len(self.classes_)))
+        scores = np.empty((n_samples, n_classes))
         while samples_covered < n_samples:
             start = samples_covered
             end = samples_covered + self.batch_size
@@ -199,22 +218,30 @@ def decision_function(self, X):
                 end = n_samples
 
             batch_ids = np.arange(start, end)
-            batch_features = X[batch_ids]
-            batch_prot_attr = groups[batch_ids]
+            batch_features = X.iloc[batch_ids]
 
             batch_feed_dict = {self.input_ph: batch_features,
                                self.keep_prob: 1.0}
 
-            scores[batch_ids] = self.sess.run(self.classifier_logits_,
+            scores[batch_ids] = self.sess_.run(self.classifier_logits_,
                                               feed_dict=batch_feed_dict)
             samples_covered += len(batch_features)
 
-        return scores
+        return scores.ravel() if scores.shape[1] == 1 else scores
 
     def predict_proba(self, X):
         decision = self.decision_function(X)
-        return softmax(decision, axis=1)
+
+        if decision.ndim == 1:
+            decision_2d = np.c_[np.zeros_like(decision), decision]
+        else:
+            decision_2d = decision
+        return scipy.special.softmax(decision_2d, axis=1)
 
     def predict(self, X):
-        indices = self.decision_function(X).argmax(axis=1)
+        scores = self.decision_function(X)
+        if scores.ndim == 1:  # binary case: one logit per sample
+            indices = (scores > 0).astype(int)  # np.int alias removed in NumPy 1.24
+        else:
+            indices = scores.argmax(axis=1)
         return self.classes_[indices]
diff --git a/aif360/sklearn/tests/test_adversarial_debiasing.py b/aif360/sklearn/tests/test_adversarial_debiasing.py
new file mode 100644
index 00000000..c28fb17c
--- /dev/null
+++ b/aif360/sklearn/tests/test_adversarial_debiasing.py
@@ -0,0 +1,77 @@
+import numpy as np
+from sklearn.model_selection import GridSearchCV
+from sklearn.metrics import accuracy_score
+import tensorflow as tf
+
+from aif360.datasets import AdultDataset
+from aif360.sklearn.datasets import fetch_adult
+from aif360.algorithms.inprocessing import AdversarialDebiasing as OldAdversarialDebiasing
+from aif360.sklearn.inprocessing import AdversarialDebiasing
+
+
+X, y, sample_weight = fetch_adult(numeric_only=True)
+adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
+        features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss',
+                          'hours-per-week'], features_to_drop=[])
+
+def test_adv_debias_old_reproduce():
+    sess = tf.Session()
+    old_adv_deb = OldAdversarialDebiasing(unprivileged_groups=[{'sex': 0}],
+                                          privileged_groups=[{'sex': 1}],
+                                          scope_name='old_classifier',
+                                          sess=sess, num_epochs=5, seed=123)
+    old_preds = old_adv_deb.fit_predict(adult)
+    sess.close()
+    tf.reset_default_graph()
+    sess = tf.Session()
+    old_adv_deb2 = OldAdversarialDebiasing(unprivileged_groups=[{'sex': 0}],
+                                          privileged_groups=[{'sex': 1}],
+                                          scope_name='old_classifier',
+                                          sess=sess, num_epochs=5, seed=123)
+    old_preds2 = old_adv_deb2.fit_predict(adult)
+    sess.close()
+
+    assert np.allclose(old_preds.labels, old_preds2.labels)
+
+def test_adv_debias_old():
+    tf.reset_default_graph()
+    sess = tf.Session()
+    old_adv_deb = OldAdversarialDebiasing(unprivileged_groups=[{'sex': 0}],
+                                          privileged_groups=[{'sex': 1}],
+                                          scope_name='old_classifier',
+                                          sess=sess, num_epochs=5, seed=123)
+    old_preds = old_adv_deb.fit_predict(adult)
+    sess.close()
+    adv_deb = AdversarialDebiasing('sex', num_epochs=5, random_state=123)
+    new_preds = adv_deb.fit(X, y).predict(X)
+    adv_deb.sess_.close()
+    assert np.allclose(old_preds.labels.flatten(), new_preds)
+
+def test_adv_debias_reproduce():  # same seed twice must give the same accuracy
+    adv_deb = AdversarialDebiasing('sex', num_epochs=5, random_state=123)
+    new_preds = adv_deb.fit(X, y).predict(X)
+    adv_deb.sess_.close()
+    new_acc = accuracy_score(y, new_preds)
+
+    adv_deb2 = AdversarialDebiasing('sex', num_epochs=5, random_state=123)
+    new_preds = adv_deb2.fit(X, y).predict(X)
+    adv_deb2.sess_.close()  # release the second estimator's session
+
+    assert new_acc == accuracy_score(y, new_preds)
+
+def test_adv_debias_intersection():
+    adv_deb = AdversarialDebiasing(scope_name='intersect', num_epochs=5)
+    adv_deb.fit(X, y)
+    adv_deb.sess_.close()
+    assert adv_deb.adversary_logits_.shape[1] == 4
+
+def test_adv_debias_grid():
+    adv_deb = AdversarialDebiasing('sex', num_epochs=10, random_state=123)
+
+    params = {'debias': [True, False]}
+
+    clf = GridSearchCV(adv_deb, params, cv=3)
+    clf.fit(X, y)
+
+    clf.best_estimator_.sess_.close()
+    assert clf.best_params_ == {'debias': False}
diff --git a/requirements.txt b/requirements.txt
index 767db283..bf52ab8c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,7 +9,7 @@ numpy>=1.16
 matplotlib
 pandas>=0.24
 pytest>=3.5.0
-scipy
+scipy>=1.2.0
 scikit-learn
 cvxpy>=1.0
 scs==2.1.0

From dc410a2e86b53ed85c046ad1d38bbc1002b5286f Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Wed, 30 Oct 2019 18:06:47 -0400
Subject: [PATCH 36/61] updated Getting Started notebook

---
 aif360/sklearn/examples/Getting Started.ipynb | 452 +++++++++++++-----
 1 file changed, 337 insertions(+), 115 deletions(-)

diff --git a/aif360/sklearn/examples/Getting Started.ipynb b/aif360/sklearn/examples/Getting Started.ipynb
index b65f8f78..026bf790 100644
--- a/aif360/sklearn/examples/Getting Started.ipynb	
+++ b/aif360/sklearn/examples/Getting Started.ipynb	
@@ -13,15 +13,20 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
     "import pandas as pd\n",
+    "import tensorflow as tf\n",
     "from sklearn.linear_model import LogisticRegression\n",
-    "from sklearn.metrics import accuracy_score, recall_score, make_scorer\n",
+    "from sklearn.metrics import accuracy_score\n",
     "from sklearn.model_selection import GridSearchCV, train_test_split\n",
     "\n",
     "from aif360.sklearn.preprocessing import ReweighingMeta\n",
+    "from aif360.sklearn.inprocessing import AdversarialDebiasing\n",
+    "from aif360.sklearn.postprocessing import CalibratedEqualizedOdds, PostProcessingMeta\n",
     "from aif360.sklearn.datasets import fetch_adult\n",
-    "from aif360.sklearn.metrics import disparate_impact_ratio"
+    "from aif360.sklearn.metrics import disparate_impact_ratio, average_odds_error, generalized_fpr, generalized_fnr"
    ]
   },
   {
@@ -249,7 +254,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -298,88 +303,88 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>7916</th>\n",
-       "      <th>Non-white</th>\n",
-       "      <th>Female</th>\n",
-       "      <td>18.0</td>\n",
+       "      <th>0</th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <td>25.0</td>\n",
        "      <td>7.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>20.0</td>\n",
+       "      <td>40.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>26447</th>\n",
-       "      <th>White</th>\n",
-       "      <th>Male</th>\n",
-       "      <td>55.0</td>\n",
+       "      <th>1</th>\n",
+       "      <th>1</th>\n",
+       "      <th>1</th>\n",
+       "      <td>38.0</td>\n",
        "      <td>9.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>40.0</td>\n",
+       "      <td>50.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>20889</th>\n",
-       "      <th>White</th>\n",
-       "      <th>Female</th>\n",
-       "      <td>43.0</td>\n",
-       "      <td>9.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
+       "      <th>2</th>\n",
+       "      <th>1</th>\n",
+       "      <th>1</th>\n",
+       "      <td>28.0</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>40.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>30145</th>\n",
-       "      <th>White</th>\n",
-       "      <th>Male</th>\n",
+       "      <th>3</th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
        "      <td>44.0</td>\n",
-       "      <td>11.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>4386.0</td>\n",
+       "      <td>10.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7688.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>40.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>7473</th>\n",
-       "      <th>White</th>\n",
-       "      <th>Male</th>\n",
-       "      <td>41.0</td>\n",
-       "      <td>9.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>1.0</td>\n",
+       "      <th>4</th>\n",
+       "      <th>1</th>\n",
+       "      <th>0</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>10.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>55.0</td>\n",
+       "      <td>30.0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                         age  education-num  race  sex  capital-gain  \\\n",
-       "      race      sex                                                    \n",
-       "7916  Non-white Female  18.0            7.0   0.0  0.0           0.0   \n",
-       "26447 White     Male    55.0            9.0   1.0  1.0           0.0   \n",
-       "20889 White     Female  43.0            9.0   1.0  0.0           0.0   \n",
-       "30145 White     Male    44.0           11.0   1.0  1.0        4386.0   \n",
-       "7473  White     Male    41.0            9.0   1.0  1.0           0.0   \n",
+       "             age  education-num  race  sex  capital-gain  capital-loss  \\\n",
+       "  race sex                                                               \n",
+       "0 0    1    25.0            7.0     0    1           0.0           0.0   \n",
+       "1 1    1    38.0            9.0     1    1           0.0           0.0   \n",
+       "2 1    1    28.0           12.0     1    1           0.0           0.0   \n",
+       "3 0    1    44.0           10.0     0    1        7688.0           0.0   \n",
+       "4 1    0    18.0           10.0     1    0           0.0           0.0   \n",
        "\n",
-       "                        capital-loss  hours-per-week  \n",
-       "      race      sex                                   \n",
-       "7916  Non-white Female           0.0            20.0  \n",
-       "26447 White     Male             0.0            40.0  \n",
-       "20889 White     Female           0.0            40.0  \n",
-       "30145 White     Male             0.0            40.0  \n",
-       "7473  White     Male             0.0            55.0  "
+       "            hours-per-week  \n",
+       "  race sex                  \n",
+       "0 0    1              40.0  \n",
+       "1 1    1              50.0  \n",
+       "2 1    1              40.0  \n",
+       "3 0    1              40.0  \n",
+       "4 1    0              30.0  "
       ]
      },
-     "execution_count": 9,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -387,11 +392,43 @@
    "source": [
     "X, y, sample_weight = fetch_adult(numeric_only=True)\n",
     "(X_train, X_test,\n",
-    " y_train, y_test,\n",
-    " sw_train, sw_test) = train_test_split(X, y, sample_weight, train_size=0.7, random_state=123)\n",
+    " y_train, y_test) = train_test_split(X, y, train_size=0.7, shuffle=False)\n",
     "X_train.head()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "the protected attribute information is replicated in the labels:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "   race  sex\n",
+       "0  0     1      0\n",
+       "1  1     1      0\n",
+       "2  1     1      1\n",
+       "3  0     1      1\n",
+       "4  1     0      0\n",
+       "Name: annual-income, dtype: int64"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_train.head()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -408,11 +445,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.823858595509452"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "y_pred = LogisticRegression(solver='liblinear').fit(X_train, y_train).predict(X_test)"
+    "y_pred = LogisticRegression(solver='liblinear').fit(X_train, y_train).predict(X_test)\n",
+    "accuracy_score(y_test, y_pred)"
    ]
   },
   {
@@ -424,23 +473,54 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "0.19176335549523604"
+       "0.19826239080897468"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "sex = y_test.index.get_level_values('sex')\n",
-    "disparate_impact_ratio(y_test, y_pred, prot_attr='sex', priv_group='Male', pos_label='>50K')"
+    "disparate_impact_ratio(y_test, y_pred, prot_attr='sex')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And similarly, we can assess how close the predictions are to equality of odds.\n",
+    "\n",
+    "`average_odds_error()` computes the (unweighted) average of the absolute values of the true positive rate (TPR) difference and false positive rate (FPR) difference, i.e.:\n",
+    "\n",
+    "$\\tfrac{1}{2}\\left(|FPR_{D = \\text{unprivileged}} - FPR_{D = \\text{privileged}}|\n",
+    "           + |TPR_{D = \\text{unprivileged}} - TPR_{D = \\text{privileged}}|\\right)$"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.12427040384779571"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "average_odds_error(y_test, y_pred, prot_attr='sex')"
    ]
   },
   {
@@ -459,7 +539,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 8,
    "metadata": {
     "scrolled": false
    },
@@ -468,70 +548,212 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Index([(7916, 'Non-white', 'Female'),      (26447, 'White', 'Male'),\n",
-      "          (20889, 'White', 'Female'),      (30145, 'White', 'Male'),\n",
-      "             (7473, 'White', 'Male'),      (29361, 'White', 'Male'),\n",
-      "            (12277, 'White', 'Male'),      (44372, 'White', 'Male'),\n",
-      "          (32291, 'White', 'Female'),    (44411, 'White', 'Female'),\n",
-      "       ...\n",
-      "            (38298, 'White', 'Male'),       (4173, 'White', 'Male'),\n",
-      "             (7854, 'White', 'Male'),    (16424, 'White', 'Female'),\n",
-      "             (2087, 'White', 'Male'),      (16120, 'White', 'Male'),\n",
-      "            (24476, 'White', 'Male'),     (8295, 'White', 'Female'),\n",
-      "             (1449, 'White', 'Male'),      (33323, 'White', 'Male')],\n",
-      "      dtype='object', length=6838)\n"
+      "0.8147819559134648\n",
+      "{'estimator__C': 10, 'reweigher__prot_attr': 'sex'}\n"
      ]
-    },
+    }
+   ],
+   "source": [
+    "rew = ReweighingMeta(estimator=LogisticRegression(solver='liblinear'))\n",
+    "\n",
+    "params = {'estimator__C': [1, 10], 'reweigher__prot_attr': ['sex']}\n",
+    "\n",
+    "clf = GridSearchCV(rew, params, scoring='accuracy', cv=5)\n",
+    "clf.fit(X_train, y_train)\n",
+    "print(clf.score(X_test, y_test))\n",
+    "print(clf.best_params_)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.639237550613212"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "disparate_impact_ratio(y_test, clf.predict(X_test), prot_attr='sex')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Rather than trying to weight accuracy and fairness, we can try a fair in-processing algorithm:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
     {
-     "ename": "NameError",
-     "evalue": "name 'accuracy_score' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-10-b8e2e0cd7a17>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     12\u001b[0m \u001b[0mclf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGridSearchCV\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrew\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mscoring\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'sample_weight'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0msw_train\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     14\u001b[0m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscore\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, groups, **fit_params)\u001b[0m\n\u001b[1;32m    685\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    686\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 687\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_run_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    688\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    689\u001b[0m         \u001b[0;31m# For multi-metric evaluation, store the best_index_, best_params_ and\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36m_run_search\u001b[0;34m(self, evaluate_candidates)\u001b[0m\n\u001b[1;32m   1146\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_run_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1147\u001b[0m         \u001b[0;34m\"\"\"Search all candidates in param_grid\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1148\u001b[0;31m         \u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mParameterGrid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparam_grid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1149\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mevaluate_candidates\u001b[0;34m(candidate_params)\u001b[0m\n\u001b[1;32m    664\u001b[0m                                \u001b[0;32mfor\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    665\u001b[0m                                in product(candidate_params,\n\u001b[0;32m--> 666\u001b[0;31m                                           cv.split(X, y, groups)))\n\u001b[0m\u001b[1;32m    667\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    668\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m    919\u001b[0m             \u001b[0;31m# remaining jobs.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    920\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 921\u001b[0;31m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_one_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    922\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_original_iterator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    923\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36mdispatch_one_batch\u001b[0;34m(self, iterator)\u001b[0m\n\u001b[1;32m    757\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    758\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 759\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dispatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtasks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    760\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    761\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m_dispatch\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m    714\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    715\u001b[0m             \u001b[0mjob_idx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 716\u001b[0;31m             \u001b[0mjob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    717\u001b[0m             \u001b[0;31m# A job can complete so quickly than its callback is\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    718\u001b[0m             \u001b[0;31m# called before we get here, causing self._jobs to\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mapply_async\u001b[0;34m(self, func, callback)\u001b[0m\n\u001b[1;32m    180\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    181\u001b[0m         \u001b[0;34m\"\"\"Schedule a func to be run\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 182\u001b[0;31m         \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mImmediateResult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    183\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    184\u001b[0m             \u001b[0mcallback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m    547\u001b[0m         \u001b[0;31m# Don't delay the application, to avoid keeping the input\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    548\u001b[0m         \u001b[0;31m# arguments in memory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 549\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    550\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    551\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    223\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    224\u001b[0m             return [func(*args, **kwargs)\n\u001b[0;32m--> 225\u001b[0;31m                     for func, args, kwargs in self.items]\n\u001b[0m\u001b[1;32m    226\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    227\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    223\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    224\u001b[0m             return [func(*args, **kwargs)\n\u001b[0;32m--> 225\u001b[0;31m                     for func, args, kwargs in self.items]\n\u001b[0m\u001b[1;32m    226\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    227\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36m_fit_and_score\u001b[0;34m(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)\u001b[0m\n\u001b[1;32m    552\u001b[0m         \u001b[0mfit_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    553\u001b[0m         \u001b[0;31m# _score will return dict if is_multimetric is True\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 554\u001b[0;31m         \u001b[0mtest_scores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscorer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_multimetric\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    555\u001b[0m         \u001b[0mscore_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mfit_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    556\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mreturn_train_score\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36m_score\u001b[0;34m(estimator, X_test, y_test, scorer, is_multimetric)\u001b[0m\n\u001b[1;32m    595\u001b[0m     \"\"\"\n\u001b[1;32m    596\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mis_multimetric\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 597\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0m_multimetric_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscorer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    598\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    599\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0my_test\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36m_multimetric_score\u001b[0;34m(estimator, X_test, y_test, scorers)\u001b[0m\n\u001b[1;32m    625\u001b[0m             \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscorer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    626\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 627\u001b[0;31m             \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscorer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    628\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    629\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscore\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'item'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/anaconda/envs/aif360/lib/python3.5/site-packages/sklearn/metrics/scorer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, estimator, X, y_true, sample_weight)\u001b[0m\n\u001b[1;32m     95\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     96\u001b[0m             return self._sign * self._score_func(y_true, y_pred,\n\u001b[0;32m---> 97\u001b[0;31m                                                  **self._kwargs)\n\u001b[0m\u001b[1;32m     98\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m<ipython-input-10-b8e2e0cd7a17>\u001b[0m in \u001b[0;36mscore_func\u001b[0;34m(y_true, y_pred, sample_weight)\u001b[0m\n\u001b[1;32m      5\u001b[0m     \u001b[0midx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my_true\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_flat_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      8\u001b[0m \u001b[0mscoring\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmake_scorer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscore_func\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'sample_weight'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mNameError\u001b[0m: name 'accuracy_score' is not defined"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:From /anaconda/envs/aif360/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
+      "Instructions for updating:\n",
+      "Colocations handled automatically by placer.\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "0.8218794786050638"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "rew = ReweighingMeta(estimator=LogisticRegression(solver='liblinear'))\n",
+    "adv_deb = AdversarialDebiasing(prot_attr='sex')\n",
+    "adv_deb.fit(X_train, y_train)\n",
+    "adv_deb.score(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.022611763594614448"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "average_odds_error(y_test, adv_deb.predict(X_test), prot_attr='sex')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that `AdversarialDebiasing` creates a TensorFlow session which we should close when we're finished to free up resources:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adv_deb.sess_.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Finally, let's try a post-processor, `CalibratedEqualizedOdds`.\n",
     "\n",
-    "# UGLY workaround for sklearn issue: https://stackoverflow.com/a/49598597\n",
-    "def score_func(y_true, y_pred, sample_weight):\n",
-    "    idx = y_true.index.to_flat_index()\n",
-    "    print(idx)\n",
-    "    return accuracy_score(y_true, y_pred, sample_weight=sample_weight[idx])\n",
-    "scoring = make_scorer(score_func, **{'sample_weight': sample_weight})\n",
+    "Since the post-processor needs to be trained on data unseen by the original estimator, we will use the `PostProcessingMeta` class, which splits the data and trains the estimator and the post-processor each on its own split."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7676926226711254"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cal_eq_odds = CalibratedEqualizedOdds('sex', cost_constraint='fnr')\n",
+    "log_reg = LogisticRegression(solver='liblinear')\n",
+    "postproc = PostProcessingMeta(estimator=log_reg, postprocessor=cal_eq_odds)\n",
     "\n",
-    "params = {'estimator__C': [1, 10], 'reweigher__prot_attr': ['sex']}\n",
+    "postproc.fit(X_train, y_train)\n",
+    "accuracy_score(y_test, postproc.predict(X_test))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfUAAAEKCAYAAAALjMzdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3Xdck1f7P/DPCXsLiGwRgRDCcFEUR92K/VkUseJotY66H63Vjm+X1qq1j9papFq11Yrax1W1rmprK9hqawVF2UslArIEwoaEnN8fSWyAAEEJCXjer1dekHvlusM4Oec+93URSikYhmEYhun8OJoOgGEYhmGY9sEadYZhGIbpIlijzjAMwzBdBGvUGYZhGKaLYI06wzAMw3QRrFFnGIZhmC5CrY06ISSIEJJKCMkghLynZH1PQsgVQshtQshdQshLCuv+T7ZfKiFkvDrjZBiGYZiugKjrPnVCiA6ANABjAWQDuAlgBqU0SWGbPQBuU0p3EUL4AC5QSnvJvv8fgAAADgAuA+BSSuvVEizDMAzDdAHq7KkHAMiglN6jlNYBOAJgUqNtKABz2fcWAHJl308CcIRSWkspvQ8gQ3Y8hmEYhmGaoavGYzsCeKjwPBvAwEbbrAPwCyHkPwBMAIxR2PfvRvs6Nn4BQshCAAsBwMTEZACPx2uXwDUtIQEwNgZ699Z0JAzTUGxsbBGl1EbTcTAMo5w6G3VVzADwPaV0GyEkEMBBQoiPqjtTSvcA2AMA/v7+NCYmRk1hdhyJRNqgv/EG8Pnnmo6GYRoihGRpOgaGYZqnzkY9B4CzwnMn2TJF8wEEAQCl9C9CiCGA7iru2yUVFgK1tUDPnpqOhGEYhuls1HlN/SYAD0KIKyFEH8B0AGcabSMAMBoACCFeAAwBFMq2m04IMSCEuALwAPCPGmPVGlmyfhBr1BmGYZi2UltPnVIqJoQsB3AJgA6AfZTSRELIegAxlNIzAFYD2EsIWQXppLnXqXQ6fiIh5BiAJABiAMuel5nvAoH0q4uLZuNgGIZhOh+1XlOnlF4AcKHRso8Vvk8CMKSZfTcC2KjO+LSRvFFnPXWmK4mNje2hq6v7LQAfsKRXDPO0JAASxGLxggEDBhQo20DTE+WYRrKyADMzwMJC05EwTPvR1dX91s7OzsvGxqaEw+GoJzkGw3RxEomEFBYW8vPy8r4FEKxsG/aJWcsIBNKhd0I0HQnDtCsfGxubMtagM8zT43A41MbGRgjpiJfybTowHkYFAgEbeme6JA5r0Bnm2cn+jpptu1mjrmVYo84wDMM8Ldaoa5HKSqCoiDXqDMMwzNNhjboWeShLqstuZ2MY9Th48GA3QsiA27dvG8qXpaam6nt4eHgDwLlz58xGjhzp/qyvExoa2mv//v2WABAWFuYSGxtrCADGxsb9nuW4586dM/v1119N2rqfo6Oj76NHj1SaGB0eHm49e/bsdutaDB8+3L2oqEgHADZs2NCjd+/e3sHBwa6HDx+2eP/99+3a63XkJBIJBg0axC0uLuYAgI6OzgAej8eXP1JTU/Xb+zXlnva9y83N1R02bJhHe8TAZr9rEXY7G8Oo15EjR6z69+9fERkZadWvX7/c1vd4dkePHm1Tal2RSAQ9PT2l637//XczU1PT+rFjx1a2S3AdIDo6OkP+/XfffWdz+fLlNDc3N5FskVDV47T0vig6duyYhbe3d7WVlZUEAAwMDCQpKSlJre2nSQ4ODmJbW1vRL7/8YjJu3Lhn+tmyRl2LsGxyzPNg3jw4JyTAuD2P6eODqn37GhSQakIoFHJu3rxpevny5dTg4GCPL7/8UuVGXSwWY+nSpU5XrlyxIITQOXPmFH3wwQcFa9assb948WK32tpajr+/f8Xhw4ezOJyGA6ABAQGeW7duffjiiy9WAcD8+fOdo6OjzW1sbEQ//vjjPQcHB3FAQICnj49P1T///GMaGhp
a7OnpWbN582Z7kUjEsbS0FB89evReVVUVJzIy0obD4dBjx45Zb9++XeDn51czd+5cl5ycHH0A+OKLLwTjxo2rzMvL0wkNDe2dn5+vP2DAgIrmSmyfOHHC/OOPP3asr68nVlZW4r/++itNcf0PP/xg0TgOZ2dn8fnz501Xr17dEwAIIbh+/XpKWVmZTmhoaO+Kigqd+vp6smPHjqygoKAKR0dH35iYmOTVq1c7ZGdnG0yYMMFj1qxZRZaWlvUxMTEmkZGRgtzcXF1l5/HWW2853Lt3z0AgEBg4OjrWrl279tHcuXNdRSIRkUgk+PHHHzN9fX1rFWM+fPiw1aJFi4pa+3kuW7bM6dq1a2Z1dXXkjTfeKHj77beLzp07Z/bJJ584mJubi1NTU42Dg4OLfX19q3fu3GlbW1tLTp06lent7V3b3Pui+BrNnZOy987S0lIyefLk0sjISOtnbdTZ8LsWEQgAHR3AwUHTkTBM1/PDDz90GzFihNDPz6/W0tJS/Mcff6j8wWLbtm02AoFAPykpKTEtLS1pwYIFjwHg7bffLkhISEhOT09PrK6u5hw5cqTFDBPV1dUcf3//yoyMjMQhQ4aUv/fee0/+2uvq6khCQkLyJ598kj927NiKuLi4lOTk5KSpU6cWr1+/3s7T07Nu9uzZhYsXL85PSUlJCgoKqli0aJHzW2+9lZ+QkJB86tSpzMWLF/cCgPfee88hMDCwIiMjIzEkJKT00aNHTYacc3NzdZcvX97r5MmTmampqUmnT5/ObLyNsjhk74ddeHh4VkpKStLff/+dYmpqKtm3b5/V6NGjhSkpKUnJycmJAwcOrGr0/gt69Oghio6OTlu7dm2DxCnNnQcApKenG169ejX17Nmz93fs2GGzdOnS/JSUlKS7d+8mu7q61jWOOTY21nTIkCFPGsba2lqOfOh97NixbgCwffv27hYWFvUJCQnJd+7cST5w4IBNSkqKPgCkpKQY7du3T5Cenp5w4sQJ67S0NMP4+Pjk1157rWjbtm09WnpfVDknZe8dAAwZMqTyn3/+MVXya9MmrKeuRQQCwNER0GU/FaYLa61HrS7Hjh2zWrFiRQEAhIaGFh88eNBq2LBhVa3tBwC///67+eLFiwvlw7+2trb1APDzzz+bffHFF3Y1NTWc0tJSXT6fX40WhpQ5HA4WLFhQDADz5s17PGXKlCfX72fMmFEs//7+/fv6kydPdiosLNSrq6vjODs71yo73rVr18zT09ON5M8rKip0hEIh5++//zY7efJkBgBMnz5duGjRoiZptqOiokwCAgLKeTxeneI5KWoujkGDBlWsWbPGedq0acUzZswocXNzkwwaNKhy0aJFvUQiEWfq1KklgwcPrm75XW39PAAgKCio1NTUlAJAYGBg5datW+2zs7P1p0+fXtK4lw4AQqFQ19LSUiJ/rmz4/fLly+YpKSnGZ86csQSA8vJynaSkJEN9fX3q6+tb6eLiIgKAnj171k6YMEEIAH369KmOjo42a+l9UeWclL13gHQIvqCg4Jmv97OeuhbJymJD7wyjDvn5+Tp///232bJly1wcHR19IyIi7M6ePWspkUha37kZVVVVZPXq1S4nT57MTEtLS3r11VeLampq2vQ/lShkmTIzM3sSzPLly3suXbq0IC0tLSkiIiKrtrZW6XEppbh161ZySkpKUkpKSlJBQcFdCwuLpz+pRpqLY9OmTXnffvttVnV1NWfYsGG827dvG06YMKHi6tWrqY6OjnXz5s1zjYiIsFb1dVo6DxMTkyfns3jx4uKffvopw8jISDJx4kSPM2fOmDU+lo6ODq2vb7lUCKWUbNu2TSB/vZycnPgpU6aUAYCBgcGTaxUcDgeGhoZU/n19fT1p6X1R5ZyUvXeA9PfJwMDgmX92rFHXIvJscgzDtK+DBw9ahoSEFOfm5sbn5OTE5+Xl3XVycqq7dOmSSsOdo0ePLtu9e3d3kUg6vys/P1+nqqqKAwB2dnZioVD
IOXv2rGVrx5FIJJDPiv/++++tAwICypVtV15ertOzZ0+RfDv5cjMzs/ry8nId+fOhQ4eWffbZZz3kz69fv24EAIMGDSqX73fs2DHzsrIyHTQyYsSIyn/++cdMPuycn5/fZJvm4khMTDQICAio3rhxY56fn19lQkKCYVpamr6Tk5No9erVRbNnzy68deuWypc3mjuPxpKSkvS9vLxqP/zww4Lx48eXxsXFNdnO1dW1Jjk52aCl1xs7dqxw165dNrW1tQQA7t69a1BWVqZye9jc+6LKOSl77wAgISHBkMvlqjy60RzWqGuJ+nogO5v11BlGHY4fP241ZcqUEsVlkyZNKjl06JCVKvuvWrWq0MnJqY7H43l7enryv/vuO6vu3bvXz5o1q9DLy8t75MiR3D59+rQ6wcnIyEjyzz//mHh4eHhfvXrV7LPPPnukbLsPPvggd8aMGW7e3t5e1tbWTyZghYaGlp4/f74bj8fjX7x40XTPnj0Pb926ZcLlcvlubm7eERERNgCwefPm3GvXrpm6u7t7nzx50tLe3r7JtWcHBwdxeHj4g5CQEHdPT09+SEhIb1Xj+O9//9vDw8PDm8vl8vX09OjUqVOFly5dMvPy8vL28vLi//jjj1bvvPNOvirvLQA0dx6NHTp0yIrL5XrzeDx+cnKy0aJFix433mbcuHHCX375pUkPXtGqVauKeDxeja+vr5eHh4f3G2+84SISiVROzt3c+6LKOSl77wDg119/NQsKClL5boDmkOZmRXY2/v7+NCYmRtNhPLXcXOn19F27gMWLNR0NwyhHCImllPq3db87d+486NOnT4szkhmmPWRlZenNmDGj1/Xr19M1HUtb+Pv7e/78888ZNjY2rZYZv3PnTvc+ffr0UraO9dS1BLudjWEY5tm5uLiI5s2bVyRPPtMZ5Obm6q5cuTJflQa9NWyetZaQJ55h19QZhmGezYIFC0pa30p7ODg4iF977bXS9jhWp/kk09XJG3VnZ83GwTAMw3RerFHXEllZQLdugLm5piNhGIZhOiu1NuqEkCBCSCohJIMQ8p6S9V8SQuJkjzRCSKnCunqFdWfUGac2YLezMQzDMM9KbdfUCSE6AL4GMBZANoCbhJAzlNInmX0opasUtv8PAMUKRtWU0r7qik/bsDrqDMMwzLNSZ089AEAGpfQepbQOwBEAk1rYfgaA/6kxHq3GsskxjPqx0qut62qlVwkhAyZNmuQqXy8SiWBpadmntZ/z0/4u1NTUEH9/f095oqKOps7Z745AgxzP2QAGKtuQEOICwBXA7wqLDQkhMQDEADZTSk+rK1BNKysDSkvZ8DvDqBsrvdrxNF161cjISJKammpUUVFBTE1N6alTp8xtbW3V1uIaGhrS4cOHl3377bdWS5YsKW59j/alLRPlpgM4QSlVvEfPRZbkYiaA7YQQt8Y7EUIWEkJiCCExhYWFHRVru3so++jDeurMc2HePGcEBHi262PevFbvG5GXXt2/f/+DU6dOqZRJTk4sFmPhwoVO8kxgGzdu7AEAa9assffx8fHy8PDwnjFjhouyXPIBAQGeV69efZIydf78+c7u7u7egYGB3NzcXF35NvPmzXP28fHx2rBhg+0PP/xg4efnx/Py8uIPHjyY+/DhQ93U1FT9yMhIm2+++cZWnlEuNzdXd/z48W4+Pj5ePj4+Xr/88osJAOTl5ekMGTLEw93d3TssLMylpdKrfD7fy9PTkx8YGMhtvF5ZHABw/vx5U3nlMy8vL35JSQknKytLz9/f35PH4/E9PDy8L168aAr8O0owc+bMnvLSq5988kkPxRGB5s7jrbfecpg8ebJr//79eVOmTHGNiYkx9PX19eLxeHwul8uPj49vkg728OHDViEhIQ1uDxszZozw+PHj3QDgf//7n1VoaOiTxvbKlSvGffv25Xl5efH79evHu3PnTpNjlpWVcV555ZVevr6+Xl5eXvxDhw51A4Dm4pk6dWrpkSNH2vQ71l7
U2ajnAFD8Q3OSLVNmOhoNvVNKc2Rf7wGIQsPr7fJt9lBK/Sml/jY2SrMKdgry29lYo84w6sNKrzb0vJReBYDXXnut+OjRo5ZVVVUkOTnZODAw8Mn6Pn361Ny8eTMlOTk5ae3atTnvvPOOU+Njvv/++/YjR44si4+PT/7jjz9SP/zwQ6eysjJOc/G88MIL1Xfv3m3zZZL2oM7h95sAPAghrpA25tMh7XU3QAjhAbAE8JfCMksAVZTSWkJIdwBDAPxXjbFqFMsmxzxX9u1jpVfBSq+qch7As5deBYCBAwdWZ2dnG+zdu9dqzJgxDX4+xcXFOmFhYa4PHjwwJIRQZTngo6KizC9dutQtPDzcDgBqa2tJRkaGfnPx6OrqQk9Pj5aUlHAax6JuauupU0rFAJYDuAQgGcAxSmkiIWQ9ISRYYdPpAI7QhuNDXgBiCCF3AFyB9Jp6g3q4XYlAAOjpAfb2mo6EYbomVnr16XSl0qtBQUGla9eudZ49e3aD69zvvvuu4/Dhw8vT09MTz549m1FXV6e0jOqJEycy5PE9evQovn///jUtxSMSiYixsXGHF1dR6zV1SukFSimXUupGKd0oW/YxpfSMwjbrKKXvNdrvOqXUl1LaR/b1O3XGqWkCAeDkBHC0ZYYDw3QxrPQqK726ZMmSojVr1uQGBAQ0GEEoKyvTcXJyqgOA3bt3d1f2uiNHjizbtm2brfxD4LVr14xaiicvL0+nW7duYsXa7B2FNSNagN3OxjDqxUqvstKrbm5uog8//LCg8fJ33303b926dU5eXl58sVhpFVVs3rw5VywWEx6Px3d3d/f+8MMPHVuK5+effzZvPMzfUVjpVS3g4gKMGAEcOKDpSBimZaz0KqPttKH06rhx49y2bt2a7efnp3QuxLNipVe1mFgM5OSwnjrDMEx70HTp1ZqaGhIcHFyqrga9Naz0qobl5gL19axRZxiGaS+aLL1qaGhIly9f3uSyQEdhPXUNY3XUGYZhmPbCGnUNY4lnGIZhmPbCGnUNkzfqzq0muWQYhmGYlrFGXcOysgBra8BEIwkFGUY7SSTAb7/BJDIS3X77DSbPkCPmiczMTL3Ro0e7ubi4+Dg7O/vMnTvXuaampkn2MAB48OCBXlBQUJNbvBpTrEDWVm+99ZbDxx9/bKvq9s9a4U3Rf//7Xxt5cpjbt28bynO4JyYmGvTr14/3rMcPCgrqnZSUpA9Ic79zuVy+PFf801SZU1VnrazWnlijrmECAbuezjCKjh6FhYMD/IKDwV26FL1efhlcBwf4HT2KFvOqt0QikWDy5MnuwcHBpVlZWQn3799PqKys5KxcudKx8bYikQi9evUSXbx48V5rx42Ojs7o3r170/RlWu6dd94plE/mOn78eLfg4OCS5OTkJG9v79rbt2+nqHociUSCxtnbYmJiDOvr6wmfz39yb3x0dHSaPBubNlaYU6yspulYnhVr1DVMIGDX0xlG7uhRWMyZg975+dCrqgKnshI61dXg5OdDb84c9H7ahv3s2bNmBgYGkpUrVz4GpLm5v/nmm4dHjx7tXl5ezgkPD7ceNWqU+6BBg7iDBw/2VKyxXl5eznnppZd6u7m5eY8dO9bNz8+PJ6+6Jq9Alpqaqt+7d2/v6dOnu7i7u3sPGTLEo6KiggDAtm3buvv4+Hh5enryx48f71ZeXt7i/92HDx/qjh071s3T05Pv6enZpGcrFAo5gYGBXD6f78Xlcp9UDCsrK+OMGDHC3dPTk+/h4eG9d+9eSwBYunSpo5ubmzeXy+UvXLjQCfh3lODo0aMWe/bssf3+++9tBg4cyAUajgh89NFHtj4+Pl5cLpe/atUqB0Baf75Xr14+ISEhvbhcrndmZmaDYjHff/+99csvv9ygSpoyzR3b1dXVOzQ0tFevXr18goODXU+fPm3Wv39/nouLi8+VK1eMga5XWa09sUZdgyhl2eQYRk4iAVasgEttrfL/S7W14KxcCZenGYqPj4836tO
nT4PiLVZWVhJ7e/u6pKQkAwBITEw0/umnnzJv3ryZqrjdli1bbLp161afmZmZuGnTppykpCSlw8cCgcBwxYoVBRkZGYkWFhb1kZGRlgAwa9askoSEhOTU1NQkT0/P6vDwcKWpSOUWL17cc9iwYeWpqalJiYmJSf37969RXG9sbCw5f/58RlJSUnJ0dHTa+++/7ySRSHDy5ElzOzs7UWpqalJ6enrilClTyvLy8nQuXLhgmZ6enpiWlpa0adOmBhnswsLChPLKbzdu3EhTXHfy5EnzjIwMw7t37yYnJycnxcXFGf/888+msnM1WL58eWFGRkYil8ttkK3uxo0bpoMGDWrwXg8fPpzL4/H4fn5+vNaO/fDhQ8N33303PzMzMyEzM9Pw8OHD1jExMSkbN27M3rhxoz3Q9SqrtSd2n7oGCYVAeTkbfmcYALhyBSYVFWjx+nR5OXSiomAyahTafQh32LBhZcoqlV2/ft105cqVBQDwwgsv1HC5XKWV3RwdHWvllcn69etX9eDBAwMAiI2NNfr4448dy8vLdSorK3WGDx/eYvrQ69evm504ceI+IB1RsLa2bhCTRCIhb775ptPff/9tyuFwUFBQoJ+dna3bv3//6g8++MB5yZIljpMmTRIGBQVViEQiGBgYSMLCwnpNnDixNCwsTOXUpRcvXjS/evWqOZ/P5wNAVVUVJyUlxbB379519vb2daNHj1b6MygsLNSzs7NrcHE6Ojo6zd7e/kkO1paO7ejoWCvPz87lcqtHjRpVxuFw0L9//6oNGzY4AF2vslp7Yj11DWK3szHMv3JyoEcIWsxbTQhodjb02npsHx+f6jt37jQoMFJcXMx59OiRPp/PrwWkPeC2HleRvr7+k9h1dHSoWCwmALBw4ULXiIgIQVpaWtK7776b21zFNVXt3r3b6vHjx7rx8fHJKSkpSdbW1qLq6mqOn59f7a1bt5J8fX2rP/roI8c1a9bY6+npIS4uLnnq1Kkl586d6zZixAgPVV+HUoo333zzkfxauEAgSFi1alUR0PJ7ZWBgIKmurm7xHFs6tuL7yOFwYGhoSAFAR0cH9fX1BOh6ldXaE2vUNYjVUWeYfzk6QiSRQOlsdDlKQZyc0OYpysHBweU1NTUc+YxvsViMpUuXOr/yyitFiiVPlQkMDKw4cuSIJQDExsYapqWlKa0g1pyqqipOz549RbW1tUSVa7ZDhgwp37Jli408zsePHzcYvRAKhTrdu3cXGRgY0LNnz5rl5ubqA9IZ+2ZmZpKlS5cWv/XWW3lxcXHGQqGQI+vVCr/55puHKSkpKldOmzBhQtnBgwe7y+ua379/Xy8nJ6fV0V0PDw+lVdLa49hyXa2yWntiw+8axLLJMcy/Ro5EpZkZ6qurm+9smJmhfsSItg+9czgcnD59OmPhwoUuW7ZssZdIJBg1apQwPDw8p7V933777cJp06b1cnNz83Zzc6txd3evsbS0VHnG+3vvvZcbEBDgZWVlJe7fv39FRUVFi5cYdu3aJXj99ddduFxudw6Hg4iIiKwxY8Y8OecFCxYUT5gwwZ3L5fL9/PyqXF1dawDpMP///d//OXE4HOjq6tKdO3dmlZaW6kycONG9traWAMCnn376UNW4p0yZUpaYmGj4wgsv8ABp7/zw4cP3dXV1W2z0JkyYUPr777+bTZ48WWlZ2Wc5tty7776bt2DBAtfPP//cYezYsUon5W3evDl34cKFPXk8Hl8ikRBnZ+faK1euZBw6dMjq2LFj1rq6utTGxkb06aefPgI0W1mtPbEqbRr07rvAV18BVVWsljrTOai7Spt89ruyyXIGBpAcOIB7YWHo0H+8YrEYdXV1xNjYmCYmJhqMGzeOm5mZmSAfFmYaqqioIEOGDPGMjY1N0dXtPP1GdVdWa08tVWnrPO94FyQQSDPJsQadYaRkDfa9lSvhUl4OHUJAKQUxM0P9V18hq6MbdEB6S9uwYcM8RSIRoZTiyy+/zGINevNMTU3
pxx9/nHv//n19Dw+PJnXctZGmK6u1J9ZT16DBgwEjI+C33zQdCcOopqPqqUskQFQUTLKzoefkBNGIEahkH34ZRor11LWUQACMG6fpKBhG+3A4gDpuW2OYrk6tn30JIUGEkFRCSAYh5D0l678khMTJHmmEkFKFdXMIIemyxxx1xqkJIpG0ljqb+d6FPH4M/PWXNKsQwzCMBqitUSeE6AD4GsAEAHwAMwghfMVtKKWrKKV9KaV9AewAcFK2rxWAtQAGAggAsJYQYqmuWDUhO1v6v5816l3I3r3SayoZGZqOhGGY55Q6e+oBADIopfcopXUAjgCY1ML2MwD8T/b9eAC/UkqLKaUlAH4FEKTGWDscu52tixGLgV27gFGjAA+V83swDMO0K3U26o4AFO+JzJYta4IQ4gLAFcDvbdmXELKQEBJDCIkpLCxsl6A7Cssm18WcPSv9of7nP5qOpGuQ1l41QWRkN/z2mwnaofYqK736r44uvTpgwABPxfU8Ho8vL5jTHMWiOm01ePBgbmFh4VP9XDo7bZlPOh3ACUppm0oYUkr3UEr9KaX+NjY2agpNPeSNulOTMgRMpxQRIf2ENnGipiPp/I4etYCDgx+Cg7lYurQXXn6ZCwcHPxw9ykqvtpOOLr1aWVmpk5GRoQcAt27dMmyn02jWjBkzHm/durVzNQrtRJ2Neg4AZ4XnTrJlykzHv0Pvbd23U8rKAnr0kN7SxnRyiYnA778DS5cCnSjZhlY6etQCc+b0Rn6+HqqqOKis1EF1NQf5+XqYM6f30zbsrPSqZkuvTp48uTgyMtIKACIjI61CQ0OL5etSU1P1BwwY4Mnn8734fL5X4/MFpAmAFi1a5CSPZcuWLd0BICsrS8/f399T3vO/ePGiKQBMnz699OTJk9Ytvc9dlTob9ZsAPAghroQQfUgb7jONNyKE8ABYAvhLYfElAOMIIZayCXLjZMu6DIGAXU/v9OQ9lIgIwMAAmD9fs/F0dtLaqy5oruBJbS0HK1e6PM1QPCu9qtnSqzNmzCg5e/asJQBcunSp25QpU540+g4ODuI//vgjLSkpKfno0aP3Vq1a1eSi5Pbt27tbWFjUJyQkJN+5cyf5wIEDNikpKfr79u2zGj16tDAlJSUpOTlVXsC/AAAgAElEQVQ5ceDAgVUAYGNjU19XV0fy8vKeuyF4tXUrKKViQshySBtjHQD7KKWJhJD1AGIopfIGfjqAI1QhCw6ltJgQ8imkHwwAYD2ltBhdiEAA8PkNl9XXAzrP3a9gJ5WSAgQGAr/8AkRGAjNnAt1b/F/NtObKFRO0khcd5eU6iIoywahRrPRqJyq92qNHj3oLCwvxnj17LN3d3atNTU2ffDKrq6sj8+fPd0lKSjLicDjIyspqUgzm8uXL5ikpKcZnzpyxBIDy8nKdpKQkw0GDBlUuWrSol0gk4kydOrVE/v4DgLW1tVggEOjb2dlVNz5eV6bWa+qU0guUUi6l1I1SulG27GOFBh2U0nWU0ib3sFNK91FK3WWP/eqMs6NRKh1+V5wkl5IibRNSU5vfj9ESlALz5gFlZcC0adLk/cuXazqqzi8nRw+EtHyTPyEU2dms9GonLL06derUknfeecdlxowZDTpoGzdutO3Ro4coOTk5KT4+PkkkEikro0q2bdsmkMeSk5MTP2XKlLIJEyZUXL16NdXR0bFu3rx5rvLJf4C0fvqz/kw7I22ZKPdcKS6WtgPy4XfFNmLePJa7ROv9+CNw9650uDgrC/D0BPr313RUnZ+jowgSSYulV0EpgZMTK73aCUuvzpo1q2TZsmV5U6ZMKWt8Pvb29iIdHR3s3LnTuvHEOwAYO3ascNeuXTbyanN37941KCsr46Slpek7OTmJVq9eXTR79uzCW7duGQPSCXyFhYV6np6enT6Xe1uxWT0a0Ph2thMngNu3pW3ErVvSNmPqVM3Fx7SgogJYvBiolI08Ugrk5Eifmyi91MqoauTISpiZ1UNJL+8JM7N
6jBjBSq92wtKrlpaWko0bN+Y13v7NN98sCA0NdTty5Ij1qFGjhEZGRk0+ZK1atarowYMHBr6+vl6UUmJlZSW6cOFC5qVLl8zCw8PtdHV1qbGxcf3hw4fvA8Cff/5p3K9fv0o9vTYP6nR6rKCLBpw+DYSEADExwIMH0hFcxbk/HA5w/DgwZYrGQmSas3q1NMlMtcJlOkND6cz3bds0F1cHUXtBF/nsd2VD1AYGEhw4cA9tuC7cHljp1bbRhtKrc+fOdZ48eXLppEmTmq3p3pm1VNCFDb9rgLynnpYGhIWhyWReiUTa0F+40PGxMS1ISWnaoANATY10OZsQ8ezCwoQ4cOAebG1FMDaWwMSkHsbGEtjaijTRoAPSW9oCAgJ4np6e/JCQEDdWerVliqVXNRWDj49PdVdt0FvTZXrqrq6udO3atQ2WeXt744UXXoBIJMLhw4eb7NO3b1/07dsXVVVVOHbsWJP1/v7+8PHxgVAoxKlTp5qsDwwMhKenJ4qKinDu3Lkm61988UX07t0beXl5uHjx4pPlmZnSYi43b45GfLwznJ0fYvTopvVX//knCAkJdrh//x6uXr3aZP3EiRPRvXt3pKam4q+//mqyPiQkBBYWFkhISICyUYxp06bB2NgYcXFxiIuLa7J+1qxZ0NPTw82bN5GYmNhk/euvvw4AuH79OtLSGtwNAz09PcyaNQsAEB0djfv37zdYb2xsjGnTpgEALl++jOzs7Abrzc3NMUU2VHHx4kXk5TUctbO2tsbLL78MADh79iweP37cYL2dnR2CgqSZhU+ePImysgaX8eDk5IQxY8YAAI4dO4aqqoYTml1dXTF8+HAAwOHDhyESiaTXSGTH4aalYfD16wCA72XvA8zNgX7SW3y19XdPbvTo0XB2dsbDhw/xm5Lav0FBQbCzs8O9ew1/9+bOndshpVdltVdNkJ2tBycnEUaMqASrvcowAFjpVa1TWwvo6QHCVvocJSXAjRvSJDWMhlVXA+WtfPAvL5duxzIKPTtp7VVWepVh2qjL9NQ70zX1gQOlHb4UFZIx7twJLFmi/piYVlAKDBki/ZSlLPkJhwMMGgT8+SdAWp7A3Zmp/Zp6I2IxS9LHMI2xa+paRiCQFvJqbTSRw3kymstoGiHAvn2AfjOXCQ0MpOu7cIPe0W7fhqGVFfreuYMmt0cxDKMca9Q7WG0tkJcH+Pu3noDMxkbaq2e0BI+nvGCLkZF0OMXTs+k65qlIJMDcuehVUQGd119Hr3Yo0sYwzwXWqHewh7K7RF1cgP37m08Lq6PDOn5aqa6u6Q/F2BhYv14z8XRRBw7AMi0NRpQCqakwjoxEt2c9po6OzgB54Y8JEyb0bq2wijLr16/v8TT7dQZtLXUaGhraa//+/Zbt8dqNS92+/PLLrlwul//JJ5/0ePPNNx1Onz5t9izHP3jwYLc1a9bYA9JiNj169PDj8Xh8Ho/HX7p0qdKS4O1FXvSnrfstXLjQ6cyZM20+7y75y6nN5LezubgAL70EHDvWdBiew5Euf+mljo+PaYFAAJw7B0ya9G+iGRMTYPdulnimHQmF4KxahZ7V1dL/T9XV4Lz5JlzKyp7t/5WBgYEkJSUlKT09PVFPT49u27atzaU5d+/ebVtRUfHUcSgrVcqgQalbgUCge+fOHZO0tLSktWvXFmzfvj23cSKblohETRMOfvHFF3arV68ulD9fvHhxvjzl7M6dO7WyAuiaNWsKPv/8c7u27sca9Q7WOJvclCnAkSP/Tpg2NASOHmWJZ7TSrl3Sr19+Cfj5ST999enDfljtbM0aONTUNPzfVFMDzurVcGiv1xg6dGhFRkaGAQCsW7fO1sPDw9vDw8N7/fr1PQDlZUw3bNjQo6CgQG/48OFceZlSReHh4dajR492CwgI8HRxcfFZvXq1PaC8VOnu3butuFwu38PDw3vJkiVPeoonTpww5/P5Xp6envzAwECuPJZXXnmll6+vr5eXl9eTUqsxMTGGvr6
+Xjwej8/lcvnx8fEGzZVf/eOPP4xfeOEFT29vb6+hQ4d6ZGVl6cmXy0u8fvHFF83eZ/PBBx/Ycblcvqenp9Ke7Zo1a+x9fHy8PDw8vGfMmOEikV0v2bBhQw952deJEyf2BoDz58+bynvJXl5e/JKSEo7iKMGYMWO4BQUF+jwej3/x4kVTxRGB5s4jICDAc968ec4+Pj5eGzZssFWM7e7duwb6+voSe3t7cUu/Ey0de/78+c4+Pj5evXv39o6OjjYeN26cm4uLi8+KFSue/E6OGTPGzdvb28vd3d1769atSi+u7ty500r+M5s5c6aLWCyGWCxGaGhoLw8PD2/56AQAcLncutLSUl2BQNC2Xj6ltNkHpNXVrrS0jbY8BgwYQDuDdesoJYTS2tp/l0kklAYGUsrhUDp4sPQ5o2Wqqym1tqY0JET6PDmZ0m7dKE1J0WxcHQzSCott/vuMi4t7QCmNae1x6xZNMDCgEuntBg0fBgZUEhdH41U5jrKHkZFRPaU0pq6uLmbUqFElmzdvzrp69WqSh4dHlVAovFVaWnrLzc2t+s8//0zcv39/RlhYWKF836KiotuU0hgHB4fa3NzcOGXH/+qrr+5379697tGjR7fLy8tj3d3dq6Ojo5NSUlLuEkLo5cuXkymlMffv379jZ2dXm5OTE1dXVxczcODAssjIyIycnJw4W1vbuuTk5LuU0pi8vLzblNKYZcuWPfr666/vUUpjCgsLb7u4uNQIhcJbs2fPzt+5c+c9SmlMdXV1bHl5eayyuGtqamL79u1bkZOTE0cpjdmzZ0/m1KlTiyilMR4eHlUXLlxIoZTGLFy4MM/d3b268XkdPXo0rW/fvhVlZWW3FOOaMmVK0b59+zIVl1FKYyZNmvT48OHD6ZTSGBsbm7qqqqpYeeyU0piRI0eWXrp0KZlSGlNaWnqrrq4uJiUl5a78tRW/V3ydls7jhRdeKJ81a1aBsp/L9u3b7y9YsCBP/nzVqlW5NjY2dZ6enlWenp5VJ06cSGvt2IsXL35EKY1Zv369wMbGpu7Bgwd3qqqqYnv06FH36NGj24rvgfxnL18u/52JjY1NGDlyZGlNTU0spTRm1qxZBTt27Lh/9erVpMDAQKE8Pvn7RCmNCQsLK9y/f39G43OS/T0p/Vtr8RMApbSeECIhhFhQSjs8k1NXJBAA9vYNJ1HLJ1YHBrLr6FrryBHg8eN/q7HxeEBREauV247kk+OUjJ4CAEQi4PXX0Ss2FqlPk4emtraWw+Px+AAwcODA8pUrVxZt2bLF5qWXXio1NzeXAMD/+3//r+TKlStmwcHBwsZlTFV5jaFDh5bZ2dnVy48VFRVlGhYWVqpYqvTPP/80GTRoULmDg4MYAMLCwoqjo6NNdXR0aEBAQDmPx6sDAHkZ2KioKPNLly51Cw8Pt5OdB8nIyNAPDAys3Lp1q312drb+9OnTS3x9fWuVlV+9efOmYXp6utGoUaO40vdZAhsbG1FRUZFOeXm5zoQJEyoAYN68eY9///13i8bn9Ouvv5q/+uqrTwrfKCtP+/PPP5t98cUXdjU1NZzS0lJdPp9fDUDo6elZHRIS4hocHFw6a9asUgAYNGhQxZo1a5ynTZtWPGPGjBI3NzeVpkHevXvXQNl5yNc3rv4m9+jRIz0bG5sGvfTFixfnr1+/Pl/+vLn3SL4+JCSkFAD69OlT7e7uXu3i4iICAGdn59p79+7p29nZVX/++ee258+f7wYAeXl5eomJiYZ2dnZPci1cvHjRLCEhwbhPnz5eAFBTU8Pp0aOHOCwsrPThw4cGc+bMcX755ZeFISEhTzJl2djYiHNyctqUmU+Vbn0FgHhCyK8AngRIKV3RlhdipASChiVX5VgbocUoBXbsALy9gZEj/13OfljtKikJBgkJMGluprtEAhIfD9PkZBh4e6PN1bfk19RV2VZexvTHH3+0+OijjxwvX75ctnXr1keK20RGRnbbtGmTAwDs2bPnAQCQRp/I5c+
fpQQopRQnTpzI6NOnT4Nz7t+/f82wYcMqT506ZTFx4kSPHTt2ZAUHB5c3jnvatGml7u7u1XFxcQ0yYxQVFbXLL3BVVRVZvXq1y40bN5Lc3d1Fb731lkNNTQ0HAK5cuZL+888/m/30008WW7dutU9NTU3ctGlT3uTJk4U//fSTxbBhw3jnz59PV+X9oZQSZech11y1PSMjI4lQKGytA9viseVpgTkcDgwMDJ4kd+FwOBCLxeTcuXNm0dHRZjExMSlmZmaSgIAAz8blZyml5JVXXnn89ddfN7mGn5CQkHTq1Cnzb775xubo0aNWx48ffwAANTU1RFmBm5ao8nn3JICPAFwFEKvwYJ5Cc406wNoIrfX339LyecuXs2EUNeLzUevjg0oOB0ozYnE4oL6+qPDyanuD3pyRI0dWXLhwoVt5eTmnrKyMc+HCBcuRI0eWKytjCgAmJib18lKks2fPLpVPtnrxxRerAODPP/80z8/P16moqCAXLlzoNnz48CY9/GHDhlXeuHHD7NGjR7pisRjHjx+3GjFiRMWIESMq//nnH7OUlBR9AMjPz9eRxVi2bds2W/l16mvXrhkBQFJSkr6Xl1fthx9+WDB+/PjSuLg4I2Vx+/n51RQXF+tevnzZBJD29GNiYgy7d+9eb2ZmVn/p0iVTAPj++++VloUdP3582aFDh7rLZ/3L45KrqqriAICdnZ1YKBRyzp49awkA9fX1yMzM1H/55ZfLv/7665yKigodoVCok5iYaBAQEFC9cePGPD8/v8qEhARDVX5WzZ1Ha/t5e3vXZGZmtpjr4GmPLVdaWqpjYWFRb2ZmJrl9+7bhnTt3msycDQoKKjt37pylvHxtfn6+Tlpamv6jR4906+vr8frrr5d+9tlnOfHx8U/K42ZmZhr26dOnuvGxWtJqT51SeqAtB2SaR6m0UQ8O1nQkTJtERAAWFsCrr2o6ki6NwwH278eDwEDwa5U023p6wPff40F7poAfOnRo1cyZMx/379/fCwBee+21wiFDhlT/+OOP5o3LmALAnDlzioKCgri2trZ1N27cSGt8PD8/v8rg4GC3vLw8/alTpz5+8cUXq1JTUxsMn7q4uIjWrl2bM3z4cC6llIwZM6b01VdfLQWA8PDwByEhIe4SiQTW1tai69evp2/evDl34cKFPXk8Hl8ikRBnZ+faK1euZBw6dMjq2LFj1rq6utTGxkb06aefPvrzzz9NGsdtaGhIjxw5krlixYqe5eXlOvX19WTJkiX5/v7+Nd99992DBQsW9CKEYMSIEWWNzwcApk6dWnbr1i3jvn37eunp6dExY8YIIyIinvQ2u3fvXj9r1qxCLy8vbxsbG3GfPn0qAUAsFpOZM2e6lpeX61BKyYIFCwq6d+9ev3r1aofr16+bE0Kop6dn9dSpU4UCgaDVGqktnUdL+40fP77ivffec5ZIJOA088vztMeWCw0NFe7Zs8emd+/e3r17966RvweKBgwYUPPhhx/mjB49miuRSKCnp0fDw8MFxsbGkvnz5/eSSCQEANavX58NSD9YPHjwwODFF19sU7rkVtPEEkKGAFgHwAXSDwEEAKWU9m5pv47WGdLEFhQAtrbSkVz5pVlGy+XlSYdWli2Tznp/znVEmtg33oDTwYOwqa39dyTRwACS115D4d69yG5pX00KDw+3jomJMYmMjBRoOhamoblz5zpPmjSptC23xmlaZGRkt9jYWOOvvvoqt/G6Z00T+x2ALwAMBfACAH/Z11YRQoIIIamEkAxCyHvNbDONEJJECEkkhPygsLyeEBIne5xR5fW0XePb2ZhOYPdu6QytpUs1HclzY9s25BoaosF1RENDSLZtQ5N/bgyjivXr1z+qrKzsVLdwi8Vi8tFHH+W3vmVDqkyUE1JKf27rgQkhOgC+BjAWQDaAm4SQM5TSJIVtPAD8H4AhlNISQojifZLVlNK+bX1dbZaVJf3KGvVOoq4O+OYbYMIEabJ+pkOYm0Py5ZcQLFu
GXtXV4BgZQbJ9O7LMzaHVyWJXrFjxGMDjVjdkOpyzs7N41qxZneoOrnnz5pU8zX6qfHK5QgjZQggJJIT0lz9U2C8AQAal9B6ltA7AEQCTGm3zBoCvKaUlAEApLWhT9J2MYjY5phM4eVI6/M6ulXS4OXNQwuWimhDA0xNVs2ejVNMxMUxnoEpPXV5SRPE6GgUwqpX9HAE8VHierXAsOS4AEEKuQZroZh2l9KJsnSEhJAaAGMBmSunpxi9ACFkIYCEA9OwE3V+BADA1Bbo9cxZrpkNERADu7kBQkKYjee7IJ82NfFHM+/573XadHMcwXVmzjTohZCWl9CsAH1FK/1Tj63sAGAHACcBVQogvpbQUgAulNIcQ0hvA74SQeEpppuLOlNI9APYA0olyaoqx3WRlSYfe2V1RncDt28C1a9LJcaxF0Yh+uI0SMhwEfwDoo+lwGKZTaOm/1VzZ1/CnPHYOAGeF506yZYqyAZyhlIoopfcBpEHayINSmiP7eg9AFIBOX1lcIGBD751GRIS0+trrr2s6kueTNL1cL1JRoYPXX+8FVnuVYVTSUqOeTAhJB+BJCLmr8IgnhNxV4dg3AXgQQlwJIfoApgNoPIv9NKS9dBBCukM6HH+PEGJJCDFQWD4EgEqZoLRZS4lnGC3y+DHwww/A7NnsWommHDhgibQ0I0hrrxojMpKVXlWz56n0KiFkQEJCwpOENOvXr+9BCBlw9epV4+aPIi3u0to2ymzatMlm+/bt1m2PvO2a/eWklM4AMAxABoCXFR4TZV9bRCkVA1gO4BKAZADHKKWJhJD1hBB5+pVLAB4TQpIAXAHwNqX0MQAvADGEkDuy5ZsVZ813RtXVQGEha9Q7hW+/BWpqpPemMx1PKORg1aqekKfZrK7m4M03XVBWxkqvdlEdXXrVw8OjOjIy8kkGvdOnT1u5u7urlGjmafznP/95vHv3btvWt3x2Lf5yUkrzKKV9KKVZjR+qHJxSeoFSyqWUulFKN8qWfUwpPSP7nlJK36KU8imlvpTSI7Ll12XP+8i+fvesJ6pp7B71TqK+Hti5U5rj3cdH09E8n9ascYAsd/gTNTUcrF7NSq+y0qvtUnr1pZdeKr1w4UI3AEhMTDQwMzMTW1paPlk/a9asnj4+Pl7u7u7eq1atUvp7d/LkSfO+ffvy+Hy+14QJE3rL0wcvXbrUUX7OCxcudAKkeemdnJxqr1y50uZeflu1rU4r89TY7WydxNmz0h8Wyx6nGbdvG+LgwR6orW04nbS2loODB3tg+fJCNCps0lYikQiXLl0yHzduXNkff/xh/MMPP1jHxsYmU2kJZ6/Ro0eXp6enG9jZ2YmioqIyAODx48c61tbW9bt27bKNjo5Oa6429927d03i4+MTTU1NJf369eNPmjRJaGtrKxYIBAbffffd/dGjRz948OCB3rp16xxjY2OTbWxsxMOGDeMePHiw2+jRoyuWL1/eKyoqKoXH49XJc6y///779iNHjiw7fvz4g6KiIh1/f3+v4ODgsh07dtgsXbo0f8mSJcU1NTVELBbjxIkTFo3jrq2tJStWrOh5/vz5DAcHB/HevXst16xZ43j8+PEH8+fP7/XVV18JJkyYULFo0SInZed07Ngx8wsXLnSLjY1NMTMzkzTO/Q4Ab7/9doG84M3kyZNdjxw5YjFz5kxheHi4XVZWVryRkRGVF5DZtm2bXXh4eNa4ceMqhUIhx9jYWFJQ8O/dzGfPns2YOHGih7z4zt69e7sD0rSpzZ0HANTV1ZGEhITkxrFduXLF1M/Pr0pxmbm5eb2Dg0PdzZs3DU+cONFt6tSpJQcPHnxSA/2LL77IsbW1rReLxRg8eLDnjRs3jAYOHPgkB/ujR490N23aZH/16tU0c3NzyQcffGD36aef2q5Zs6bgwoULlvfu3UvgcDgNiub079+/MioqymzkyJENYmlvXfLakDZiPfVOIiICcHZmCfo1QTY5Di3XXn3qSXPy0qu+vr58Jye
nupUrVxZFRUWZykuvWlhYSOSlV/v371/9xx9/mC9ZssTx4sWLptbW1iqNmctLr5qamlJ56VUAaK70qp6e3pPSq1FRUSbNlV798ssv7Xk8Hn/o0KGeiqVXt23bZv/BBx/Ypaen65uamlJlcSuWLOXxePwtW7bY5+bm6ikrvarsnFQtvern58fjcrn869evmyUkJBgBgLz06s6dO6309PQo8G/p1Q0bNvQoKirS0dNrNe07gIalVxXPQ76+LaVXAWDatGnFBw8etDp//rzlrFmzGiR6OXDggBWfz/fi8/n89PR0wzt37jQo7hIVFWWSmZlpGBAQwOPxePwjR45YCwQCfWtr63oDAwNJWFhYrwMHDnQzNTV98svao0cPsWK86sJ66h0kK0t6Z5RDuw0gMu0uKQn47Tfgs88AXfan0eGSkgyQkGDSbKMtkRDEx5siOdkA3t6s9CorvdpAW0uvhoWFCT/++GMnX1/fKisrqyf7pqSk6EdERNjKRlLqQ0NDe9U0uhxEKcXQoUPLzp49e7/xcePi4pLPnDljfuLECctdu3b1+Pvvv9MAaf30tpZRfRrN9tQJIWcJIWeae6g7sK5GIAAcHaWVphgt9fXXgIEBsGCBpiN5PvH5tfDxqQSHozznBIdD4etbAS8vVnoVrPTqs5ZeNTMzk6xbty77o48+avBhraSkRMfIyEhiZWVV//DhQ92oqCiLxvuOGDGiMiYmxlQ+g76srIxz9+5dA6FQyCkuLtYJCwsTfvPNNw9TUlKeXENPS0sz8PHxaVMZ1afRUndkq+zrFAB2AA7Jns8A0OYk8887djublhMKgQMHgBkzgO7dW9+eaX/SNHIPEBjIR/O1Vx+0ZzIgVnr1+S69unDhwib51QMDA6t9fHyq3NzcfOzt7esGDBjQ5IOZg4ODePfu3Q+mT5/eu66ujgDA2rVrcywsLCQTJ050r5XNCfn000+fZFW9efOm6eeff672okSqlF6NaVxqUdkyTdP20qtubsDAgdLbnxkt9NVXwJtvAjExwIABmo5Ga3VE6VW88YYTDh60QW3tv/+BDQwkeO21Quzdy0qvMm2m6dKr165dM9qyZYvd6dOnmwzXP41nLb1qIkvVCgAghLgCMGmPwJ4XEgnw8CGb+a61JBLp0HtgIGvQtcG2bbkwNGx47dHQUIJt21jpVeapaLr0akFBgd7nn3/eOKOqWqgyG2gVgChCyD0ABIALgEVqjaqLyc+XTtxlw+9a6pdfgPR0YN06TUfCAIC5uQRffinAsmW9UF3NgZGRBNu3Z8HcXKtzxbLSq9pL06VXQ0JClF7aUIdWP7nIqqZ5AFgJYAUAT0rpJXUH1pWw29m0XEQEYGcHTJ2q6Ui6MolEIlG9lNGcOSXgcqshrb1ahdmzWelVhgEg+ztq9gNuq406IcQYwNsAllNK7wDoSQiZ2H4hdn1Zsvx7rFHXQpmZwIULwKJFgL5+69szTyuhsLDQQuWGXT5pztS0vr0nxzFMZyWRSEhhYaEFgITmtlFl+H0/gFgAgbLnOQCOAzj3zBE+J1g2OS22cyegowMsXKjpSLo0sVi8IC8v79u8vDwfqJr0isMBoqKyAZjhzp1nKujBMF2EBECCWCxu9r5bVRp1N0ppGCFkBgBQSqtI4wwLTIsEAsDCAjA313QkTAOVlcC+fdJhd5YVSK0GDBhQAICl6WMYNVPlE3MdIcQIAAUAQogbgHZL/vA8yMpiQ+9a6dAhoLQUWL5c05EwDMO0C1V66usAXATgTAg5DGlt89fVGFOXIxCwoXetQ6l0gly/fsDgwZqOhmEYpl202qhTSn8hhMQCGATpLW0rKaWqJZFgAEgbddZuaJnoaCAhAfjuO4BdTWIYpotQZfb7bwAGUkrPU0rPUUqLCCF7OiC2LqGiAiguZsPvWiciArC2lqaFZRiG6SJUuabuCuBdQshahWValSJWm7GZ71ro4UPg9Glp4RYjI01HwzAM025UadRLAYwGYCur3Na
kYg3TPJZ4Rgt98430mvqSJZqOhGEYpl2p0qgTSqmYUroUwI8A/gTQQ5WDE0KCCCGphJAMQsh7zWwzjXt1Np4AABokSURBVBCSRAhJJIT8oLB8DiEkXfaYo8rraSPWqGuZmhpgzx4gOJgNnzAM0+WoMvv9G/k3lNLvCSHxAJa1thMhRAfA1wDGAsgGcJMQcoZSmqSwjQeA/wMwhFJaQgjpIVtuBWAtpMP8FECsbN8mZfK0XVYWoKsL2NtrOhIGAHD0KFBUxG5jYximS2q2p04IkadKOU4IsZI/ANwHsEaFYwcAyKCU3qOU1gE4AmBSo23eAPC1vLGmlBbIlo8H8CultFi27lcAQSqflRYRCAAnJ2nSMkbDKAV27AC8vIBRozQdDcMwTLtrqaf+A4CJkKaIpZDeziZHAfRWtpMCRwAPFZ5nAxjYaBsuABBCrgHQAbBOVkBG2b6OjV+AELIQwEIA6Kml49sCARt61xo3bgCxsdIyq+w2NoZhuqBmG3VK6UTZV1c1v74HgBEAnABcJYT4qrozpXQPgD0A4O/vT9UR4LPKygKGDdN0FAwA6W1s5ubA7NmajoRhGEYtmm3UCSH9W9qRUnqrlWPnAHBWeO4kW6YoG8ANSqkIwH1CSBqkjXwOpA294r5Rrbye1qmvB7Kz2XwsrZCXBxw7BixdCpiaajoahmEYtWhp+H1bC+sogNYuSt4E4EEIcYW0kZ4OYGajbU4DmAFgPyGkO6TD8fcAZALYRAixlG03DtIJdZ3Ko0fShp0Nv2uBvXsBkUjaqDMMw3RRLQ2/j3yWA1NKxYSQ5QAuQXq9fB+lNJEQsh5ADKX0jGzdOEJIEoB6AG9TSh8DACHkU0g/GADAekpp8bPEownsdjYtIRJJ700PCgK4XE1HwzAMozaq3NIGQogPAD4AQ/kySmlka/tRSi8AuNBo2ccK31MAb8kejffdB2CfKvFpq6ws6VfWqGvYyZNAbq70/nSGYZgurNVGXZYedgSkjfoFABMgTUDTaqP+vGM9dS0REQH07g1MmKDpSBiGYdRKlYxyUyFNE5tHKZ0LoA8AlipWBQIBYGXF5mVpVFwc8OefwLJlAEeVX3eGYZjOS5X/ctWUUgkAsSwhTQEazmpnmpGVxXrpGhcRARgbA/PmaToShmEYtVPlmnoMIaQbgL2QJqKpAPCXWqPqIgQC6agvoyGPHwOHDwNz5gDdumk6GoZhGLVrsVEnhBAAn1FKSwF8Qwi5CMCcUnq3Q6Lr5AQCYMQITUfxHNu3T1rAZVmrpQoYhmG6hBYbdUopJYRcAOAre/6gI4LqCoRC6YMNv2tIfT2wc6f0U5WvykkKGYZhOjVVrqnfIoS8oPZIuhj5zHeWTU5Dzp0DHjxg1dgYhnmuqHJNfSCAWYSQLACVkBZ2oZRSP7VG1smx29k0LCJCWh5vUuPCgAzDMF2XKo36eLVH0QWxRl2DkpOBy5eBjRulxewZhmGeE60Ov1NKsyC9hW2U7PsqVfZ73mVlAfr6gK2tpiN5Dn39NWBgALzxhqYjYRiG6VCtNs6yjHLv4t+CKnoADqkzqK5AIACcnVm+kw5XVgYcOABMnw7Y2Gg6GoZhmA6lSpMTAiAY0uvpoJTmAjBTZ1BdgUDAht414sABoKKCTZBjGOa5pEqjXicrvEIBgBBiot6QugaWTU4DJBLpBLlBgwB/f01HwzAM0+FUmUV0jBCyG0A3QsgbAOZBml2OaYZIJC0Kxm5n62C//gqkpQGH2NUhhmGeT6026pTSrYSQsQDKAHgC+JhS+qvaI+vEcnOlnUbWU+9gERHSmYmvvKLpSBiGYTRCpft9ZI04a8hVxOqoa8C9e8D588CHH0pvO2AYhnkOqTL7fQohJJ0QIiSElBFCygkhZR0RXGfFsslpwM6dgI4OsHixpiNhGIbRGFV66v8F8DKlNFndwXQV8kbdmRWo7RiVlcB33wGhoYCDg6ajYRiG0RhVZr/nP22DTggJIoSkEkIyCCHvKVn
/OiGkkBASJ3ssUFhXr7D8zNO8vqYIBNJbpI2MNB3Jc+KHH4DSUnYbG8Mwzz1V66kfBXAaQK18IaX0ZEs7EUJ0AHwNYCyAbAA3CSFnKKVJjTY9SilV9t+4mlLaV4X4tA67na0DUQrs2AH07QsMGaLpaBiGYTRKlUbdHNLUsOMUllEALTbqAAIAZFBK7wEAIeQIgEkAGjfqXY5AAPB4mo7iOXH1KhAfD3z7LUCIpqNhGIbRKFVuaZv7lMd2BPBQ4Xk2pBXfGgslhLwIIA3AKkqpfB9DQkgMADGAzZTS0413JIQsBLAQAHpqSdeYUmmjPm5c69sy7SAiArCyAmbO1HQkDMMwGqfK7HcuIeQ3QkiC7LkfIeTDdnr9swB6ycq4/grggMI6F0qpP4CZALYTQtwa70wp3UMp9aeU+ttoSZ7vkhJpllIt+YzRtT18CJw6BcyfzyYwMAzDQLWJcnshLeYiAgD6/9u7/2iryjqP4+8PKJCCgwaGgs3FQs1KTS/YL5nWSk1LwdLK0hUsLGIp1RonZ2rV1KR/TL+Ws5oLEmBkZqn5YymOOGpNpjkJXFMxTSZU7gW0QH6JCsiF7/yx92WdbvfHPvecffc5535ea511795n732/z4V1v+fZ+3m+T8Qq4MIM520gWd2t04R0334RsTkiOp/TXwucUvLehvTrc8ADwLsy/MzCeTrbAFq4MKnyc+mlRUdiZlYTsiT1gyJiRZd9HRnOWwlMkjRR0jCSDwJ/NYpd0hElm9OAP6b7D5U0PP1+DPA+6uRZvNdRHyC7dsGiRXDuudDUVHQ0ZmY1IctAuZfSW9+dC7pcALzY10kR0SFpLnAvMBRYEhFPSboSaI2IpcAXJU0j+ZCwBZiZnv42YKGkfSQfPL7dzaj5muRqcgPklltg0yb4wheKjsTMrGYoWYCtlwOko4FFwHuBrcDzwMURsTb36MrQ3Nwcra2tRYfBFVckY7dee82DsXM1ZQrs2AFPP+1f9ACS9Gg61sXMalCW0e/PAaenS64OiYgd+YdVvzrXUXeeydHy5bByZfLpyb9oM7P9+kzqki7vsg2wHXg0Ih7PKa661ZnULUfz5sGoUfCZzxQdiZlZTckyUK4ZmEMy73w88HngLGCxpH/OMba65GpyOfvLX+Dmm2HmzCSxm5nZflkGyk0ATo6IVwAkfRO4G5gKPEqy4IsBu3fDiy96OluuFi+GPXvgssuKjsTMrOZk6akfTknNd5L56m+KiJ1d9g96G9JZ+O6p52TPHliwICnXd+yxRUdjZlZzsvTUfwYsl3Rnun0u8PN04FxdTDMbKJ7OlrM77oAXXkiKzpiZ2d/IMvr9Kkn3kBSAAZgTEZ1zxy7KLbI65GpyOWtpgaOPhrPPLjoSM7OalKWnTprEi58EXuM6k/qECcXG0ZCeeAIeegi+/30YOrToaMzMalKWZ+qWUVsbjBsHw4cXHUkDmjcvWbRl1qyiIzEzq1lO6lXU3u5b77nYsgV+9jO4+GI49NCiozEzq1lO6lXkwjM5WbIEdu6EuXOLjsTMrKY5qVdJhJN6LvbuhfnzYepUOOGEoqMxM6tpTupV8tJLSWfSSb3Kli2DtWu9GpuZWQZO6lXi6Ww5aWlJphOcd17RkZiZ1Twn9SrpTOruqVfRM8/A/ffDnDlwQKbZl2Zmg5qTepW4mlwO5s+HYcPgc58rOhIzs7rgpF4l7e1w8MFw2GFFR9IgXn4ZrrsOPvlJOPzwoqMxM6sLTupV0jnyPVlu3ip2/fXwyiseIGdmVoZck7qksyStlrRG0le6eX+mpE2SHk9fny15b4akP6WvGXnGWQ1eR72K9u1LKshNmQKTJxcdjZlZ3cgtqUsaCswHzgaOBz4l6fhuDr05Ik5KX9em5x4GfBM4FZgCfFNSTZcSczW5KvrlL2H1anjPe+CRR5IiAGZm1qc8e+pTgDUR8VxEvA7cBEzPeO6HgPsjYktEbAXuB87KKc6K7dwJGze6p14Vy5bB9OnJc4w
lS+CMM5Jf7LJlRUdmZlbz8kzq44F1Jdvr031dnS9plaRbJR1VzrmSZktqldS6adOmasVdtvXrk69O6hVatgw+9jHYtSvpne/YkTxXX78eLrjAid3MrA9FD5S7C2iKiBNIeuM/KefkiFgUEc0R0Tx27NhcAszC09mqIAJmz4bdu7t/f+dO+PznfSvezKwXeSb1DcBRJdsT0n37RcTmiOj8K34tcErWc2uJq8lVwfLlsG1b78ds2wYrVgxMPGZmdSjPpL4SmCRpoqRhwIXA0tIDJB1RsjkN+GP6/b3AmZIOTQfInZnuq0nt7ckj4PHdPVywbF58MVm8pTdDhsALLwxMPGZmdSi32psR0SFpLkkyHgosiYinJF0JtEbEUuCLkqYBHcAWYGZ67hZJV5F8MAC4MiK25BVrpdra4Mgj4cADi46kjo0bB6+/3vsx+/Ylv2gzM+tWrgW1I2IZsKzLvm+UfP9V4Ks9nLsEWJJnfNXi6WxVsGdPkrR7M3p0MnfdzMy6VfRAuYbgddSrYN48GDkSRozo/v03vAEWLnTJPjOzXjipV2jfPli3zkm9IuvXw+23J6ux3XZbstTqyJFwyCHJ1wkT4NZb4cMfLjpSM7Oa5vUsK7RxYzILy0m9AgsXJp+OLr0UJk5Mbn2sWJEMijvyyOSWu3voZmZ9clKvkKezVWj3bli0CM45J0nokCTwU08tNi4zszrk2+8V6kzq7qn30y23JLc7vBqbmVnFnNQr5GpyFWppgWOPhQ9+sOhIzMzqnpN6hdrbk/Fco0cXHUkdWrEiec2dmxSWMTOzivgvaYU8na0C8+bBqFEwY0bRkZiZNQQn9Qq1tTmp98vGjXDzzUlCHzWq6GjMzBqCk3qFXE2unxYvTsrCXnZZ0ZGYmTUMJ/UKvPoqbN7snnrZ9uyBBQvgjDPguOOKjsbMrGF4nnoF1q1Lvjqpl+nOO2HDhiSxm5lZ1binXgFPZ+unlhZoanLZVzOzKnNSr4CryfXDqlXw4IPJs/ShQ4uOxsysoTipV6C9PclLRxxRdCR1ZN68ZMW1WbOKjsTMrOE4qVegrQ3Gj4cDPDIhmy1b4IYb4KKL4LDDio7GzKzhOKlXwNPZyvTjH8POnUkFOTMzqzon9Qq4mlwZ9u6F+fPhtNPgxBOLjsbMrCHlmtQlnSVptaQ1kr7Sy3HnSwpJzel2k6Sdkh5PXz/MM87+2Ls3mdLmpJ7RPffA8897NTYzsxzl9jRY0lBgPnAGsB5YKWlpRDzd5bhRwJeA5V0u8WxEnJRXfJX685+ho8O33zNraUkGIJx3XtGRmJk1rDx76lOANRHxXES8DtwETO/muKuA7wC7coyl6ryOehlWr4b77oM5c+DAA4uOxsysYeWZ1McD60q216f79pN0MnBURNzdzfkTJT0m6TeSTssxzn5xUi/D/PkwbBjMnl10JGZmDa2wyViShgBXAzO7eftF4M0RsVnSKcAdkt4eES93ucZsYDbAmwc4u7qaXEY7dsB118EnPgGHH150NGZmDS3PnvoG4KiS7Qnpvk6jgHcAD0haC7wbWCqpOSJ2R8RmgIh4FHgWOKbrD4iIRRHRHBHNY8eOzakZ3Wtvh0MP9aqhfbr++iSxe4CcmVnu8kzqK4FJkiZKGgZcCCztfDMitkfEmIhoiogm4BFgWkS0ShqbDrRD0tHAJOC5HGMtm6ezZRCRVJCbPBmmTCk6GjOzhpfb7feI6JA0F7gXGAosiYinJF0JtEbE0l5OnwpcKWkPsA+YExFb8oq1P9raPPK9T7/6FTzzTNJbNzOz3OX6TD0ilgHLuuz7Rg/HfqDk+9uA2/KMrVLt7TB1atFR1LiWFhg7NnmebmZmuXNFuX54+WXYts2333u1di3cdVcy4n348KKjMTMbFJzU+8HT2TK45hoYMiSZm25mZgPCSb0fvI56H157Da69Fj76UZgwoehozMwGDSf1fnBPvQ833ghbt3oam5nZAHNS74f29qTa6bhxRUdSgyK
SAXLvfGeyIpuZmQ2YwirK1bO2tuSu8hB/JPpbDz8MTzwBixaBVHQ0ZmaDitNSP7S3+3l6j1paYPRo+PSni47EzGzQcVLvB1eT68GGDXD77XDJJXDwwUVHY2Y26Dipl6mjI8ldTurdWLgQ9u6FSy8tOhIzs0HJSb1ML7yQ5C3ffu9i9+4kqX/kI3D00UVHY2Y2KDmpl8nT2Xpw662wcaOnsZmZFchJvUxeR70HLS1wzDFw+ulFR2JmNmg5qZfJPfVurFwJy5fD3Lme52dmViD/BS5TezuMGQMHHVR0JDVk3jwYORJmzCg6EjOzQc1JvUyeztbFpk1w001JQj/kkKKjMTMb1JzUy9TW5qT+VxYvhtdfT269m5lZoZzUyxCRJHVPZ0t1dMCCBcnguOOOKzoaM7NBz7Xfy7B9O7zyinvq+915J6xfD/PnFx2JmZnhnnpZPJ2ti5YWaGpKCs6YmVnhck3qks6StFrSGklf6eW48yWFpOaSfV9Nz1st6UN5xplV53Q2334HnnwSfvObpCTs0KFFR2NmZuR4+13SUGA+cAawHlgpaWlEPN3luFHAl4DlJfuOBy4E3g4cCfxS0jERsTeveLPwHPUS8+bBiBEwa1bRkZiZWSrPnvoUYE1EPBcRrwM3AdO7Oe4q4DvArpJ904GbImJ3RDwPrEmvV6j2dhg+HMaOLTqSgm3dCjfcABddBG98Y9HRmJlZKs+BcuOBdSXb64FTSw+QdDJwVETcLemKLuc+0uXc8V1/gKTZwOx0c7ekP1Qj8L4UcLd5DPDSgP/UvvzoR8mr/2qzXdXRqG07tugAzKxnhY1+lzQEuBqY2d9rRMQiYFF6vdaIaO7jlLrUqG1r1HZB47ZNUmvRMZhZz/JM6huAo0q2J6T7Oo0C3gE8IAlgHLBU0rQM55qZmVkXeT5TXwlMkjRR0jCSgW9LO9+MiO0RMSYimiKiieR2+7SIaE2Pu1DScEkTgUnAihxjNTMzq3u59dQjokPSXOBeYCiwJCKeknQl0BoRS3s59ylJvwCeBjqAyzKMfF9UrdhrUKO2rVHbBY3btkZtl1lDUEQUHYOZmZlVgSvKmZmZNQgndTMzswZRd0m9r9Kz6eC6m9P3l0tqGvgoy5ehXVMl/V5Sh6QLioixvzK07XJJT0taJelXkuqiEG+Gds2R9KSkxyX9Nq2UWBcqKfFsZsWpq6ReUnr2bOB44FPd/KG8BNgaEW8F/oOkWl1Ny9iudpI5/T8f2Ogqk7FtjwHNEXECcCvw3YGNsnwZ2/XziHhnRJxE0qarBzjMfsnYtm5LPJtZseoqqZOt9Ox04Cfp97cCH1Q6Eb6G9dmuiFgbEauAfUUEWIEsbft1RLyWbj5CUpeg1mVp18slmwcD9TIqtZISz2ZWoHpL6t2Vnu1aPnb/MRHRAWwHar1AeZZ21aty23YJcE+uEVVHpnZJukzSsyQ99S8OUGyV6rNtpSWeBzIwM+tdvSV1a2CSLgaage8VHUu1RMT8iHgL8C/A14uOpxpKSjz/U9GxmNlfq7eknqV87P5jJB0A/B2weUCi679GLoubqW2STge+RlJVcPcAxVaJcv/NbgLOyzWi6imnxPNa4N0kJZ49WM6sYPWW1HstPZtaCsxIv78A+J+o/Qo7WdpVr/psm6R3AQtJEvrGAmLsjyztmlSy+RHgTwMYXyUqKfFsZgWqq6SePiPvLD37R+AXnaVn04VgAH4EvFHSGuByoMfpOLUiS7skTZa0Hvg4sFDSU8VFnF3Gf7PvASOBW9LpXzX/gSZju+ZKekrS4yT/F2f0cLmakrFtZlaDXCbWzMysQdRVT93MzMx65qRuZmbWIJzUzczMGoSTupmZWYNwUjczM2sQTuo24CQ90FmoRNIySaMrvN4HJP1XD+/dmK7+9o+V/Awzs3pwQNEBWONJF9BRRPS5+ExEfDjHOMYBk9MV+7Kec0A6T9vMrO64pz5ISPrXdH3s36a91y+n+98i6b8lPSrpIUnHpfu
vk/Sfkv5X0nOla7hLukLSyrQH/K10X1N6/euBPwBHSVogqTUtwPKtHuJaK2lMuvb44+nreUm/Tt8/U9LvlKwlf4ukken+syQ9I+n3wMd6aPZ9wPj0mqeldwh+kG7/QdKU9Fr/Jumnkh4GflqN37eZWRGc1AcBSZOB84ETSdbILq3RvQj4QkScAnwZuKbkvSOA9wPnAN9Or3UmMIlkec6TgFMkTU2PnwRcExFvj4g24GsR0QycAPyDpBN6ijEifpiuOz6ZZFWwqyWNIVkE5fSIOBloBS6XNAJYDJwLnAKM6+Gy04BnI+KkiHgo3XdQ+nMuBZaUHHt8+nM+1VOMZma1zrffB4f3AXdGxC5gl6S7ANJe73tJyrN2Hju85Lw70lvoT0t6U7rvzPT1WLo9kiSZtwNtEfFIyfmfkDSb5P/ZESSJc1Ufsf6ApF7/XZLOSc95OI1vGPA74Djg+Yj4U9qOG4DZGX8XNwJExIOSDil5nr80InZmvIaZWU1yUh/chgDb0p5rd0pXS1PJ13+PiIWlB0pqAl4t2Z5I0vOfHBFbJV0HjOgtGEkzgb8nqTve+bPu79p7ltRTvFl0rYvcuf1q1wPNzOqNb78PDg8D50oakfbOzwGIiJeB5yV9HJIBbpJO7ONa9wKzSp5tj5d0eDfHHUKSKLenvfyze7uopM7b/xeXDLB7BHifpLemxxws6RjgGaBJ0lvS48q5Zf7J9FrvB7ZHxPYyzjUzq2nuqQ8CEbEyXflsFfAX4EmgM5ldBCyQ9HXgQJJ1v5/o5Vr3SXob8Lv0lvgrwMXA3i7HPSHpMZIEvI7kg0Vv5gKHAb9Or9saEZ9Ne+83Sup8LPD1iPi/9Lb+3ZJeAx4iWeM7i11pXAcCszKeY2ZWF7xK2yAhaWREvCLpIOBBYHZE/L7ouAaSpAeAL3vdbzNrVO6pDx6LJB1P8lz7J4MtoZuZDQbuqZuZmTUID5QzMzNrEE7qZmZmDcJJ3czMrEE4qZuZmTUIJ3UzM7MG8f+4tntQUvTdwQAAAABJRU5ErkJggg==\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "y_pred = postproc.predict_proba(X_test)[:, 1]\n",
+    "y_lr = postproc.estimator_.predict_proba(X_test)[:, 1]\n",
+    "br = postproc.postprocessor_.base_rates_\n",
+    "i = X_test.sex == 1\n",
+    "\n",
+    "plt.plot([0, br[0]], [0, 1-br[0]], '-b', label='All calibrated classifiers (Females)')\n",
+    "plt.plot([0, br[1]], [0, 1-br[1]], '-r', label='All calibrated classifiers (Males)')\n",
+    "\n",
+    "plt.scatter(generalized_fpr(y_test[~i], y_lr[~i]),\n",
+    "            generalized_fnr(y_test[~i], y_lr[~i]),\n",
+    "            300, c='b', marker='.', label='Original classifier (Females)')\n",
+    "plt.scatter(generalized_fpr(y_test[i], y_lr[i]),\n",
+    "            generalized_fnr(y_test[i], y_lr[i]),\n",
+    "            300, c='r', marker='.', label='Original classifier (Males)')\n",
+    "                                                                        \n",
+    "plt.scatter(generalized_fpr(y_test[~i], y_pred[~i]),\n",
+    "            generalized_fnr(y_test[~i], y_pred[~i]),\n",
+    "            100, c='b', marker='d', label='Post-processed classifier (Females)')\n",
+    "plt.scatter(generalized_fpr(y_test[i], y_pred[i]),\n",
+    "            generalized_fnr(y_test[i], y_pred[i]),\n",
+    "            100, c='r', marker='d', label='Post-processed classifier (Males)')\n",
+    "\n",
+    "plt.plot([0, 1], [generalized_fnr(y_test, y_pred)]*2, '--', c='0.5')\n",
     "\n",
-    "clf = GridSearchCV(rew, params, scoring=scoring, cv=5)\n",
-    "clf.fit(X_train, y_train, **{'sample_weight': sw_train})\n",
-    "clf.score(X_test, y_test)"
+    "plt.axis('square')\n",
+    "plt.xlim([0, 0.4])\n",
+    "plt.ylim([0.4, 0.8])\n",
+    "plt.xlabel('generalized fpr');\n",
+    "plt.ylabel('generalized fnr');\n",
+    "plt.legend(bbox_to_anchor=(1.04,1), loc='upper left');"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can see the generalized false negative rate is approximately equalized and the classifiers remain close to the calibration lines."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   }

From e0856e370ba3f8edeff881b199970e77f5c57ee9 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 31 Oct 2019 09:49:07 -0400
Subject: [PATCH 37/61] updated readme

---
 aif360/sklearn/README.md | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/aif360/sklearn/README.md b/aif360/sklearn/README.md
index da318ced..98497eb9 100644
--- a/aif360/sklearn/README.md
+++ b/aif360/sklearn/README.md
@@ -20,16 +20,16 @@ objects with sample properties (protected attributes) as the index
   - [ ] MEPS
 - [ ] Implement metrics as individual functions instead of instance methods
   - [x] Make certain metrics compatible as sklearn scorers
-  - [x] Use "groups" and "priv_group" keywords to specify protected attributes to
+  - [x] Use "prot_attr" and "priv_group" keywords to specify protected attributes to
   functions
-  - [ ] Generalized confusion matrix
+  - [x] Generalized confusion matrix
   - [ ] Sample distortion metrics
 - [ ] Make inprocessing algorithms compatible as sklearn `Estimator`s
+  - [x] Adversarial debiasing
   - [ ] **[External]** `get_feature_names()` from data preprocessing
   steps that would remove DataFrame formatting
     - [ ] SLEP007/8
   - [ ] Prejudice remover
-  - [ ] Adversarial debiasing
   - [ ] Meta-fair classifier
 - [ ] Make preprocessing algorithms compatible as sklearn `Transformer`s
   - [ ] **[External]** Add functionality to modify X and y
@@ -41,11 +41,9 @@ objects with sample properties (protected attributes) as the index
     - [X] Meta-estimator workaround
     - [ ] **[External]** SLEP006 - Sample properties
 - [ ] Make postprocessing algorithms compatible
-  - [ ] **[External]** Allow for `fit(y_true, y_pred)`
-    - [ ] New SLEP?
-  - [ ] Calibrated equalized odds postprocessing
+  - [x] Calibrated equalized odds postprocessing
+    - [x] Meta-estimator workaround again
   - [ ] Equalized odds postprocessing
   - [ ] Reject option classification
 - [ ] Miscellaneous:
-  - [ ] LIME encoder
   - [ ] Explainers

From 8f8cd760d0692dc5afe5c9328420212e8ff780c1 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 31 Oct 2019 17:37:50 -0400
Subject: [PATCH 38/61] fixed tests and added additional tests

---
 aif360/sklearn/datasets/openml_datasets.py    |  2 +-
 .../tests/test_calibrated_equalized_odds.py   | 10 ++++++++++
 aif360/sklearn/tests/test_metrics.py          | 19 +++++++++++++++++--
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 45d8cd7f..c1c30b02 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -209,7 +209,7 @@ def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
                   'housing', 'loan', 'contact', 'day', 'month', 'duration',
                   'campaign', 'pdays', 'previous', 'poutcome', 'deposit']
     # remap target
-    df.deposit = df.deposit.map({'1': False, '2': True})
+    df.deposit = df.deposit.map({'1': False, '2': True}).astype('bool')
     # replace 'unknown' marker with NaN
     df.apply(lambda s: s.cat.remove_categories('unknown', inplace=True)
              if hasattr(s, 'cat') and 'unknown' in s.cat.categories else s)
diff --git a/aif360/sklearn/tests/test_calibrated_equalized_odds.py b/aif360/sklearn/tests/test_calibrated_equalized_odds.py
index 247ba4c8..f1a6f3b3 100644
--- a/aif360/sklearn/tests/test_calibrated_equalized_odds.py
+++ b/aif360/sklearn/tests/test_calibrated_equalized_odds.py
@@ -1,5 +1,6 @@
 import numpy as np
 from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
 
 from aif360.datasets import AdultDataset
 from aif360.sklearn.datasets import fetch_adult
@@ -26,6 +27,15 @@ def test_calib_eq_odds_sex():
     assert np.isclose(orig_cal_eq_odds.priv_mix_rate, cal_eq_odds.mix_rates_[1])
     assert np.isclose(orig_cal_eq_odds.unpriv_mix_rate, cal_eq_odds.mix_rates_[0])
 
+def test_split():
+    adult_est, adult_post = adult.split([0.75], shuffle=False)
+    X_est, X_post, y_est, y_post = train_test_split(X, y, shuffle=False)
+
+    assert np.all(adult_est.features == X_est)
+    assert np.all(adult_est.labels.ravel() == y_est)
+    assert np.all(adult_post.features == X_post)
+    assert np.all(adult_post.labels.ravel() == y_post)
+
 def test_postprocessingmeta():
     logreg = LogisticRegression(solver='lbfgs', max_iter=500)
 
diff --git a/aif360/sklearn/tests/test_metrics.py b/aif360/sklearn/tests/test_metrics.py
index c0a1c6e9..326c7c8b 100644
--- a/aif360/sklearn/tests/test_metrics.py
+++ b/aif360/sklearn/tests/test_metrics.py
@@ -6,6 +6,7 @@
 from aif360.metrics import ClassificationMetric
 from aif360.sklearn.metrics import (
         consistency_score, specificity_score, selection_rate,
+        base_rate, generalized_fpr, generalized_fnr,
         disparate_impact_ratio, statistical_parity_difference,
         equal_opportunity_difference, average_odds_difference,
         average_odds_error, generalized_entropy_error,
@@ -13,14 +14,16 @@
 
 
 X, y, sample_weight = fetch_adult(numeric_only=True)
-y_pred = LogisticRegression(solver='liblinear').fit(X, y,
-        sample_weight=sample_weight).predict(X)
+lr = LogisticRegression(solver='liblinear').fit(X, y, sample_weight=sample_weight)
+y_pred = lr.predict(X)
+y_proba = lr.predict_proba(X)[:, 1]
 adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
         features_to_keep=['age', 'education-num', 'capital-gain',
                           'capital-loss', 'hours-per-week'],
         features_to_drop=[])
 adult_pred = adult.copy()
 adult_pred.labels = y_pred
+adult_pred.scores = y_proba
 cm = ClassificationMetric(adult, adult_pred,
                           unprivileged_groups=[{'sex': 0}],
                           privileged_groups=[{'sex': 1}])
@@ -36,10 +39,22 @@ def test_specificity():
     spec = specificity_score(y, y_pred, sample_weight=sample_weight)
     assert spec == cm.specificity()
 
+def test_base_rate():
+    base = base_rate(y, y_pred, sample_weight=sample_weight)
+    assert base == cm.base_rate()
+
 def test_selection_rate():
     select = selection_rate(y, y_pred, sample_weight=sample_weight)
     assert select == cm.selection_rate()
 
+def test_generalized_fpr():
+    gfpr = generalized_fpr(y, y_proba, sample_weight=sample_weight)
+    assert np.isclose(gfpr, cm.generalized_false_positive_rate())
+
+def test_generalized_fnr():
+    gfnr = generalized_fnr(y, y_proba, sample_weight=sample_weight)
+    assert np.isclose(gfnr, cm.generalized_false_negative_rate())
+
 def test_disparate_impact():
     di = disparate_impact_ratio(y, y_pred, prot_attr='sex',
                                 sample_weight=sample_weight)

From e01f23fe60087d2135ad087a5d04783dd01ea81c Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Mon, 11 Nov 2019 16:49:32 -0500
Subject: [PATCH 39/61] added COMPAS and other dataset fixes* fixed german
 dataset to match paper* added ColumnAlreadyDroppedWarnings in
 standardize_dataset* added compas test and fixed old tests to match new drop
 warnings

---
 aif360/sklearn/datasets/__init__.py        |  1 +
 aif360/sklearn/datasets/compas_dataset.py  | 72 ++++++++++++++++++++++
 aif360/sklearn/datasets/openml_datasets.py |  2 +-
 aif360/sklearn/datasets/utils.py           | 45 +++++++++++---
 aif360/sklearn/tests/test_datasets.py      | 19 ++++--
 5 files changed, 125 insertions(+), 14 deletions(-)
 create mode 100644 aif360/sklearn/datasets/compas_dataset.py

diff --git a/aif360/sklearn/datasets/__init__.py b/aif360/sklearn/datasets/__init__.py
index 1a5a27f0..43168666 100644
--- a/aif360/sklearn/datasets/__init__.py
+++ b/aif360/sklearn/datasets/__init__.py
@@ -1,2 +1,3 @@
 from aif360.sklearn.datasets.utils import *
 from aif360.sklearn.datasets.openml_datasets import *
+from aif360.sklearn.datasets.compas_dataset import fetch_compas
diff --git a/aif360/sklearn/datasets/compas_dataset.py b/aif360/sklearn/datasets/compas_dataset.py
new file mode 100644
index 00000000..31f2a14e
--- /dev/null
+++ b/aif360/sklearn/datasets/compas_dataset.py
@@ -0,0 +1,72 @@
+import os
+
+import pandas as pd
+
+from aif360.sklearn.datasets.utils import standarize_dataset
+
+
+# cache location
+DATA_HOME_DEFAULT = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                 '..', 'data', 'raw')
+COMPAS_URL = 'https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv'
+
+def fetch_compas(data_home=None, binary_race=False,
+                 usecols=['sex', 'age', 'age_cat', 'race', 'juv_fel_count',
+                          'juv_misd_count', 'juv_other_count', 'priors_count',
+                          'c_charge_degree', 'c_charge_desc'],
+                 dropcols=[], numeric_only=False, dropna=True):
+    """Load the COMPAS Recidivism Risk Scores dataset.
+
+    Optionally binarizes 'race' to 'Caucasian' (privileged) or 'African-American'
+    (unprivileged). The other protected attribute is 'sex' ('Male' is
+    _unprivileged_ and 'Female' is _privileged_). The outcome variable is
+    'no recid.' (favorable) if the person was not accused of a crime within two
+    years or 'did recid.' (unfavorable) if they were.
+
+    Args:
+        data_home (string, optional): Specify another download and cache folder
+            for the datasets. By default all AIF360 datasets are stored in
+            'aif360/sklearn/data/raw' subfolders.
+        binary_race (bool, optional): Filter only White and Black defendants.
+        usecols (single label or list-like, optional): Feature column(s) to
+            keep. All others are dropped.
+        dropcols (single label or list-like, optional): Feature column(s) to
+            drop.
+        numeric_only (bool): Drop all non-numeric feature columns.
+        dropna (bool): Drop rows with NAs.
+
+    Returns:
+        namedtuple: Tuple containing X and y for the COMPAS dataset accessible
+        by index or name.
+    """
+    cache_path = os.path.join(data_home or DATA_HOME_DEFAULT,
+                              os.path.basename(COMPAS_URL))
+    if os.path.isfile(cache_path):
+        df = pd.read_csv(cache_path, index_col='id')
+    else:
+        df = pd.read_csv(COMPAS_URL, index_col='id')
+        df.to_csv(cache_path)
+
+    # Perform the same preprocessing as the original analysis:
+    # https://github.com/propublica/compas-analysis/blob/master/Compas%20Analysis.ipynb
+    df = df[(df.days_b_screening_arrest <= 30)
+          & (df.days_b_screening_arrest >= -30)
+          & (df.is_recid != -1)
+          & (df.c_charge_degree != 'O')
+          & (df.score_text != 'N/A')]
+
+    for col in ['sex', 'age_cat', 'race', 'c_charge_degree', 'c_charge_desc']:
+        df[col] = df[col].astype('category')
+
+    df.two_year_recid = df.two_year_recid.replace({0: 'no recid.', 1: 'did recid.'}).astype('category').cat.as_ordered()  # 'did recid' < 'no recid'
+
+    if binary_race:
+        df.race = df.race.cat.set_categories(['African-American', 'Caucasian'],
+                                             ordered=True)  # 'African-American' < 'Caucasian'
+
+    df.sex = df.sex.astype('category').cat.as_ordered()  # 'Female' < 'Male'
+
+    return standarize_dataset(df, prot_attr=['sex', 'race'],
+                              target='two_year_recid', usecols=usecols,
+                              dropcols=dropcols, numeric_only=numeric_only,
+                              dropna=dropna)
diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index c1c30b02..1cb4b9a1 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -151,7 +151,7 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
     df['credit-risk'] = df['credit-risk'].cat.as_ordered()  # 'bad' < 'good'
 
     # binarize protected attribute (but not corresponding feature)
-    age = (pd.cut(df.age, [0, 25, 100], right=False, labels=['young', 'aged'])
+    age = (pd.cut(df.age, [0, 25, 100], labels=False if numeric_only else ['young', 'aged'])
            if binary_age else 'age')
 
     # Note: marital_status directly implies sex. i.e. 'div/dep/mar' => 'female'
diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index e714026b..703ad13f 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -1,7 +1,25 @@
 from collections import namedtuple
+import warnings
 
+import numpy as np
 from pandas.core.dtypes.common import is_list_like
 
+
+class ColumnAlreadyDroppedWarning(UserWarning):
+    """Warning used if a column is attempted to be dropped twice."""
+
+def check_already_dropped(labels, dropped_cols, name, dropped_by='numeric_only',
+                          warn=True):
+    if not is_list_like(labels):
+        labels = [labels]
+    labels = [c for c in labels if isinstance(c, str)]
+    already_dropped = dropped_cols.intersection(labels)
+    if warn and already_dropped.any():
+        warnings.warn("Some column labels from `{}` were already dropped by "
+                "`{}`:\n{}".format(name, dropped_by, already_dropped.tolist()),
+                ColumnAlreadyDroppedWarning, stacklevel=2)
+    return [c for c in labels if c not in already_dropped]
+
 def standarize_dataset(df, prot_attr, target, sample_weight=None, usecols=[],
                        dropcols=[], numeric_only=False, dropna=True):
     """Separate data, targets, and possibly sample weights and populate
@@ -36,7 +54,7 @@ def standarize_dataset(df, prot_attr, target, sample_weight=None, usecols=[],
 
     Note:
         The order of execution for the dropping parameters is: numeric_only ->
-        dropcols -> usecols -> dropna.
+        usecols -> dropcols -> dropna.
 
     Examples:
         >>> import pandas as pd
@@ -53,24 +71,35 @@ def standarize_dataset(df, prot_attr, target, sample_weight=None, usecols=[],
         >>> X, y = standarize_dataset(df, prot_attr=0, target=5)
         >>> X_tr, X_te, y_tr, y_te = train_test_split(X, y)
     """
-    # TODO: warn user if label in prot_attr, target, or dropcols is already dropped
-    # TODO: error message if label in usecols is already dropped
+    orig_cols = df.columns
     if numeric_only:
         for col in df.select_dtypes('category'):
             if df[col].cat.ordered:
                 df[col] = df[col].factorize(sort=True)[0]
+                df[col] = df[col].replace(-1, np.nan)
         df = df.select_dtypes(['number', 'bool'])
+    nonnumeric = orig_cols.difference(df.columns)
 
+    prot_attr = check_already_dropped(prot_attr, nonnumeric, 'prot_attr')
+    if len(prot_attr) == 0:
+        raise ValueError("At least one protected attribute must be present.")
     df = df.set_index(prot_attr, drop=False, append=True)
-    y = df.pop(target)
+
+    target = check_already_dropped(target, nonnumeric, 'target')
+    if len(target) == 0:
+        raise ValueError("At least one target must be present.")
+    y = df.pop(target if len(target) > 1 else target[0])  # maybe return Series
 
     # Column-wise drops
-    df = df.drop(columns=dropcols)
+    orig_cols = df.columns
     if usecols:
-        if not is_list_like(usecols):
-            # make sure we don't return a Series instead of a DataFrame
-            usecols = [usecols]
+        usecols = check_already_dropped(usecols, nonnumeric, 'usecols')
         df = df[usecols]
+    unused = orig_cols.difference(df.columns)
+
+    dropcols = check_already_dropped(dropcols, nonnumeric, 'dropcols', warn=False)
+    dropcols = check_already_dropped(dropcols, unused, 'dropcols', 'usecols', False)
+    df = df.drop(columns=dropcols)
 
     # Index-wise drops
     if dropna:
diff --git a/aif360/sklearn/tests/test_datasets.py b/aif360/sklearn/tests/test_datasets.py
index 05974f1e..5e2f00ad 100644
--- a/aif360/sklearn/tests/test_datasets.py
+++ b/aif360/sklearn/tests/test_datasets.py
@@ -6,6 +6,7 @@
 
 from aif360.sklearn.datasets import fetch_adult, fetch_bank, fetch_german
 from aif360.sklearn.datasets import standarize_dataset
+from aif360.sklearn.datasets import fetch_compas, ColumnAlreadyDroppedWarning
 
 
 df = pd.DataFrame([[1, 2, 3, 'a'], [5, 6, 7, 'b'], [np.NaN, 10, 11, 'c']],
@@ -39,8 +40,8 @@ def test_usecols_dropcols_basic():
     assert basic(dropcols=['X1', 'Z']).X.columns.tolist() == ['X2']
 
     assert basic(usecols='X1', dropcols=['X2']).X.columns.tolist() == ['X1']
-    with pytest.raises(KeyError):
-        basic(usecols=['X1', 'X2'], dropcols='X2')
+    assert isinstance(basic(usecols='X2', dropcols=['X1', 'X2'])[0],
+                      pd.DataFrame)
 
 def test_dropna_basic():
     basic_dropna = partial(standarize_dataset, df=df, prot_attr='Z',
@@ -50,9 +51,8 @@ def test_dropna_basic():
 
 def test_numeric_only_basic():
     assert basic(prot_attr='X2', numeric_only=True).X.shape == (3, 2)
-    with pytest.raises(KeyError):
-        assert (basic(prot_attr='X2', dropcols='Z', numeric_only=True).X.shape
-                == (3, 2))
+    assert (basic(prot_attr='X2', dropcols='Z', numeric_only=True).X.shape
+            == (3, 2))
 
 def test_fetch_adult():
     adult = fetch_adult()
@@ -74,6 +74,15 @@ def test_fetch_bank():
     assert fetch_bank(dropcols=[]).X.shape == (45211, 16)
     assert fetch_bank(numeric_only=True).X.shape == (45211, 6)
 
+@pytest.mark.filterwarnings('error', category=ColumnAlreadyDroppedWarning)
+def test_fetch_compas():
+    compas = fetch_compas()
+    assert len(compas) == 2
+    assert compas.X.shape == (6167, 10)
+    assert fetch_compas(binary_race=True).X.shape == (5273, 10)
+    with pytest.raises(ColumnAlreadyDroppedWarning):
+        assert fetch_compas(numeric_only=True).X.shape == (6172, 6)
+
 def test_onehot_transformer():
     X, y = fetch_german()
     assert len(pd.get_dummies(X).columns) == 63

From e92f84663c8f3754821568fd143f94a64263d39e Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 12 Nov 2019 10:55:03 -0500
Subject: [PATCH 40/61] fix more edge cases in metrics

---
 aif360/sklearn/metrics/metrics.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index 4adadda0..50632c81 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -101,6 +101,7 @@ def ratio(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
         warnings.warn("The ratio is ill-defined and being set to 0.0 because "
                       "the {} for privileged samples is 0.".format(func.__name__),
                       UndefinedMetricWarning)
+        return 0.
 
     return numerator / denominator
 
@@ -132,7 +133,13 @@ def specificity_score(y_true, y_pred, neg_label=0, sample_weight=None):
                         sample_weight=sample_weight)
 
 def base_rate(y_true, y_pred=None, pos_label=1, sample_weight=None):
-    return np.average(y_true == pos_label, weights=sample_weight)
+    idx = (y_true == pos_label)
+    if not np.any(idx):
+        warnings.warn("base_rate is ill-defined because there are no samples "
+                      "with value {} in y_true.".format(pos_label),
+                      UndefinedMetricWarning)
+        return 0.
+    return np.average(idx, weights=sample_weight)
 
 def selection_rate(y_true, y_pred, pos_label=1, sample_weight=None):
     return base_rate(y_pred, pos_label=pos_label, sample_weight=sample_weight)

From 27aa55c0e98895a3b1adc9115fb28940b719ea63 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 12 Nov 2019 10:55:32 -0500
Subject: [PATCH 41/61] removed unused import

---
 aif360/sklearn/postprocessing/calibrated_equalized_odds.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/aif360/sklearn/postprocessing/calibrated_equalized_odds.py b/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
index 322d331a..143ed423 100644
--- a/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
+++ b/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
@@ -1,6 +1,5 @@
 import numpy as np
 from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.preprocessing import LabelEncoder
 from sklearn.utils import check_random_state
 
 from aif360.sklearn.metrics import base_rate, generalized_fnr, generalized_fpr
@@ -56,14 +55,16 @@ def fit(self, y_true, y_pred, pos_label=1, sample_weight=None):
                                             self.groups_))
 
         # ensure self.classes_ = [neg_label, pos_label]
-        self.classes_ = np.append(np.delete(self.classes_, pos_label), pos_label)
+        self.classes_ = np.append(np.delete(self.classes_, pos_label),
+                                  pos_label)
 
         def args(grp_idx, triv=False):
             i = (groups == self.groups_[grp_idx])
             pred = (np.full_like(y_pred, self.base_rates_[grp_idx]) if triv else
                     y_pred)
             return dict(y_true=y_true[i], y_pred=pred[i], pos_label=pos_label,
-                        sample_weight=sample_weight[i] if sample_weight is not None else None)
+                        sample_weight=None if sample_weight is None
+                                      else sample_weight[i])
 
         self.base_rates_ = [base_rate(**args(i)) for i in range(2)]
 

From 831775c5532a4ef70664085ba81d071e363dde0c Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Mon, 9 Dec 2019 14:35:45 -0500
Subject: [PATCH 42/61] make cache dir if necessary

---
 aif360/sklearn/datasets/compas_dataset.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/aif360/sklearn/datasets/compas_dataset.py b/aif360/sklearn/datasets/compas_dataset.py
index 31f2a14e..be649864 100644
--- a/aif360/sklearn/datasets/compas_dataset.py
+++ b/aif360/sklearn/datasets/compas_dataset.py
@@ -45,6 +45,7 @@ def fetch_compas(data_home=None, binary_race=False,
         df = pd.read_csv(cache_path, index_col='id')
     else:
         df = pd.read_csv(COMPAS_URL, index_col='id')
+        os.makedirs(os.path.dirname(cache_path), exist_ok=True)
         df.to_csv(cache_path)
 
     # Perform the same preprocessing as the original analysis:

From a0e56b09876717b7e64c97f9be099f3b336fb18d Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Fri, 13 Dec 2019 13:22:19 -0500
Subject: [PATCH 43/61] docstring, formatting, and typo fixes

---
 aif360/sklearn/datasets/__init__.py        |  10 +
 aif360/sklearn/datasets/compas_dataset.py  |  12 +-
 aif360/sklearn/datasets/openml_datasets.py |  31 +-
 aif360/sklearn/datasets/utils.py           |  22 +-
 aif360/sklearn/inprocessing/__init__.py    |   3 +
 aif360/sklearn/metrics/__init__.py         |   6 +
 aif360/sklearn/metrics/metrics.py          | 388 ++++++++++++++++++---
 aif360/sklearn/preprocessing/__init__.py   |   3 +
 aif360/sklearn/preprocessing/reweighing.py | 108 +++++-
 aif360/sklearn/tests/test_datasets.py      |   6 +-
 docs/Makefile                              |   6 +-
 docs/source/conf.py                        |  94 ++++-
 docs/source/modules/sklearn.rst            | 214 +++++++++++-
 docs/source/modules/standard_datasets.rst  |   1 +
 docs/source/static/style.css               |  12 +
 docs/source/templates/base.rst             |   6 +
 docs/source/templates/class.rst            |  29 ++
 17 files changed, 842 insertions(+), 109 deletions(-)
 create mode 100644 docs/source/static/style.css
 create mode 100644 docs/source/templates/base.rst
 create mode 100644 docs/source/templates/class.rst

diff --git a/aif360/sklearn/datasets/__init__.py b/aif360/sklearn/datasets/__init__.py
index 43168666..5aac86b8 100644
--- a/aif360/sklearn/datasets/__init__.py
+++ b/aif360/sklearn/datasets/__init__.py
@@ -1,3 +1,13 @@
+"""
+The dataset format for ``aif360.sklearn`` is a :class:`pandas.DataFrame` with
+protected attributes in the index.
+
+Warning:
+    Currently, while all scikit-learn classes will accept DataFrames as inputs,
+    most classes will return a :class:`numpy.ndarray`. Therefore, many pre-
+    processing steps, when placed before an ``aif360.sklearn`` step in a
+    Pipeline, will cause errors.
+"""
 from aif360.sklearn.datasets.utils import *
 from aif360.sklearn.datasets.openml_datasets import *
 from aif360.sklearn.datasets.compas_dataset import fetch_compas
diff --git a/aif360/sklearn/datasets/compas_dataset.py b/aif360/sklearn/datasets/compas_dataset.py
index be649864..76a0d9df 100644
--- a/aif360/sklearn/datasets/compas_dataset.py
+++ b/aif360/sklearn/datasets/compas_dataset.py
@@ -2,7 +2,7 @@
 
 import pandas as pd
 
-from aif360.sklearn.datasets.utils import standarize_dataset
+from aif360.sklearn.datasets.utils import standardize_dataset
 
 
 # cache location
@@ -19,7 +19,7 @@ def fetch_compas(data_home=None, binary_race=False,
 
     Optionally binarizes 'race' to 'Caucasian' (privileged) or 'African-American'
     (unprivileged). The other protected attribute is 'sex' ('Male' is
-    _unprivileged_ and 'Female' is _privileged_). The outcome variable is
+    *unprivileged* and 'Female' is *privileged*). The outcome variable is
     'no recid.' (favorable) if the person was not accused of a crime within two
     years or 'did recid.' (unfavorable) if they were.
 
@@ -67,7 +67,7 @@ def fetch_compas(data_home=None, binary_race=False,
 
     df.sex = df.sex.astype('category').cat.as_ordered()  # 'Female' < 'Male'
 
-    return standarize_dataset(df, prot_attr=['sex', 'race'],
-                              target='two_year_recid', usecols=usecols,
-                              dropcols=dropcols, numeric_only=numeric_only,
-                              dropna=dropna)
+    return standardize_dataset(df, prot_attr=['sex', 'race'],
+                               target='two_year_recid', usecols=usecols,
+                               dropcols=dropcols, numeric_only=numeric_only,
+                               dropna=dropna)
diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 1cb4b9a1..6decfcb7 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -3,7 +3,7 @@
 import pandas as pd
 from sklearn.datasets import fetch_openml
 
-from aif360.sklearn.datasets.utils import standarize_dataset
+from aif360.sklearn.datasets.utils import standardize_dataset
 
 
 # cache location
@@ -38,8 +38,8 @@ def fetch_adult(subset='all', data_home=None, binary_race=True, usecols=[],
 
     Binarizes 'race' to 'White' (privileged) or 'Non-white' (unprivileged).
     The other protected attribute is 'sex' ('Male' is privileged and 'Female' is
-    unprivileged). The outcome variable is '>50K' (favorable) or '<=50K'
-    (unfavorable).
+    unprivileged). The outcome variable is 'annual-income': '>50K' (favorable)
+    or '<=50K' (unfavorable).
 
     Args:
         subset ({'train', 'test', or 'all'}, optional): Select the dataset to
@@ -88,7 +88,7 @@ def fetch_adult(subset='all', data_home=None, binary_race=True, usecols=[],
                                              ordered=True).fillna('Non-white')
     df.sex = df.sex.cat.as_ordered()  # 'Female' < 'Male'
 
-    return standarize_dataset(df, prot_attr=['race', 'sex'],
+    return standardize_dataset(df, prot_attr=['race', 'sex'],
                               target='annual-income', sample_weight='fnlwgt',
                               usecols=usecols, dropcols=dropcols,
                               numeric_only=numeric_only, dropna=dropna)
@@ -101,19 +101,20 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
     unprivileged) and 'age' (binarized by default as recommended by
     [#kamiran09]_: ``age >= 25`` is considered privileged and ``age < 25`` is
     considered unprivileged; see the ``binary_age`` flag to keep this
-    continuous). The outcome variable is 'good' (favorable) or 'bad'
-    (unfavorable).
+    continuous). The outcome variable is 'credit-risk': 'good' (favorable) or
+    'bad' (unfavorable).
 
     References:
-        .. [#kamiran09] F. Kamiran and T. Calders, "Classifying without
+        .. [#kamiran09] `F. Kamiran and T. Calders, "Classifying without
            discriminating," 2nd International Conference on Computer,
            Control and Communication, 2009.
+           <https://ieeexplore.ieee.org/abstract/document/4909197>`_
 
     Args:
         data_home (string, optional): Specify another download and cache folder
             for the datasets. By default all AIF360 datasets are stored in
             'aif360/sklearn/data/raw' subfolders.
-        binary_age (bool, optional): If `True`, split protected attribute,
+        binary_age (bool, optional): If ``True``, split protected attribute,
             ``age``, into 'aged' (privileged) and 'youth' (unprivileged). The
             ``age`` feature remains continuous.
         usecols (single label or list-like, optional): Column name(s) to keep.
@@ -161,16 +162,16 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
     df = df.join(personal_status.astype('category'))
     df.sex = df.sex.cat.as_ordered()  # 'female' < 'male'
 
-    return standarize_dataset(df, prot_attr=['sex', age], target='credit-risk',
-                              usecols=usecols, dropcols=dropcols,
-                              numeric_only=numeric_only, dropna=dropna)
+    return standardize_dataset(df, prot_attr=['sex', age], target='credit-risk',
+                               usecols=usecols, dropcols=dropcols,
+                               numeric_only=numeric_only, dropna=dropna)
 
 def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
                numeric_only=False, dropna=False):
     """Load the Bank Marketing Dataset.
 
     The protected attribute is 'age' (left as continuous). The outcome variable
-    is 'yes' or 'no'. TODO: which is favorable?
+    is 'deposit': ``True`` or ``False``.
 
     Args:
         data_home (string, optional): Specify another download and cache folder
@@ -213,6 +214,6 @@ def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
     # replace 'unknown' marker with NaN
     df.apply(lambda s: s.cat.remove_categories('unknown', inplace=True)
              if hasattr(s, 'cat') and 'unknown' in s.cat.categories else s)
-    return standarize_dataset(df, prot_attr='age', target='deposit',
-                              usecols=usecols, dropcols=dropcols,
-                              numeric_only=numeric_only, dropna=dropna)
+    return standardize_dataset(df, prot_attr='age', target='deposit',
+                               usecols=usecols, dropcols=dropcols,
+                               numeric_only=numeric_only, dropna=dropna)
diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index 703ad13f..db88ea46 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -10,6 +10,22 @@ class ColumnAlreadyDroppedWarning(UserWarning):
 
 def check_already_dropped(labels, dropped_cols, name, dropped_by='numeric_only',
                           warn=True):
+    """Check if columns have already been dropped and return only those that
+    haven't.
+
+    Args:
+        labels (single label or list-like): Column labels to check.
+        dropped_cols (set or pandas.Index): Columns that were already dropped.
+        name (str): Original arg that triggered the check (e.g. ``dropcols``).
+        dropped_by (str, optional): Original arg that caused ``dropped_cols``
+            (e.g. ``numeric_only``).
+        warn (bool, optional): If ``True``, produces a
+            :class:`ColumnAlreadyDroppedWarning` if there are columns in the
+            intersection of ``dropped_cols`` and ``labels``.
+
+    Returns:
+        list: Columns in ``labels`` which are not in ``dropped_cols``.
+    """
     if not is_list_like(labels):
         labels = [labels]
     labels = [c for c in labels if isinstance(c, str)]
@@ -20,7 +36,7 @@ def check_already_dropped(labels, dropped_cols, name, dropped_by='numeric_only',
                 ColumnAlreadyDroppedWarning, stacklevel=2)
     return [c for c in labels if c not in already_dropped]
 
-def standarize_dataset(df, prot_attr, target, sample_weight=None, usecols=[],
+def standardize_dataset(df, prot_attr, target, sample_weight=None, usecols=[],
                        dropcols=[], numeric_only=False, dropna=True):
     """Separate data, targets, and possibly sample weights and populate
     protected attributes as sample properties.
@@ -61,14 +77,14 @@ def standarize_dataset(df, prot_attr, target, sample_weight=None, usecols=[],
         >>> from sklearn.linear_model import LinearRegression
 
         >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['X', 'y', 'Z'])
-        >>> train = standarize_dataset(df, prot_attr='Z', target='y')
+        >>> train = standardize_dataset(df, prot_attr='Z', target='y')
         >>> reg = LinearRegression().fit(*train)
 
         >>> import numpy as np
         >>> from sklearn.datasets import make_classification
         >>> from sklearn.model_selection import train_test_split
         >>> df = pd.DataFrame(np.hstack(make_classification(n_features=5)))
-        >>> X, y = standarize_dataset(df, prot_attr=0, target=5)
+        >>> X, y = standardize_dataset(df, prot_attr=0, target=5)
         >>> X_tr, X_te, y_tr, y_te = train_test_split(X, y)
     """
     orig_cols = df.columns
diff --git a/aif360/sklearn/inprocessing/__init__.py b/aif360/sklearn/inprocessing/__init__.py
index 863d3676..18df48c4 100644
--- a/aif360/sklearn/inprocessing/__init__.py
+++ b/aif360/sklearn/inprocessing/__init__.py
@@ -1,3 +1,6 @@
+"""
+In-processing algorithms train a fair classifier (data in, predictions out).
+"""
 from aif360.sklearn.inprocessing.adversarial_debiasing import AdversarialDebiasing
 
 __all__ = [
diff --git a/aif360/sklearn/metrics/__init__.py b/aif360/sklearn/metrics/__init__.py
index ceaef288..a0778b80 100644
--- a/aif360/sklearn/metrics/__init__.py
+++ b/aif360/sklearn/metrics/__init__.py
@@ -1 +1,7 @@
+"""
+``aif360.sklearn`` implements a number of fairness metrics for group fairness
+and individual fairness. For guidance on which metric to use for a given
+application, see our
+`Guidance <http://aif360.mybluemix.net/resources#guidance>`_ page.
+"""
 from aif360.sklearn.metrics.metrics import *
diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index 50632c81..f100a012 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -10,11 +10,23 @@
 
 
 __all__ = [
-    'base_rate', 'consistency_score', 'specificity_score', 'selection_rate',
-    'disparate_impact_ratio', 'statistical_parity_difference',
+    # meta-metrics
+    'difference', 'ratio',
+    # scorer factories
+    'make_difference_scorer', 'make_ratio_scorer',
+    # helpers
+    'specificity_score', 'base_rate', 'selection_rate', 'generalized_fpr',
+    'generalized_fnr',
+    # group fairness
+    'statistical_parity_difference', 'disparate_impact_ratio',
     'equal_opportunity_difference', 'average_odds_difference',
-    'average_odds_error', 'generalized_entropy_error', 'generalized_fnr',
-    'between_group_generalized_entropy_error', 'generalized_fpr'
+    'average_odds_error',
+    # individual fairness
+    'generalized_entropy_index', 'generalized_entropy_error',
+    'between_group_generalized_entropy_error', 'theil_index',
+    'coefficient_of_variation', 'consistency_score',
+    # aliases
+    'sensitivity_score', 'mean_difference',
 ]
 
 # ============================= META-METRICS ===================================
@@ -35,7 +47,7 @@ def difference(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
         *args: Additional positional args to be passed through to ``func``.
         prot_attr (array-like, keyword-only): Protected attribute(s). If
             ``None``, all protected attributes in ``y`` are used.
-        priv_group (scalar, optional): Label value for the privileged group.
+        priv_group (scalar, optional): The label of the privileged group.
         sample_weight (array-like, optional): Sample weights passed through to
             ``func``.
         **kwargs: Additional keyword args to be passed through to ``func``.
@@ -66,8 +78,7 @@ def ratio(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
     arbitrary metric.
 
     Note: The optimal value of a ratio is 1. To make it a scorer, one must
-    take the minimum of the ratio and its inverse, subtract it from 1, and set
-    ``greater_is_better`` to False.
+    take the minimum of the ratio and its inverse.
 
     Unprivileged group is taken to be the inverse of the privileged group.
 
@@ -76,9 +87,9 @@ def ratio(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
             :mod:`aif360.sklearn.metrics.metrics`.
         y (array-like): Outcome vector with protected attributes as index.
         *args: Additional positional args to be passed through to ``func``.
-        groups (array-like, keyword-only): Group labels (protected attributes)
-            for the samples.
-        priv_group (scalar, optional): Label value for the privileged group.
+        prot_attr (array-like, keyword-only): Protected attribute(s). If
+            ``None``, all protected attributes in ``y`` are used.
+        priv_group (scalar, optional): The label of the privileged group.
         sample_weight (array-like, optional): Sample weights passed through to
             ``func``.
         **kwargs: Additional keyword args to be passed through to ``func``.
@@ -99,7 +110,7 @@ def ratio(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
 
     if denominator == 0:
         warnings.warn("The ratio is ill-defined and being set to 0.0 because "
-                      "the {} for privileged samples is 0.".format(func.__name__),
+                      "'{}' for privileged samples is 0.".format(func.__name__),
                       UndefinedMetricWarning)
         return 0.
 
@@ -107,15 +118,40 @@ def ratio(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
 
 
 # =========================== SCORER FACTORIES =================================
-def make_difference_scorer(func):
-    return make_scorer(lambda y, y_pred, **kw: abs(func(y, y_pred, **kw)),
+def make_difference_scorer(diff_func):
+    """Make a scorer from a 'difference' metric (e.g.
+    :func:`statistical_parity_difference`).
+
+    Since the optimal value of a difference metric is 0, this function takes the
+    absolute value and sets ``greater_is_better`` to ``False``.
+
+    See also:
+        :func:`~sklearn.metrics.make_scorer`
+
+    Args:
+        diff_func (callable): A difference metric with signature
+            ``diff_func(y, y_pred, **kwargs)``.
+    """
+    return make_scorer(lambda y, y_pred, **kw: abs(diff_func(y, y_pred, **kw)),
                        greater_is_better=False)
 
-def make_ratio_scorer(func):
+def make_ratio_scorer(ratio_func):
+    """Make a scorer from a 'ratio' metric (e.g. :func:`disparate_impact_ratio`).
+
+    Since the optimal value of a ratio metric is 1, this function takes the
+    minimum of the ratio and its inverse (or 0 if the ratio itself is 0).
+
+    See also:
+        :func:`~sklearn.metrics.make_scorer`
+
+    Args:
+        ratio_func (callable): A ratio metric with signature
+            ``ratio_func(y, y_pred, **kwargs)``.
+    """
     def score_fn(y, y_pred, **kwargs):
-        ratio = func(y, y_pred, **kwargs)
-        return 1 - min(ratio, 1/ratio)
-    return make_scorer(score_fn, greater_is_better=False)
+        ratio = ratio_func(y, y_pred, **kwargs)
+        return min(ratio, 1/ratio) if ratio != 0 else 0.
+    return make_scorer(score_fn)
 
 
 # ================================ HELPERS =====================================
@@ -126,66 +162,208 @@ def specificity_score(y_true, y_pred, neg_label=0, sample_weight=None):
     Args:
         y_true (array-like): Ground truth (correct) target values.
         y_pred (array-like): Estimated targets as returned by a classifier.
-        neg_label (scalar, optional): The class to report. Note: the data should
-            be binary.
+        neg_label (scalar, optional): The label of the negative class. Note:
+            the data should be binary.
+        sample_weight (array-like, optional): Sample weights.
     """
     return recall_score(y_true, y_pred, pos_label=neg_label,
                         sample_weight=sample_weight)
 
 def base_rate(y_true, y_pred=None, pos_label=1, sample_weight=None):
+    r"""Compute the base rate, :math:`Pr(Y = \text{pos_label}) = \frac{P}{P+N}`.
+
+    Args:
+        y_true (array-like): Ground truth (correct) target values.
+        y_pred (array-like, optional): Estimated targets. Ignored.
+        pos_label (scalar, optional): The label of the positive class.
+        sample_weight (array-like, optional): Sample weights.
+
+    Returns:
+        float: Base rate.
+    """
     idx = (y_true == pos_label)
-    if not np.any(idx):
-        warnings.warn("base_rate is ill-defined because there are no samples "
-                      "with value {} in y_true.".format(pos_label),
-                      UndefinedMetricWarning)
-        return 0.
     return np.average(idx, weights=sample_weight)
 
 def selection_rate(y_true, y_pred, pos_label=1, sample_weight=None):
+    r"""Compute the selection rate, :math:`Pr(\hat{Y} = \text{pos_label}) =
+    \frac{TP + FP}{P + N}`.
+
+    Args:
+        y_true (array-like): Ground truth (correct) target values. Ignored.
+        y_pred (array-like): Estimated targets as returned by a classifier.
+        pos_label (scalar, optional): The label of the positive class.
+        sample_weight (array-like, optional): Sample weights.
+
+    Returns:
+        float: Selection rate.
+    """
     return base_rate(y_pred, pos_label=pos_label, sample_weight=sample_weight)
 
-def generalized_fpr(y_true, y_pred, pos_label=1, sample_weight=None):
+def generalized_fpr(y_true, probas_pred, pos_label=1, sample_weight=None):
+    r"""Return the ratio of generalized false positives to negative examples in
+    the dataset, :math:`GFPR = \tfrac{GFP}{N}`.
+
+    The generalized confusion matrix is calculated by summing the probabilities
+    of the positive class instead of the hard predictions.
+
+    Args:
+        y_true (array-like): Ground-truth (correct) target values.
+        probas_pred (array-like): Probability estimates of the positive class.
+        pos_label (scalar, optional): The label of the positive class.
+        sample_weight (array-like, optional): Sample weights.
+
+    Returns:
+        float: Generalized false positive rate. If there are no negative samples
+        in ``y_true``, this will raise an
+        :class:`~sklearn.exceptions.UndefinedMetricWarning` and return 0.
+    """
     idx = (y_true != pos_label)
     if not np.any(idx):
-        warnings.warn("generalized_fpr is ill-defined because there are no true"
-                      " negatives in y_true.", UndefinedMetricWarning)
+        warnings.warn("generalized_fpr is ill-defined because there are no "
+                      "negative samples in y_true.", UndefinedMetricWarning)
         return 0.
     if sample_weight is None:
-        return y_pred[idx].mean()
-    return np.average(y_pred[idx], weights=sample_weight[idx])
+        return probas_pred[idx].mean()
+    return np.average(probas_pred[idx], weights=sample_weight[idx])
+
+def generalized_fnr(y_true, probas_pred, pos_label=1, sample_weight=None):
+    r"""Return the ratio of generalized false negatives to positive examples in
+    the dataset, :math:`GFNR = \tfrac{GFN}{P}`.
 
-def generalized_fnr(y_true, y_pred, pos_label=1, sample_weight=None):
+    The generalized confusion matrix is calculated by summing the probabilities
+    of the positive class instead of the hard predictions.
+
+    Args:
+        y_true (array-like): Ground-truth (correct) target values.
+        probas_pred (array-like): Probability estimates of the positive class.
+        pos_label (scalar, optional): The label of the positive class.
+        sample_weight (array-like, optional): Sample weights.
+
+    Returns:
+        float: Generalized false negative rate. If there are no positive samples
+        in ``y_true``, this will raise an
+        :class:`~sklearn.exceptions.UndefinedMetricWarning` and return 0.
+    """
     idx = (y_true == pos_label)
     if not np.any(idx):
-        warnings.warn("generalized_fnr is ill-defined because there are no true"
-                      " positives in y_true.", UndefinedMetricWarning)
+        warnings.warn("generalized_fnr is ill-defined because there are no "
+                      "positive samples in y_true.", UndefinedMetricWarning)
         return 0.
     if sample_weight is None:
-        return 1 - y_pred[idx].mean()
-    return 1 - np.average(y_pred[idx], weights=sample_weight[idx])
+        return 1 - probas_pred[idx].mean()
+    return 1 - np.average(probas_pred[idx], weights=sample_weight[idx])
 
 
 # ============================ GROUP FAIRNESS ==================================
 def statistical_parity_difference(*y, prot_attr=None, priv_group=1, pos_label=1,
                                   sample_weight=None):
+    r"""Difference in selection rates.
+
+    .. math::
+        Pr(\hat{Y} = \text{pos_label} | D = \text{unprivileged})
+        - Pr(\hat{Y} = \text{pos_label} | D = \text{privileged})
+
+    Note:
+        If only ``y_true`` is provided, this will return the difference in base
+        rates (statistical parity difference of the original dataset).
+
+    Args:
+        y_true (array-like): Ground truth (correct) target values. If ``y_pred``
+            is provided, this is ignored.
+        y_pred (array-like, optional): Estimated targets as returned by a
+            classifier.
+        prot_attr (array-like, keyword-only): Protected attribute(s). If
+            ``None``, all protected attributes in ``y_true`` are used.
+        priv_group (scalar, optional): The label of the privileged group.
+        pos_label (scalar, optional): The label of the positive class.
+        sample_weight (array-like, optional): Sample weights.
+
+    Returns:
+        float: Statistical parity difference.
+    """
     rate = base_rate if len(y) == 1 or y[1] is None else selection_rate
     return difference(rate, *y, prot_attr=prot_attr, priv_group=priv_group,
                       pos_label=pos_label, sample_weight=sample_weight)
 
 def disparate_impact_ratio(*y, prot_attr=None, priv_group=1, pos_label=1,
                            sample_weight=None):
+    r"""Ratio of selection rates.
+
+    .. math::
+        \frac{Pr(\hat{Y} = \text{pos_label} | D = \text{unprivileged})}
+        {Pr(\hat{Y} = \text{pos_label} | D = \text{privileged})}
+
+    Note:
+        If only ``y_true`` is provided, this will return the ratio of base rates
+        (disparate impact of the original dataset).
+
+    Args:
+        y_true (array-like): Ground truth (correct) target values. If ``y_pred``
+            is provided, this is ignored.
+        y_pred (array-like, optional): Estimated targets as returned by a
+            classifier.
+        prot_attr (array-like, keyword-only): Protected attribute(s). If
+            ``None``, all protected attributes in ``y_true`` are used.
+        priv_group (scalar, optional): The label of the privileged group.
+        pos_label (scalar, optional): The label of the positive class.
+        sample_weight (array-like, optional): Sample weights.
+
+    Returns:
+        float: Disparate impact.
+    """
     rate = base_rate if len(y) == 1 or y[1] is None else selection_rate
     return ratio(rate, *y, prot_attr=prot_attr, priv_group=priv_group,
                  pos_label=pos_label, sample_weight=sample_weight)
 
 def equal_opportunity_difference(y_true, y_pred, prot_attr=None, priv_group=1,
                                  pos_label=1, sample_weight=None):
+    r"""A relaxed version of equality of opportunity.
+
+    Returns the difference in recall scores (TPR) between the unprivileged and
+    privileged groups. A value of 0 indicates equality of opportunity.
+
+    Args:
+        y_true (array-like): Ground truth (correct) target values.
+        y_pred (array-like): Estimated targets as returned by a classifier.
+        prot_attr (array-like, keyword-only): Protected attribute(s). If
+            ``None``, all protected attributes in ``y_true`` are used.
+        priv_group (scalar, optional): The label of the privileged group.
+        pos_label (scalar, optional): The label of the positive class.
+        sample_weight (array-like, optional): Sample weights.
+
+    Returns:
+        float: Equal opportunity difference.
+    """
     return difference(recall_score, y_true, y_pred, prot_attr=prot_attr,
                       priv_group=priv_group, pos_label=pos_label,
                       sample_weight=sample_weight)
 
 def average_odds_difference(y_true, y_pred, prot_attr=None, priv_group=1,
                             pos_label=1, neg_label=0, sample_weight=None):
+    r"""A relaxed version of equality of odds.
+
+    Returns the average of the difference in FPR and TPR for the unprivileged
+    and privileged groups:
+
+    .. math::
+
+        \dfrac{(FPR_{D = \text{unprivileged}} - FPR_{D = \text{privileged}})
+        + (TPR_{D = \text{unprivileged}} - TPR_{D = \text{privileged}})}{2}
+
+    A value of 0 indicates equality of odds.
+
+    Args:
+        y_true (array-like): Ground truth (correct) target values.
+        y_pred (array-like): Estimated targets as returned by a classifier.
+        prot_attr (array-like, keyword-only): Protected attribute(s). If
+            ``None``, all protected attributes in ``y_true`` are used.
+        priv_group (scalar, optional): The label of the privileged group.
+        pos_label (scalar, optional): The label of the positive class.
+        sample_weight (array-like, optional): Sample weights.
+
+    Returns:
+        float: Average odds difference.
+    """
     fpr_diff = -difference(specificity_score, y_true, y_pred,
                            prot_attr=prot_attr, priv_group=priv_group,
                            neg_label=neg_label, sample_weight=sample_weight)
@@ -196,6 +374,30 @@ def average_odds_difference(y_true, y_pred, prot_attr=None, priv_group=1,
 
 def average_odds_error(y_true, y_pred, prot_attr=None, priv_group=1,
                        pos_label=1, neg_label=0, sample_weight=None):
+    r"""A relaxed version of equality of odds.
+
+    Returns the average of the absolute difference in FPR and TPR for the
+    unprivileged and privileged groups:
+
+    .. math::
+
+        \dfrac{|FPR_{D = \text{unprivileged}} - FPR_{D = \text{privileged}}|
+        + |TPR_{D = \text{unprivileged}} - TPR_{D = \text{privileged}}|}{2}
+
+    A value of 0 indicates equality of odds.
+
+    Args:
+        y_true (array-like): Ground truth (correct) target values.
+        y_pred (array-like): Estimated targets as returned by a classifier.
+        prot_attr (array-like, keyword-only): Protected attribute(s). If
+            ``None``, all protected attributes in ``y_true`` are used.
+        priv_group (scalar, optional): The label of the privileged group.
+        pos_label (scalar, optional): The label of the positive class.
+        sample_weight (array-like, optional): Sample weights.
+
+    Returns:
+        float: Average odds error.
+    """
     fpr_diff = -difference(specificity_score, y_true, y_pred,
                            prot_attr=prot_attr, priv_group=priv_group,
                            neg_label=neg_label, sample_weight=sample_weight)
@@ -207,6 +409,23 @@ def average_odds_error(y_true, y_pred, prot_attr=None, priv_group=1,
 
 # ========================== INDIVIDUAL FAIRNESS ===============================
 def generalized_entropy_index(b, alpha=2):
+    r"""Generalized entropy index measures inequality over a population.
+
+    .. math::
+
+        \mathcal{E}(\alpha) = \begin{cases}
+            \frac{1}{n \alpha (\alpha-1)}\sum_{i=1}^n\left[\left(\frac{b_i}{\mu}\right)^\alpha - 1\right],& \alpha \ne 0, 1,\\
+            \frac{1}{n}\sum_{i=1}^n\frac{b_{i}}{\mu}\ln\frac{b_{i}}{\mu},& \alpha=1,\\
+            -\frac{1}{n}\sum_{i=1}^n\ln\frac{b_{i}}{\mu},& \alpha=0.
+        \end{cases}
+
+    Args:
+        b (array-like): Parameter over which to calculate the entropy index.
+        alpha (scalar): Parameter that regulates the weight given to distances
+            between values at different parts of the distribution. A value of 0
+            is equivalent to the mean log deviation, 1 is the Theil index, and 2
+            is half the squared coefficient of variation.
+    """
     if alpha == 0:
         return -(np.log(b / b.mean()) / b.mean()).mean()
     elif alpha == 1:
@@ -217,12 +436,65 @@ def generalized_entropy_index(b, alpha=2):
 
 def generalized_entropy_error(y_true, y_pred, alpha=2, pos_label=1):
     #                           sample_weight=None):
+    r"""Compute the generalized entropy.
+
+    Generalized entropy index is proposed as a unified individual and
+    group fairness measure in [#speicher18]_.
+
+    Uses :math:`b_i = \hat{y}_i - y_i + 1`. See
+    :func:`generalized_entropy_index` for details.
+
+    Args:
+        y_true (array-like): Ground truth (correct) target values.
+        y_pred (array-like): Estimated targets as returned by a classifier.
+        alpha (scalar, optional): Parameter that regulates the weight given to
+            distances between values at different parts of the distribution. A
+            value of 0 is equivalent to the mean log deviation, 1 is the Theil
+            index, and 2 is half the squared coefficient of variation.
+        pos_label (scalar, optional): The label of the positive class.
+
+    References:
+        .. [#speicher18] `T. Speicher, H. Heidari, N. Grgic-Hlaca,
+           K. P. Gummadi, A. Singla, A. Weller, and M. B. Zafar, "A Unified
+           Approach to Quantifying Algorithmic Unfairness: Measuring Individual
+           and Group Unfairness via Inequality Indices," ACM SIGKDD
+           International Conference on Knowledge Discovery and Data Mining,
+           2018. <https://dl.acm.org/citation.cfm?id=3220046>`_
+    """
     b = 1 + (y_pred == pos_label) - (y_true == pos_label)
     return generalized_entropy_index(b, alpha=alpha)
 
 def between_group_generalized_entropy_error(y_true, y_pred, prot_attr=None,
-                                            priv_group=None, alpha=2,
-                                            pos_label=1):
+        priv_group=None, alpha=2, pos_label=1):
+    r"""Compute the between-group generalized entropy.
+
+    Between-group generalized entropy index is proposed as a group
+    fairness measure in [#speicher18]_ and is one of two terms that the
+    generalized entropy index decomposes to.
+
+    Args:
+        y_true (array-like): Ground truth (correct) target values.
+        y_pred (array-like): Estimated targets as returned by a classifier.
+        prot_attr (array-like, optional): Protected attribute(s). If ``None``,
+            all protected attributes in ``y_true`` are used.
+        priv_group (scalar, optional): The label of the privileged group. If
+            provided, the index will be computed between only the privileged and
+            unprivileged groups. Otherwise, the index will be computed between
+            all groups defined by the ``prot_attr``.
+        alpha (scalar, optional): Parameter that regulates the weight given to
+            distances between values at different parts of the distribution. A
+            value of 0 is equivalent to the mean log deviation, 1 is the Theil
+            index, and 2 is half the squared coefficient of variation.
+        pos_label (scalar, optional): The label of the positive class.
+
+    References:
+        .. [#speicher18] `T. Speicher, H. Heidari, N. Grgic-Hlaca,
+           K. P. Gummadi, A. Singla, A. Weller, and M. B. Zafar, "A Unified
+           Approach to Quantifying Algorithmic Unfairness: Measuring Individual
+           and Group Unfairness via Inequality Indices," ACM SIGKDD
+           International Conference on Knowledge Discovery and Data Mining,
+           2018. <https://dl.acm.org/citation.cfm?id=3220046>`_
+    """
     groups, _ = check_groups(y_true, prot_attr)
     b = np.empty_like(y_true, dtype='float')
     if priv_group is not None:
@@ -233,16 +505,46 @@ def between_group_generalized_entropy_error(y_true, y_pred, prot_attr=None,
     return generalized_entropy_index(b, alpha=alpha)
 
 def theil_index(b):
+    r"""The Theil index is the :func:`generalized_entropy_index` with
+    :math:`\alpha = 1`.
+
+    Args:
+        b (array-like): Parameter over which to calculate the entropy index.
+    """
     return generalized_entropy_index(b, alpha=1)
 
 def coefficient_of_variation(b):
+    r"""The coefficient of variation is two times the square root of the
+    :func:`generalized_entropy_index` with :math:`\alpha = 2`.
+
+    Args:
+        b (array-like): Parameter over which to calculate the entropy index.
+    """
     return 2 * np.sqrt(generalized_entropy_index(b, alpha=2))
 
 
-# TODO: not technically a scorer but you should be allowed to score transformers
-# Is consistency_difference posible?
-# use sample_weight?
+# TODO: use sample_weight?
 def consistency_score(X, y, n_neighbors=5):
+    r"""Compute the consistency score.
+
+    Individual fairness metric from [#zemel13]_ that measures how similar the
+    labels are for similar instances.
+
+    .. math::
+        1 - \frac{1}{n\cdot\text{n_neighbors}}\sum_{i=1}^n |\hat{y}_i -
+        \sum_{j\in\mathcal{N}_{\text{n_neighbors}}(x_i)} \hat{y}_j|
+
+    Args:
+        X (array-like): Sample features.
+        y (array-like): Sample targets.
+        n_neighbors (int, optional): Number of neighbors for the knn
+            computation.
+
+    References:
+        .. [#zemel13] `R. Zemel, Y. Wu, K. Swersky, T. Pitassi, and C. Dwork,
+           "Learning Fair Representations," International Conference on Machine
+           Learning, 2013. <http://proceedings.mlr.press/v28/zemel13.html>`_
+    """
     # cast as ndarrays
     X, y = check_X_y(X, y)
     # learn a KNN on the features
@@ -267,7 +569,9 @@ def sensitivity_score(y_true, y_pred, pos_label=1, sample_weight=None):
 #     return 1 - specificity_score(y_true, y_pred, neg_label=neg_label,
 #                                  sample_weight=sample_weight)
 
-def mean_difference(*y, prot_attr=None, priv_group=1, pos_label=1, sample_weight=None):
+def mean_difference(*y, prot_attr=None, priv_group=1, pos_label=1,
+                    sample_weight=None):
     """Alias of :func:`statistical_parity_difference`."""
-    return statistical_parity_difference(*y, prot_attr=prot_attr, priv_group=priv_group,
-            pos_label=pos_label, sample_weight=sample_weight)
+    return statistical_parity_difference(*y, prot_attr=prot_attr,
+            priv_group=priv_group, pos_label=pos_label,
+            sample_weight=sample_weight)
diff --git a/aif360/sklearn/preprocessing/__init__.py b/aif360/sklearn/preprocessing/__init__.py
index 61a0431d..c47dda96 100644
--- a/aif360/sklearn/preprocessing/__init__.py
+++ b/aif360/sklearn/preprocessing/__init__.py
@@ -1,3 +1,6 @@
+"""
+Pre-processing algorithms modify a dataset to be more fair (data in, data out).
+"""
 from aif360.sklearn.preprocessing.reweighing import Reweighing, ReweighingMeta
 
 __all__ = [
diff --git a/aif360/sklearn/preprocessing/reweighing.py b/aif360/sklearn/preprocessing/reweighing.py
index c73b96fe..dcb1d906 100644
--- a/aif360/sklearn/preprocessing/reweighing.py
+++ b/aif360/sklearn/preprocessing/reweighing.py
@@ -7,11 +7,25 @@
 
 
 class Reweighing(BaseEstimator):
-    """Reweighing is a preprocessing technique that weights the examples in each
+    """Sample reweighing.
+
+    Reweighing is a preprocessing technique that weights the examples in each
     (group, label) combination differently to ensure fairness before
     classification [#kamiran12]_.
 
+    Note:
+        This breaks the scikit-learn API by returning new sample weights from
+        ``fit_transform()``. See :class:`ReweighingMeta` for a workaround.
+
+    References:
+        .. [#kamiran12] `F. Kamiran and T. Calders,  "Data Preprocessing
+           Techniques for Classification without Discrimination," Knowledge and
+           Information Systems, 2012.
+           <https://link.springer.com/article/10.1007/s10115-011-0463-8>`_
+
     Attributes:
+        prot_attr_ (str or list(str)): Protected attribute(s) used for
+            reweighing.
         groups_ (array, shape (n_groups,)): A list of group labels known to the
             transformer.
         classes_ (array, shape (n_classes,)): A list of class labels known to
@@ -20,32 +34,21 @@ class Reweighing(BaseEstimator):
             for each combination of group and class labels used to debias
             samples. Existing sample weights are multiplied by the corresponding
             factor for that sample's group and class.
-
-    Examples:
-        >>> pipe = make_pipeline(Reweighing(), LinearRegression())
-        >>> # sample_weight_ will be used after it is fit
-        >>> fit_params = {'linearregression__sample_weight':
-        ...               pipe['reweighing'].sample_weight_}
-        >>> pipe.fit(X, y, **fit_params)
-
-    References:
-        .. [#kamiran12] F. Kamiran and T. Calders,  "Data Preprocessing
-           Techniques for Classification without Discrimination," Knowledge and
-           Information Systems, 2012.
     """
 
     def __init__(self, prot_attr=None):
         """
         Args:
             prot_attr (single label or list-like, optional): Protected
-                attribute(s) to use as sensitive attribute(s) in the reweighing
-                process. If more than one attribute, all combinations of values
-                (intersections) are considered. Default is ``None`` meaning all
-                protected attributes from the dataset are used.
+                attribute(s) to use in the reweighing process. If more than one
+                attribute, all combinations of values (intersections) are
+                considered. Default is ``None`` meaning all protected attributes
+                from the dataset are used.
         """
         self.prot_attr = prot_attr
 
     def fit(self, X, y, sample_weight=None):
+        """Only ``fit_transform`` is allowed for this algorithm."""
         self.fit_transform(X, y, sample_weight=sample_weight)
         return self
 
@@ -88,7 +91,22 @@ def N_(i): return sample_weight[i].sum()
 
 
 class ReweighingMeta(BaseEstimator, MetaEstimatorMixin):
+    """A meta-estimator which wraps a given estimator with a reweighing
+    preprocessing step.
+
+    This is necessary for use in a Pipeline, etc.
+
+    Attributes:
+        estimator_ (sklearn.BaseEstimator): The fitted underlying estimator.
+        reweigher_: The fitted underlying reweigher.
+    """
     def __init__(self, estimator, reweigher=Reweighing()):
+        """
+        Args:
+            estimator (sklearn.BaseEstimator): Estimator to be wrapped.
+            reweigher: Preprocessor which returns new sample weights from
+                ``transform()``.
+        """
         self.reweigher = reweigher
         self.estimator = estimator
 
@@ -97,6 +115,18 @@ def _estimator_type(self):
         return self.estimator._estimator_type
 
     def fit(self, X, y, sample_weight=None):
+        """Performs ``self.reweigher_.fit_transform(X, y, sample_weight)`` and
+        then ``self.estimator_.fit(X, y, sample_weight)`` using the reweighed
+        samples.
+
+        Args:
+            X (array-like): Training samples.
+            y (array-like): Training labels.
+            sample_weight (array-like, optional): Sample weights.
+
+        Returns:
+            ReweighingMeta: self.
+        """
         if not has_fit_parameter(self.estimator, 'sample_weight'):
             raise TypeError("`estimator` (type: {}) does not have fit parameter"
                             " `sample_weight`.".format(type(self.estimator)))
@@ -111,16 +141,60 @@ def fit(self, X, y, sample_weight=None):
 
     @if_delegate_has_method('estimator_')
     def predict(self, X):
+        """Predict class labels for the given samples using ``self.estimator_``.
+
+        Args:
+            X (array-like): Test samples.
+
+        Returns:
+            array: Predicted class label per sample.
+        """
         return self.estimator_.predict(X)
 
     @if_delegate_has_method('estimator_')
     def predict_proba(self, X):
+        """Probability estimates from ``self.estimator_``.
+
+        The returned estimates for all classes are ordered by the label of
+        classes.
+
+        Args:
+            X (array-like): Test samples.
+
+        Returns:
+            array: Returns the probability of the sample for each class in the
+            model, where classes are ordered as they are in ``self.classes_``.
+        """
         return self.estimator_.predict_proba(X)
 
     @if_delegate_has_method('estimator_')
     def predict_log_proba(self, X):
+        """Log of probability estimates from ``self.estimator_``.
+
+        The returned estimates for all classes are ordered by the label of
+        classes.
+
+        Args:
+            X (array-like): Test samples.
+
+        Returns:
+            array: Returns the log-probability of the sample for each class in
+            the model, where classes are ordered as they are in
+            ``self.classes_``.
+        """
         return self.estimator_.predict_log_proba(X)
 
     @if_delegate_has_method('estimator_')
     def score(self, X, y, sample_weight=None):
+        """Returns the output of the estimator's score function on the given
+        test data and labels.
+
+        Args:
+            X (array-like): Test samples.
+            y (array-like): True labels for ``X``.
+            sample_weight (array-like, optional): Sample weights.
+
+        Returns:
+            float: ``self.estimator_.score(X, y, sample_weight)``
+        """
         return self.estimator_.score(X, y, sample_weight=sample_weight)
diff --git a/aif360/sklearn/tests/test_datasets.py b/aif360/sklearn/tests/test_datasets.py
index 5e2f00ad..0cd13a6c 100644
--- a/aif360/sklearn/tests/test_datasets.py
+++ b/aif360/sklearn/tests/test_datasets.py
@@ -5,13 +5,13 @@
 import pytest
 
 from aif360.sklearn.datasets import fetch_adult, fetch_bank, fetch_german
-from aif360.sklearn.datasets import standarize_dataset
+from aif360.sklearn.datasets import standardize_dataset
 from aif360.sklearn.datasets import fetch_compas, ColumnAlreadyDroppedWarning
 
 
 df = pd.DataFrame([[1, 2, 3, 'a'], [5, 6, 7, 'b'], [np.NaN, 10, 11, 'c']],
                   columns=['X1', 'X2', 'y', 'Z'])
-basic = partial(standarize_dataset, df=df, prot_attr='Z', target='y',
+basic = partial(standardize_dataset, df=df, prot_attr='Z', target='y',
                 dropna=False)
 
 def test_standardize_dataset_basic():
@@ -44,7 +44,7 @@ def test_usecols_dropcols_basic():
                       pd.DataFrame)
 
 def test_dropna_basic():
-    basic_dropna = partial(standarize_dataset, df=df, prot_attr='Z',
+    basic_dropna = partial(standardize_dataset, df=df, prot_attr='Z',
                            target='y', dropna=True)
     assert basic_dropna().X.shape == (2, 3)
     assert basic(dropcols='X1').X.shape == (3, 2)
diff --git a/docs/Makefile b/docs/Makefile
index 3d5de195..f417938a 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -12,7 +12,11 @@ BUILDDIR      = build
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
-.PHONY: help Makefile
+.PHONY: help clean Makefile
+
+clean:
+	-rm -rf $(BUILDDIR)/*
+	-rm -rf source/modules/generated/*
 
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 03058220..0f850880 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -33,7 +33,8 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = ['sphinx.ext.autodoc',
-    'sphinx.ext.viewcode',
+    'sphinx.ext.autosummary',
+    'sphinx.ext.linkcode',
     'sphinx.ext.napoleon',
     'sphinx.ext.intersphinx',
     'sphinx.ext.mathjax']
@@ -44,10 +45,22 @@
     'sklearn': ('https://scikit-learn.org/stable/', None),
     'python': ('https://docs.python.org/{}.{}'.format(*sys.version_info), None)}
 
+napoleon_include_init_with_doc = True
+napoleon_use_ivar = True
+napoleon_use_rtype = False
+
 autoclass_content = 'both'
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = []
+templates_path = ['templates']
+
+# generate autosummary even if no references
+autosummary_generate = True
+
+autodoc_default_options = {
+    'members': True,
+    'inherited-members': True
+}
 
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
@@ -60,7 +73,7 @@
 
 # General information about the project.
 project = u'aif360'
-copyright = u'2018, IBM Corporation'
+copyright = u'2018 - 2019, IBM Corporation'
 author = u'aif360 developers'
 
 # The version info for the project you're documenting, acts as replacement for
@@ -82,7 +95,14 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = []
+exclude_patterns = ['templates']
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+default_role = 'literal'
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+add_function_parentheses = False
 
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = 'sphinx'
@@ -95,8 +115,8 @@
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-#
-# html_theme = 'alabaster'
+
+# html_theme = 'bizstyle'
 if os.environ.get('READTHEDOCS') != 'True':
     try:
         import sphinx_rtd_theme
@@ -115,7 +135,10 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = []
+html_static_path = ['static']
+
+def setup(app):
+    app.add_stylesheet('style.css')
 
 # Custom sidebar templates, must be a dictionary that maps document names
 # to template names.
@@ -188,3 +211,60 @@
      author, 'aif360', 'One line description of project.',
      'Miscellaneous'),
 ]
+
+
+# -- Options for linkcode -------------------------------------------------
+# taken from numpy/doc/source/conf.py:
+import inspect
+from os.path import relpath, dirname
+def linkcode_resolve(domain, info):
+    """
+    Determine the URL corresponding to Python object
+    """
+    if domain != 'py':
+        return None
+
+    modname = info['module']
+    fullname = info['fullname']
+
+    submod = sys.modules.get(modname)
+    if submod is None:
+        return None
+
+    obj = submod
+    for part in fullname.split('.'):
+        try:
+            obj = getattr(obj, part)
+        except Exception:
+            return None
+
+    # strip decorators, which would resolve to the source of the decorator
+    # possibly an upstream bug in getsourcefile, bpo-1764286
+    try:
+        unwrap = inspect.unwrap
+    except AttributeError:
+        pass
+    else:
+        obj = unwrap(obj)
+
+    try:
+        fn = inspect.getsourcefile(obj)
+    except Exception:
+        fn = None
+    if not fn:
+        return None
+
+    try:
+        source, lineno = inspect.getsourcelines(obj)
+    except Exception:
+        lineno = None
+
+    if lineno:
+        linespec = "#L%d-L%d" % (lineno, lineno + len(source) - 1)
+    else:
+        linespec = ""
+
+    fn = relpath(fn, start=dirname(aif360.__file__))
+
+    return "https://github.com/IBM/AIF360/blob/master/aif360/%s%s" % (
+           fn, linespec)
diff --git a/docs/source/modules/sklearn.rst b/docs/source/modules/sklearn.rst
index 5a9fdb15..757c0ef8 100644
--- a/docs/source/modules/sklearn.rst
+++ b/docs/source/modules/sklearn.rst
@@ -1,25 +1,209 @@
-:mod:`aif360.sklearn`
-=====================
+=======================================
+`scikit-learn`-Compatible API Reference
+=======================================
 
-.. automodule:: aif360.sklearn
+This is the class and function reference for the `scikit-learn`-compatible
+version of the AIF360 API. It is functionally equivalent to the normal API but
+it uses scikit-learn paradigms (where possible) and Pandas `DataFrames` for
+datasets. Not all functionality from AIF360 is supported yet. See
+`Getting Started <https://github.com/IBM/AIF360/blob/master/aif360/sklearn/examples/Getting%20Started.ipynb>`_
+for a demo of the capabilities.
 
-Datasets
---------
+Note: This is under active development. Visit our
+`GitHub page <https://github.com/IBM/AIF360>`_ if you'd like to contribute!
 
-.. automodule:: aif360.sklearn.datasets.utils
-    :members:
 
-.. automodule:: aif360.sklearn.datasets.openml_datasets
-    :members:
+:mod:`aif360.sklearn.datasets`: Dataset loading functions
+=========================================================
 
-Metrics
+.. automodule:: aif360.sklearn.datasets
+    :no-members:
+    :no-inherited-members:
+
+Utils
+-----
+.. currentmodule:: aif360.sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+   :nosignatures:
+
+   datasets.ColumnAlreadyDroppedWarning
+
+.. autosummary::
+   :toctree: generated/
+   :template: base.rst
+   :nosignatures:
+
+   datasets.check_already_dropped
+   datasets.standardize_dataset
+   datasets.to_dataframe
+
+Loaders
+-------
+
+.. autosummary::
+   :toctree: generated/
+   :template: base.rst
+   :nosignatures:
+
+   datasets.fetch_adult
+   datasets.fetch_german
+   datasets.fetch_bank
+   datasets.fetch_compas
+
+:mod:`aif360.sklearn.metrics`: Fairness metrics
+===============================================
+
+.. automodule:: aif360.sklearn.metrics
+    :no-members:
+    :no-inherited-members:
+
+Meta-metrics
+------------
+.. currentmodule:: aif360.sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: base.rst
+   :nosignatures:
+
+   metrics.difference
+   metrics.ratio
+
+Scorers
 -------
+.. currentmodule:: aif360.sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: base.rst
+   :nosignatures:
+
+   metrics.make_difference_scorer
+   metrics.make_ratio_scorer
+
+Generic metrics
+---------------
+.. currentmodule:: aif360.sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: base.rst
+   :nosignatures:
+
+   metrics.specificity_score
+   metrics.sensitivity_score
+   metrics.base_rate
+   metrics.selection_rate
+   metrics.generalized_fpr
+   metrics.generalized_fnr
+
+Group fairness metrics
+----------------------
+.. currentmodule:: aif360.sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: base.rst
+   :nosignatures:
 
-.. automodule:: aif360.sklearn.metrics.metrics
-    :members:
+   metrics.statistical_parity_difference
+   metrics.mean_difference
+   metrics.disparate_impact_ratio
+   metrics.equal_opportunity_difference
+   metrics.average_odds_difference
+   metrics.average_odds_error
+   metrics.between_group_generalized_entropy_error
 
-Preprocessing
+Individual fairness metrics
+---------------------------
+.. currentmodule:: aif360.sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: base.rst
+   :nosignatures:
+
+   metrics.generalized_entropy_index
+   metrics.generalized_entropy_error
+   metrics.theil_index
+   metrics.coefficient_of_variation
+   metrics.consistency_score
+
+:mod:`aif360.sklearn.preprocessing`: Pre-processing Algorithms
+==============================================================
+
+.. automodule:: aif360.sklearn.preprocessing
+    :no-members:
+    :no-inherited-members:
+
+Pre-processors
+--------------
+.. currentmodule:: aif360.sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+   :nosignatures:
+
+   preprocessing.Reweighing
+
+Meta-Estimator
+--------------
+.. currentmodule:: aif360.sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+   :nosignatures:
+
+   preprocessing.ReweighingMeta
+
+:mod:`aif360.sklearn.inprocessing`: In-processing Algorithms
+============================================================
+
+.. automodule:: aif360.sklearn.inprocessing
+    :no-members:
+    :no-inherited-members:
+
+In-processors
 -------------
+.. currentmodule:: aif360.sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+   :nosignatures:
+
+   inprocessing.AdversarialDebiasing
+
+:mod:`aif360.sklearn.postprocessing`: Post-processing Algorithms
+================================================================
+
+.. automodule:: aif360.sklearn.postprocessing
+    :no-members:
+    :no-inherited-members:
+
+Post-processors
+---------------
+.. currentmodule:: aif360.sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+   :nosignatures:
+
+   postprocessing.CalibratedEqualizedOdds
+
+Meta-Estimator
+--------------
+.. currentmodule:: aif360.sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+   :nosignatures:
 
-.. autoclass:: aif360.sklearn.preprocessing.reweighing.Reweighing
-    :members:
+   postprocessing.PostProcessingMeta
\ No newline at end of file
diff --git a/docs/source/modules/standard_datasets.rst b/docs/source/modules/standard_datasets.rst
index 6209bc86..3f6f5622 100644
--- a/docs/source/modules/standard_datasets.rst
+++ b/docs/source/modules/standard_datasets.rst
@@ -1,4 +1,5 @@
 .. module:: aif360.datasets
+    :noindex:
 
 Base Class
 ----------
diff --git a/docs/source/static/style.css b/docs/source/static/style.css
new file mode 100644
index 00000000..460fdebc
--- /dev/null
+++ b/docs/source/static/style.css
@@ -0,0 +1,12 @@
+/* .wy-nav-content {
+    max-width: 1000px !important;
+} */
+
+/* override table width restrictions */
+.wy-table-responsive table td, .wy-table-responsive table th {
+    white-space: normal !important;
+}
+
+.wy-table-responsive {
+    overflow: visible !important;
+}
\ No newline at end of file
diff --git a/docs/source/templates/base.rst b/docs/source/templates/base.rst
new file mode 100644
index 00000000..ba0aa5f3
--- /dev/null
+++ b/docs/source/templates/base.rst
@@ -0,0 +1,6 @@
+:mod:`{{module}}`.{{objname}}
+{{ underline }}====================
+
+.. currentmodule:: {{ module }}
+
+.. auto{{ objtype }}:: {{ objname }}
diff --git a/docs/source/templates/class.rst b/docs/source/templates/class.rst
new file mode 100644
index 00000000..5f46cabb
--- /dev/null
+++ b/docs/source/templates/class.rst
@@ -0,0 +1,29 @@
+:mod:`{{module}}`.{{objname}}
+{{ underline }}==============
+
+.. currentmodule:: {{ module }}
+
+.. autoclass:: {{ objname }}
+
+   {% block methods %}
+   {% if methods %}
+   .. rubric:: Methods
+
+   .. autosummary::
+      :nosignatures:
+   {% for item in methods %}
+      {% if item != '__init__' %}
+        ~{{ name }}.{{ item }}
+      {% endif %}
+   {%- endfor %}
+   {% endif %}
+   {% endblock %}
+
+   {% block attributes %}
+   .. rubric:: Attributes
+
+   .. autosummary::
+   {% for item in attributes %}
+      ~{{ name }}.{{ item }}
+   {%- endfor %}
+   {% endblock %}

From 0e48ead05132a66be6fab4393832e50df9bdbdf4 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Fri, 13 Dec 2019 13:29:07 -0500
Subject: [PATCH 44/61] more gitignores

---
 .gitignore | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 5b50f9b1..b74b5f5a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,8 +5,23 @@
 .cache/
 .ipynb_checkpoints/
 .pytest_cache/
+__pycache__/
+
 .idea/
+.vscode/
+
+.eggs/
+aif360.egg-info
+build/
+dist/
+
+.coverage*
+coverage.txt
+
 docs/build/
+docs/source/modules/generated
+
+aif360/version.py
 aif360/data/raw/**
 !aif360/data/raw/*/*.md
-aif360/version.py
+aif360/sklearn/data/
\ No newline at end of file

From 0cbc3f4154a3cf877b46155972692b45763e1b88 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Fri, 13 Dec 2019 13:52:40 -0500
Subject: [PATCH 45/61] docstrings and add alpha=sqrt(global_step) option

---
 .../inprocessing/adversarial_debiasing.py     | 154 ++++++++++++++----
 1 file changed, 124 insertions(+), 30 deletions(-)

diff --git a/aif360/sklearn/inprocessing/adversarial_debiasing.py b/aif360/sklearn/inprocessing/adversarial_debiasing.py
index 1ba8a248..66014d6c 100644
--- a/aif360/sklearn/inprocessing/adversarial_debiasing.py
+++ b/aif360/sklearn/inprocessing/adversarial_debiasing.py
@@ -10,7 +10,9 @@
 
 
 class AdversarialDebiasing(BaseEstimator, ClassifierMixin):
-    """Adversarial debiasing is an in-processing technique that learns a
+    """Debiasing with adversarial learning.
+
+    Adversarial debiasing is an in-processing technique that learns a
     classifier to maximize prediction accuracy and simultaneously reduce an
     adversary's ability to determine the protected attribute from the
     predictions [#zhang18]_. This approach leads to a fair classifier as the
@@ -18,15 +20,55 @@ class AdversarialDebiasing(BaseEstimator, ClassifierMixin):
     adversary can exploit.
 
     References:
-        .. [#zhang18] B. H. Zhang, B. Lemoine, and M. Mitchell, "Mitigating
+        .. [#zhang18] `B. H. Zhang, B. Lemoine, and M. Mitchell, "Mitigating
            Unwanted Biases with Adversarial Learning," AAAI/ACM Conference on
            Artificial Intelligence, Ethics, and Society, 2018.
+           <https://dl.acm.org/citation.cfm?id=3278779>`_
+
+    Attributes:
+        prot_attr_ (str or list(str)): Protected attribute(s) used for
+            debiasing.
+        groups_ (array, shape (n_groups,)): A list of group labels known to the
+            classifier.
+        classes_ (array, shape (n_classes,)): A list of class labels known to
+            the classifier.
+        sess_ (tensorflow.Session): The TensorFlow Session used for the
+            computations. Note: this can be manually closed to free up resources
+            with `self.sess_.close()`.
+        classifier_logits_ (tensorflow.Tensor): Tensor containing output logits
+            from the classifier.
+        adversary_logits_ (tensorflow.Tensor): Tensor containing output logits
+            from the adversary.
     """
 
     def __init__(self, prot_attr=None, scope_name='classifier',
                  adversary_loss_weight=0.1, num_epochs=50, batch_size=128,
                  classifier_num_hidden_units=200, debias=True, verbose=False,
                  random_state=None):
+        r"""
+        Args:
+            prot_attr (single label or list-like, optional): Protected
+                attribute(s) to use in the debiasing process. If more than one
+                attribute, all combinations of values (intersections) are
+                considered. Default is ``None`` meaning all protected attributes
+                from the dataset are used.
+            scope_name (str, optional): TensorFlow "variable_scope" name for the
+                entire model (classifier and adversary).
+            adversary_loss_weight (float or ``None``, optional): If ``None``,
+                this will use the suggestion from the paper:
+                :math:`\alpha = \sqrt{global\_step}` with inverse time decay on
+                the learning rate. Otherwise, it uses the provided coefficient
+                with exponential learning rate decay.
+            num_epochs (int, optional): Number of epochs for which to train.
+            batch_size (int, optional): Size of mini-batch for training.
+            classifier_num_hidden_units (int, optional): Number of hidden units
+                in the classifier.
+            debias (bool, optional): If ``False``, learn a classifier without an
+                adversary.
+            verbose (bool, optional): If ``True``, print losses every 200 steps.
+            random_state (int or numpy.random.RandomState, optional): Seed of
+                pseudo-random number generator for shuffling data.
+        """
 
         self.prot_attr = prot_attr
         self.scope_name = scope_name
@@ -39,10 +81,20 @@ def __init__(self, prot_attr=None, scope_name='classifier',
         self.random_state = random_state
 
     def fit(self, X, y):
+        """Train the classifier and adversary (if ``debias == True``) with the
+        given training data.
+
+        Args:
+            X (array-like): Training samples.
+            y (array-like): Training labels.
+
+        Returns:
+            AdversarialDebiasing: self.
+        """
         X, y, _ = check_inputs(X, y)
         rng = check_random_state(self.random_state)
         ii32 = np.iinfo(np.int32)
-        seed1, seed2, seed3, seed4 = rng.randint(ii32.min, ii32.max, size=4)
+        s1, s2, s3, s4 = rng.randint(ii32.min, ii32.max, size=4)
 
         tf.reset_default_graph()
         self.sess_ = tf.Session()
@@ -51,7 +103,8 @@ def fit(self, X, y):
         le = LabelEncoder()
         y = le.fit_transform(y)
         self.classes_ = le.classes_
-        groups = groups.map(str)  # BUG: LabelEncoder converts to ndarray which removes tuple formatting
+        # BUG: LabelEncoder converts to ndarray which removes tuple formatting
+        groups = groups.map(str)
         groups = le.fit_transform(groups)
         self.groups_ = le.classes_
 
@@ -76,16 +129,16 @@ def fit(self, X, y):
             with tf.variable_scope('classifier_model'):
                 W1 = tf.get_variable(
                         'W1', [n_features, self.classifier_num_hidden_units],
-                        initializer=tf.initializers.glorot_uniform(seed=seed1))
-                b1 = tf.Variable(tf.zeros(shape=[self.classifier_num_hidden_units]),
-                        name='b1')
+                        initializer=tf.initializers.glorot_uniform(seed=s1))
+                b1 = tf.Variable(tf.zeros(
+                        shape=[self.classifier_num_hidden_units]), name='b1')
 
                 h1 = tf.nn.relu(tf.matmul(self.input_ph, W1) + b1)
-                h1 = tf.nn.dropout(h1, rate=1-self.keep_prob, seed=seed2)
+                h1 = tf.nn.dropout(h1, rate=1-self.keep_prob, seed=s2)
 
                 W2 = tf.get_variable(
                         'W2', [self.classifier_num_hidden_units, n_classes],
-                        initializer=tf.initializers.glorot_uniform(seed=seed3))
+                        initializer=tf.initializers.glorot_uniform(seed=s3))
                 b2 = tf.Variable(tf.zeros(shape=[n_classes]), name='b2')
 
                 self.classifier_logits_ = tf.matmul(h1, W2) + b2
@@ -110,12 +163,12 @@ def fit(self, X, y):
                     s = tf.sigmoid((1 + tf.abs(c)) * self.classifier_logits_)
 
                     W2 = tf.get_variable('W2', [3, n_groups],
-                            initializer=tf.initializers.glorot_uniform(seed=seed4))
+                            initializer=tf.initializers.glorot_uniform(seed=s4))
                     b2 = tf.Variable(tf.zeros(shape=[n_groups]), name='b2')
 
                     self.adversary_logits_ = tf.matmul(
                             tf.concat([s, s * self.true_labels_ph,
-                                       s * (1.0 - self.true_labels_ph)], axis=1),
+                                       s * (1. - self.true_labels_ph)], axis=1),
                             W2) + b2
 
                 # Obtain adversary loss
@@ -131,10 +184,14 @@ def fit(self, X, y):
                                                               tf.int32)),
                                     logits=self.adversary_logits_))
 
-            global_step = tf.train.get_or_create_global_step()
-            starter_learning_rate = 0.001
-            learning_rate = tf.train.exponential_decay(starter_learning_rate,
+            global_step = tf.Variable(0., trainable=False)
+            init_learning_rate = 0.001
+            if self.adversary_loss_weight is not None:
+                learning_rate = tf.train.exponential_decay(init_learning_rate,
                     global_step, 1000, 0.96, staircase=True)
+            else:
+                learning_rate = tf.train.inverse_time_decay(init_learning_rate,
+                        global_step, 1000, 0.1, staircase=True)
 
             # Setup optimizers
             clf_opt = tf.train.AdamOptimizer(learning_rate)
@@ -153,15 +210,20 @@ def fit(self, X, y):
             normalize = lambda x: x / (tf.norm(x) + np.finfo(np.float32).tiny)
 
             clf_grads = []
-            for (grad, var) in clf_opt.compute_gradients(clf_loss, var_list=clf_vars):
+            for (grad, var) in clf_opt.compute_gradients(clf_loss,
+                                                         var_list=clf_vars):
                 if self.debias:
                     unit_adv_grad = normalize(adv_grads[var])
                     # proj_{adv_grad} clf_grad:
                     grad -= tf.reduce_sum(grad * unit_adv_grad) * unit_adv_grad
-                    grad -= self.adversary_loss_weight * adv_grads[var]
+                    if self.adversary_loss_weight is not None:
+                        grad -= self.adversary_loss_weight * adv_grads[var]
+                    else:
+                        grad -= tf.sqrt(global_step) * adv_grads[var]
                 clf_grads.append((grad, var))
 
-            clf_min = clf_opt.apply_gradients(clf_grads, global_step=global_step)
+            clf_min = clf_opt.apply_gradients(clf_grads,
+                                              global_step=global_step)
             if self.debias:
                 with tf.control_dependencies([clf_min]):
                     adv_min = adv_opt.minimize(adv_loss, var_list=adv_vars)
@@ -182,26 +244,37 @@ def fit(self, X, y):
                                        self.prot_attr_ph: batch_prot_attr,
                                        self.keep_prob: 0.8}
                     if self.debias:
-                        _, _, clf_loss_value, adv_loss_value = (
-                                self.sess_.run([clf_min, adv_min,
-                                               clf_loss, adv_loss],
-                                               feed_dict=batch_feed_dict))
+                        _, _, clf_loss_val, adv_loss_val = self.sess_.run(
+                                [clf_min, adv_min, clf_loss, adv_loss],
+                                feed_dict=batch_feed_dict)
+
                         if i % 200 == 0 and self.verbose:
-                            print("epoch {}; iter: {}; batch classifier loss: "
-                                  "{}; batch adversarial loss: {}".format(
-                                          epoch, i, clf_loss_value,
-                                          adv_loss_value))
+                            print("epoch {:>3d}; iter: {:>4d}; batch classifier"
+                                  " loss: {:.4f}; batch adversarial loss: "
+                                  "{:.4f}".format(epoch, i, clf_loss_val,
+                                                  adv_loss_val))
                     else:
-                        _, clf_loss_value = self.sess_.run(
-                                [clf_min, clf_loss],
+                        _, clf_loss_val = self.sess_.run([clf_min, clf_loss],
                                 feed_dict=batch_feed_dict)
+
                         if i % 200 == 0 and self.verbose:
-                            print("epoch {}; iter: {}; batch classifier loss: "
-                                  "{}".format(epoch, i, clf_loss_value))
+                            print("epoch {:>3d}; iter: {:>4d}; batch classifier"
+                                  " loss: {:.4f}".format(epoch, i,
+                                                         clf_loss_val))
 
         return self
 
     def decision_function(self, X):
+        """Soft prediction scores.
+
+        Args:
+            X (array-like): Test samples.
+
+        Returns:
+            numpy.ndarray: Confidence scores per (sample, class) combination. In
+            the binary case, confidence score for ``self.classes_[1]`` where >0
+            means this class would be predicted.
+        """
         check_is_fitted(self, ['classes_', 'input_ph', 'keep_prob',
                                'classifier_logits_'])
         n_samples = X.shape[0]
@@ -224,12 +297,25 @@ def decision_function(self, X):
                                self.keep_prob: 1.0}
 
             scores[batch_ids] = self.sess_.run(self.classifier_logits_,
-                                              feed_dict=batch_feed_dict)
+                                               feed_dict=batch_feed_dict)
             samples_covered += len(batch_features)
 
         return scores.ravel() if scores.shape[1] == 1 else scores
 
     def predict_proba(self, X):
+        """Probability estimates.
+
+        The returned estimates for all classes are ordered by the label of
+        classes.
+
+        Args:
+            X (array-like): Test samples.
+
+        Returns:
+            numpy.ndarray: Returns the probability of the sample for each class
+            in the model, where classes are ordered as they are in
+            ``self.classes_``.
+        """
         decision = self.decision_function(X)
 
         if decision.ndim == 1:
@@ -239,6 +325,14 @@ def predict_proba(self, X):
         return scipy.special.softmax(decision_2d, axis=1)
 
     def predict(self, X):
+        """Predict class labels for the given samples.
+
+        Args:
+            X (array-like): Test samples.
+
+        Returns:
+            numpy.ndarray: Predicted class label per sample.
+        """
         scores = self.decision_function(X)
         if scores.ndim == 1:
             indices = (scores > 0).astype(np.int)

From 8be64498e13e1dba3ffcbb44dd29b3c52a99115e Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Fri, 13 Dec 2019 13:57:27 -0500
Subject: [PATCH 46/61] docstrings and input is now predict_proba output

also added score function (compute weighted cost)
---
 aif360/sklearn/postprocessing/__init__.py     | 127 ++++++++++--
 .../calibrated_equalized_odds.py              | 189 +++++++++++++-----
 .../tests/test_calibrated_equalized_odds.py   |   6 +-
 3 files changed, 247 insertions(+), 75 deletions(-)

diff --git a/aif360/sklearn/postprocessing/__init__.py b/aif360/sklearn/postprocessing/__init__.py
index 49e89d42..acc63020 100644
--- a/aif360/sklearn/postprocessing/__init__.py
+++ b/aif360/sklearn/postprocessing/__init__.py
@@ -1,3 +1,7 @@
+"""
+Post-processing algorithms modify predictions to be more fair (predictions in,
+predictions out).
+"""
 from logging import warning
 
 import numpy as np
@@ -11,7 +15,16 @@
 
 
 class PostProcessingMeta(BaseEstimator, MetaEstimatorMixin):
-    """
+    """A meta-estimator which wraps a given estimator with a post-processing
+    step.
+
+    The post-processor trains on a separate training set from the estimator to
+    prevent leakage.
+
+    Note:
+        Because of the dataset splitting, if a Pipeline is necessary it should
+        be used as the input to this meta-estimator, not the other way around.
+
     Attributes:
         estimator_: Cloned ``estimator``.
         postprocessor_: Cloned ``postprocessor``.
@@ -40,6 +53,7 @@ def __init__(self, estimator, postprocessor=CalibratedEqualizedOdds(),
         """
         self.estimator = estimator
         self.postprocessor = postprocessor
+        self.use_proba = use_proba
         self.val_size = val_size
         self.options = options
 
@@ -47,9 +61,26 @@ def __init__(self, estimator, postprocessor=CalibratedEqualizedOdds(),
     def _estimator_type(self):
         return self.postprocessor._estimator_type
 
-    def fit(self, X, y, pos_label=1, sample_weight=None):
-        self.pos_label_ = pos_label
-        self.use_proba_ = isinstance(self.postprocessor, CalibratedEqualizedOdds)
+    def fit(self, X, y, sample_weight=None, **fit_params):
+        """Splits the training samples with
+        :func:`~sklearn.model_selection.train_test_split` and uses the resultant
+        'train' portion to train the estimator. Then the estimator predicts on
+        the 'test' portion of the split data and the post-processor is trained
+        with those prediction-ground-truth target pairs.
+
+        Args:
+            X (array-like): Training samples.
+            y (array-like): Training labels.
+            sample_weight (array-like, optional): Sample weights.
+            **fit_params: Parameters passed to the post-processor ``fit``
+                method. Note: these do not need to be prefixed with ``__``
+                notation.
+
+        Returns:
+            PostProcessingMeta: self.
+        """
+        self.use_proba_ = (self.use_proba if self.use_proba is not None else
+                isinstance(self.postprocessor, CalibratedEqualizedOdds))
         if self.use_proba_ and not hasattr(self.estimator, 'predict_proba'):
             raise TypeError("`estimator` (type: {}) does not implement method "
                             "`predict_proba()`.".format(type(self.estimator)))
@@ -72,48 +103,100 @@ def fit(self, X, y, pos_label=1, sample_weight=None):
             X_est, X_post, y_est, y_post = train_test_split(X, y, **options_)
             self.estimator_.fit(X_est, y_est)
 
-        pos_idx = np.nonzero(self.estimator_.classes_ == pos_label)[0][0]
         y_pred = (self.estimator_.predict(X_post) if not self.use_proba_ else
-                  self.estimator_.predict_proba(X_post)[:, pos_idx])
-        self.postprocessor_.fit(y_post, y_pred, pos_label=pos_label,
-                sample_weight=None if sample_weight is None else sw_post)
+                  self.estimator_.predict_proba(X_post))
+        # fit_params = fit_params.copy().update(labels=self.estimator_.classes_)
+        self.postprocessor_.fit(y_pred, y_post, sample_weight=sw_post
+                                if sample_weight is not None else None,
+                                **fit_params)
         return self
 
-    @property
-    def classes_(self):
-        # order of postprocessor.classes_ may differ from estimator_.classes_
-        check_is_fitted(self.postprocessor_, 'classes_')
-        return self.postprocessor_.classes_
-
     @if_delegate_has_method('postprocessor_')
     def predict(self, X):
-        pos_idx = np.nonzero(self.estimator_.classes_ == self.pos_label_)[0][0]
+        """Predict class labels for the given samples.
+
+        First, runs ``self.estimator_.predict`` (or ``predict_proba`` if
+        ``self.use_proba_`` is ``True``) then returns the post-processed output
+        from those predictions.
+
+        Args:
+            X (array-like): Test samples.
+
+        Returns:
+            numpy.ndarray: Predicted class label per sample.
+        """
         y_pred = (self.estimator_.predict(X) if not self.use_proba_ else
-                  self.estimator_.predict_proba(X)[:, pos_idx])
+                  self.estimator_.predict_proba(X))
         y_pred = pd.Series(y_pred, index=X.index)
         return self.postprocessor_.predict(y_pred)
 
     @if_delegate_has_method('postprocessor_')
     def predict_proba(self, X):
-        pos_idx = np.nonzero(self.estimator_.classes_ == self.pos_label_)[0][0]
+        """Probability estimates.
+
+        First, runs ``self.estimator_.predict`` (or ``predict_proba`` if
+        ``self.use_proba_`` is ``True``) then returns the post-processed output
+        from those predictions.
+
+        The returned estimates for all classes are ordered by the label of
+        classes.
+
+        Args:
+            X (array-like): Test samples.
+
+        Returns:
+            numpy.ndarray: Returns the probability of the sample for each class
+            in the model, where classes are ordered as they are in
+            ``self.classes_``.
+        """
         y_pred = (self.estimator_.predict(X) if not self.use_proba_ else
-                  self.estimator_.predict_proba(X)[:, pos_idx])
+                  self.estimator_.predict_proba(X))
         y_pred = pd.Series(y_pred, index=X.index)
         return self.postprocessor_.predict_proba(y_pred)
 
     @if_delegate_has_method('postprocessor_')
     def predict_log_proba(self, X):
-        pos_idx = np.nonzero(self.estimator_.classes_ == self.pos_label_)[0][0]
+        """Log of probability estimates.
+
+        First, runs ``self.estimator_.predict`` (or ``predict_proba`` if
+        ``self.use_proba_`` is ``True``) then returns the post-processed output
+        from those predictions.
+
+        The returned estimates for all classes are ordered by the label of
+        classes.
+
+        Args:
+            X (array-like): Test samples.
+
+        Returns:
+            array: Returns the log-probability of the sample for each class in
+            the model, where classes are ordered as they are in
+            ``self.classes_``.
+        """
         y_pred = (self.estimator_.predict(X) if not self.use_proba_ else
-                  self.estimator_.predict_proba(X)[:, pos_idx])
+                  self.estimator_.predict_proba(X))
         y_pred = pd.Series(y_pred, index=X.index)
         return self.postprocessor_.predict_log_proba(y_pred)
 
     @if_delegate_has_method('postprocessor_')
     def score(self, X, y, sample_weight=None):
-        pos_idx = np.nonzero(self.estimator_.classes_ == self.pos_label_)[0][0]
+        """Returns the output of the post-processor's score function on the
+        given test data and labels.
+
+        First, runs ``self.estimator_.predict`` (or ``predict_proba`` if
+        ``self.use_proba_`` is ``True``) then gets the post-processed output
+        from those predictions and scores it.
+
+        Args:
+            X (array-like): Test samples.
+            y (array-like): True labels for ``X``.
+            sample_weight (array-like, optional): Sample weights.
+
+        Returns:
+            float: Score value.
+        """
         y_pred = (self.estimator_.predict(X) if not self.use_proba_ else
-                  self.estimator_.predict_proba(X)[:, pos_idx])
+                  self.estimator_.predict_proba(X))
         y_pred = pd.Series(y_pred, index=X.index)
         return self.postprocessor_.score(y_pred, y, sample_weight=sample_weight)
 
diff --git a/aif360/sklearn/postprocessing/calibrated_equalized_odds.py b/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
index 143ed423..088a84a3 100644
--- a/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
+++ b/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
@@ -1,16 +1,24 @@
 import numpy as np
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.utils import check_random_state
+from sklearn.utils.validation import check_is_fitted
 
-from aif360.sklearn.metrics import base_rate, generalized_fnr, generalized_fpr
+from aif360.sklearn.metrics import difference, base_rate
+from aif360.sklearn.metrics import generalized_fnr, generalized_fpr
 from aif360.sklearn.utils import check_groups
 
 
 class CalibratedEqualizedOdds(BaseEstimator, ClassifierMixin):
-    """Calibrated equalized odds postprocessing is a post-processing technique
-    that optimizes over calibrated classifier score outputs to find
-    probabilities with which to change output labels with an equalized odds
-    objective [#pleiss17]_.
+    """Calibrated equalized odds post-processor.
+
+    Calibrated equalized odds is a post-processing technique that optimizes over
+    calibrated classifier score outputs to find probabilities with which to
+    change output labels with an equalized odds objective [#pleiss17]_.
+
+    Note:
+        This breaks the scikit-learn API by requiring fit params ``y_true``,
+        ``y_pred``, and ``pos_label`` and predict param ``y_pred``. See
+        :class:`PostProcessingMeta` for a workaround.
 
     References:
         .. [#pleiss17] `G. Pleiss, M. Raghavan, F. Wu, J. Kleinberg, and
@@ -20,78 +28,125 @@ class CalibratedEqualizedOdds(BaseEstimator, ClassifierMixin):
 
     Adapted from:
     https://github.com/gpleiss/equalized_odds_and_calibration/blob/master/calib_eq_odds.py
+
+    Attributes:
+        prot_attr_ (str or list(str)): Protected attribute(s) used for post-
+            processing.
+        groups_ (array, shape (2,)): A list of group labels known to the
+            classifier. Note: this algorithm requires a binary division of the
+            data.
+        classes_ (array, shape (num_classes,)): A list of class labels known to
+            the classifier. Note: this algorithm treats all non-positive
+            outcomes as negative (binary classification only).
+        pos_label_ (scalar): The label of the positive class.
+        mix_rates_ (array, shape (2,)): The interpolation parameters -- the
+            probability of randomly returning the group's base rate. The group
+            for which the cost function is higher is set to 0.
     """
     def __init__(self, prot_attr=None, cost_constraint='weighted',
                  random_state=None):
         """
         Args:
             prot_attr (single label or list-like, optional): Protected
-                attribute(s) to use as sensitive attribute(s) in the post-
-                processing. If more than one attribute, all combinations of
-                values (intersections) are considered. Default is ``None``
-                meaning all protected attributes from the dataset are used.
-                Note: This algorithm requires there be exactly 2 groups
-                (privileged and unprivileged).
-            cost_constraint ('fpr', 'fnr', or 'weighted'):
-            random_state (int or numpy.RandomState, optional):
+                attribute(s) to use in the post-processing. If more than one
+                attribute, all combinations of values (intersections) are
+                considered. Default is ``None`` meaning all protected attributes
+                from the dataset are used. Note: This algorithm requires there
+                be exactly 2 groups (privileged and unprivileged).
+            cost_constraint ('fpr', 'fnr', or 'weighted'): Which equal-cost
+                constraint to satisfy: generalized false positive rate ('fpr'),
+                generalized false negative rate ('fnr'), or a weighted
+                combination of both ('weighted').
+            random_state (int or numpy.RandomState, optional): Seed of pseudo-
+                random number generator for shuffling data.
         """
         self.prot_attr = prot_attr
         self.cost_constraint = cost_constraint
         self.random_state = random_state
 
-    def fit(self, y_true, y_pred, pos_label=1, sample_weight=None):
+    def _weighted_cost(self, y_true, probas_pred, pos_label, sample_weight):
+        """Evaluates the cost function specified by ``self.cost_constraint``."""
+        fpr = generalized_fpr(y_true, probas_pred, pos_label, sample_weight)
+        fnr = generalized_fnr(y_true, probas_pred, pos_label, sample_weight)
+        br = base_rate(y_true, probas_pred, pos_label, sample_weight)
+        if self.cost_constraint == 'fpr':
+            return fpr
+        elif self.cost_constraint == 'fnr':
+            return fnr
+        elif self.cost_constraint == 'weighted':
+            return fpr * (1 - br) + fnr * br
+        else:
+            raise ValueError("`cost_constraint` must be one of: 'fpr', 'fnr', "
+                             "or 'weighted'")
+
+    def fit(self, y_pred, y_true, labels=None, pos_label=1, sample_weight=None):
+        """Compute the mixing rates required to satisfy the cost constraint.
+
+        Args:
+            y_pred (array-like): Probability estimates of the targets as
+                returned by a ``predict_proba()`` call or equivalent.
+            y_true (array-like): Ground-truth (correct) target values.
+            labels (list, optional): The ordered set of label values. Must
+                match the order of columns in ``y_pred`` if provided. By
+                default, all labels in ``y_true`` are used in sorted order.
+            pos_label (scalar, optional): The label of the positive class.
+            sample_weight (array-like, optional): Sample weights.
+
+        Returns:
+            CalibratedEqualizedOdds: self.
+        """
         groups, self.prot_attr_ = check_groups(y_true, self.prot_attr)
-        self.classes_ = np.unique(y_true)
+        self.classes_ = labels if labels is not None else np.unique(y_true)
         self.groups_ = np.unique(groups)
+        self.pos_label_ = pos_label
+
+        if len(self.classes_) > 2:
+            raise ValueError('Only binary classification is supported.')
 
         if pos_label not in self.classes_:
-            raise ValueError('pos_label={} is not present in y_true. The valid '
-                             'values are:\n{}'.format(pos_label, self.classes_))
+            raise ValueError('pos_label={} is not in the set of labels. The '
+                    'valid values are:\n{}'.format(pos_label, self.classes_))
 
         if len(self.groups_) != 2:
             raise ValueError('prot_attr={}\nyielded {} groups:\n{}\nbut this '
-                             'algorithm requires a binary division of the '
-                             'data.'.format(self.prot_attr_, len(self.groups_),
-                                            self.groups_))
+                    'algorithm requires a binary division of the data.'.format(
+                            self.prot_attr_, len(self.groups_), self.groups_))
 
-        # ensure self.classes_ = [neg_label, pos_label]
-        self.classes_ = np.append(np.delete(self.classes_, pos_label),
-                                  pos_label)
+        y_pred = y_pred[:, np.nonzero(self.classes_ == self.pos_label_)[0][0]]
 
-        def args(grp_idx, triv=False):
-            i = (groups == self.groups_[grp_idx])
+        # local function to return corresponding args for metric evaluation
+        def _args(grp_idx, triv=False):
+            idx = (groups == self.groups_[grp_idx])
             pred = (np.full_like(y_pred, self.base_rates_[grp_idx]) if triv else
                     y_pred)
-            return dict(y_true=y_true[i], y_pred=pred[i], pos_label=pos_label,
-                        sample_weight=None if sample_weight is None
-                                      else sample_weight[i])
-
-        self.base_rates_ = [base_rate(**args(i)) for i in range(2)]
-
-        def weighted_cost(grp_idx, triv=False):
-            fpr = generalized_fpr(**args(grp_idx, triv=triv))
-            fnr = generalized_fnr(**args(grp_idx, triv=triv))
-            base_rate = self.base_rates_[grp_idx]
-            if self.cost_constraint == 'fpr':
-                return fpr
-            elif self.cost_constraint == 'fnr':
-                return fnr
-            elif self.cost_constraint == 'weighted':
-                return fpr * (1 - base_rate) + fnr * base_rate
-            else:
-                raise ValueError("`cost_constraint` must be one of: 'fpr', "
-                                 "'fnr', or 'weighted'")
-
-        costs = [weighted_cost(i) for i in range(2)]
+            return [y_true[idx], pred[idx], pos_label,
+                    sample_weight[idx] if sample_weight is not None else None]
+
+        self.base_rates_ = [base_rate(*_args(i)) for i in range(2)]
+
+        costs = [self._weighted_cost(*_args(i)) for i in range(2)]
         self.mix_rates_ = [(costs[1] - costs[0])
-                           / (weighted_cost(0, triv=True) - costs[0]),
+                         / (self._weighted_cost(*_args(0, True)) - costs[0]),
                            (costs[0] - costs[1])
-                           / (weighted_cost(1, triv=True) - costs[1])]
+                         / (self._weighted_cost(*_args(1, True)) - costs[1])]
         self.mix_rates_[np.argmax(costs)] = 0
 
         return self
 
     def predict_proba(self, y_pred):
+        """The returned estimates for all classes are ordered by the label of
+        classes.
+
+        Args:
+            y_pred (array-like): Probability estimates of the targets as
+                returned by a ``predict_proba()`` call or equivalent.
+
+        Returns:
+            numpy.ndarray: Returns the probability of the sample for each class
+            in the model, where classes are ordered as they are in
+            ``self.classes_``.
+        """
+        check_is_fitted(self, 'mix_rates_')
         rng = check_random_state(self.random_state)
 
         groups, _ = check_groups(y_pred, self.prot_attr_)
@@ -100,6 +155,9 @@ def predict_proba(self, y_pred):
                              'match those from the training set:\n{}'.format(
                                      np.unique(groups), self.groups_))
 
+        pos_idx = np.nonzero(self.classes_ == self.pos_label_)[0][0]
+        y_pred = y_pred[:, pos_idx]
+
         yt = np.empty_like(y_pred)
         for grp_idx in range(2):
             i = (groups == self.groups_[grp_idx])
@@ -108,8 +166,39 @@ def predict_proba(self, y_pred):
             new_preds[to_replace] = self.base_rates_[grp_idx]
             yt[i] = new_preds
 
-        return np.stack([1 - yt, yt], axis=-1)
+        return np.c_[1 - yt, yt] if pos_idx == 1 else np.c_[yt, 1 - yt]
 
     def predict(self, y_pred):
+        """Predict class labels for the given scores.
+
+        Args:
+            y_pred (array-like): Probability estimates of the targets as
+                returned by a ``predict_proba()`` call or equivalent.
+
+        Returns:
+            numpy.ndarray: Predicted class label per sample.
+        """
         scores = self.predict_proba(y_pred)
-        return self.classes_[scores.argmax(axis=1)]
+        return self.classes_[scores.argmax(axis=1)]
+
+    def score(self, y_pred, y_true, sample_weight=None):
+        """Score the predictions according to the cost constraint specified.
+
+        Args:
+            y_pred (array-like): Probability estimates of the targets as
+                returned by a ``predict_proba()`` call or equivalent.
+            y_true (array-like): Ground-truth (correct) target values.
+            sample_weight (array-like, optional): Sample weights.
+
+        Returns:
+            float: Absolute value of the difference in cost function for the two
+            groups (e.g. :func:`~aif360.sklearn.metrics.generalized_fpr` if
+            ``self.cost_constraint`` is 'fpr')
+        """
+        check_is_fitted(self, ['classes_', 'pos_label_'])
+        pos_idx = np.nonzero(self.classes_ == self.pos_label_)[0][0]
+        probas_pred = self.predict_proba(y_pred)[:, pos_idx]
+
+        return abs(difference(self._weighted_cost, y_true, probas_pred,
+                prot_attr=self.prot_attr_, priv_group=self.groups_[1],
+                sample_weight=sample_weight))
diff --git a/aif360/sklearn/tests/test_calibrated_equalized_odds.py b/aif360/sklearn/tests/test_calibrated_equalized_odds.py
index f1a6f3b3..1cba4391 100644
--- a/aif360/sklearn/tests/test_calibrated_equalized_odds.py
+++ b/aif360/sklearn/tests/test_calibrated_equalized_odds.py
@@ -15,14 +15,14 @@
 
 def test_calib_eq_odds_sex():
     logreg = LogisticRegression(solver='lbfgs', max_iter=500)
-    y_pred = logreg.fit(X, y, sample_weight=sample_weight).predict_proba(X)[:, 1]
+    y_pred = logreg.fit(X, y, sample_weight=sample_weight).predict_proba(X)
     adult_pred = adult.copy()
-    adult_pred.scores = y_pred
+    adult_pred.scores = y_pred[:, 1]
     orig_cal_eq_odds = CalibratedEqOddsPostprocessing(
             unprivileged_groups=[{'sex': 0}], privileged_groups=[{'sex': 1}])
     orig_cal_eq_odds.fit(adult, adult_pred)
     cal_eq_odds = CalibratedEqualizedOdds('sex')
-    cal_eq_odds.fit(y, y_pred, sample_weight=sample_weight)
+    cal_eq_odds.fit(y_pred, y, sample_weight=sample_weight)
 
     assert np.isclose(orig_cal_eq_odds.priv_mix_rate, cal_eq_odds.mix_rates_[1])
     assert np.isclose(orig_cal_eq_odds.unpriv_mix_rate, cal_eq_odds.mix_rates_[0])

From 994bdf0457a90def05db458f15bcc3d37a54d4ff Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Wed, 18 Dec 2019 15:00:59 -0500
Subject: [PATCH 47/61] moved tests to main test folder

---
 .travis.yml                                                     | 2 +-
 .../tests => tests/sklearn}/test_adversarial_debiasing.py       | 0
 .../tests => tests/sklearn}/test_calibrated_equalized_odds.py   | 0
 {aif360/sklearn/tests => tests/sklearn}/test_datasets.py        | 0
 {aif360/sklearn/tests => tests/sklearn}/test_metrics.py         | 0
 {aif360/sklearn/tests => tests/sklearn}/test_reweighing.py      | 0
 6 files changed, 1 insertion(+), 1 deletion(-)
 rename {aif360/sklearn/tests => tests/sklearn}/test_adversarial_debiasing.py (100%)
 rename {aif360/sklearn/tests => tests/sklearn}/test_calibrated_equalized_odds.py (100%)
 rename {aif360/sklearn/tests => tests/sklearn}/test_datasets.py (100%)
 rename {aif360/sklearn/tests => tests/sklearn}/test_metrics.py (100%)
 rename {aif360/sklearn/tests => tests/sklearn}/test_reweighing.py (100%)

diff --git a/.travis.yml b/.travis.yml
index fdfa087e..a9c99eda 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -28,4 +28,4 @@ before_script:
 script:
   # stop the build if there are Python syntax errors or undefined names
   - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics
-  - travis_wait python -m pytest aif360/sklearn/tests
+  - travis_wait python -m pytest tests/sklearn
diff --git a/aif360/sklearn/tests/test_adversarial_debiasing.py b/tests/sklearn/test_adversarial_debiasing.py
similarity index 100%
rename from aif360/sklearn/tests/test_adversarial_debiasing.py
rename to tests/sklearn/test_adversarial_debiasing.py
diff --git a/aif360/sklearn/tests/test_calibrated_equalized_odds.py b/tests/sklearn/test_calibrated_equalized_odds.py
similarity index 100%
rename from aif360/sklearn/tests/test_calibrated_equalized_odds.py
rename to tests/sklearn/test_calibrated_equalized_odds.py
diff --git a/aif360/sklearn/tests/test_datasets.py b/tests/sklearn/test_datasets.py
similarity index 100%
rename from aif360/sklearn/tests/test_datasets.py
rename to tests/sklearn/test_datasets.py
diff --git a/aif360/sklearn/tests/test_metrics.py b/tests/sklearn/test_metrics.py
similarity index 100%
rename from aif360/sklearn/tests/test_metrics.py
rename to tests/sklearn/test_metrics.py
diff --git a/aif360/sklearn/tests/test_reweighing.py b/tests/sklearn/test_reweighing.py
similarity index 100%
rename from aif360/sklearn/tests/test_reweighing.py
rename to tests/sklearn/test_reweighing.py

From 372e1116e0f458ea027e94900704acab2be31ad9 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 19 Dec 2019 11:53:00 -0500
Subject: [PATCH 48/61] more docs and formatting changes

---
 aif360/datasets/structured_dataset.py         | 23 ++++--
 aif360/sklearn/datasets/compas_dataset.py     | 17 ++--
 aif360/sklearn/datasets/openml_datasets.py    | 11 ++-
 aif360/sklearn/datasets/utils.py              | 10 +--
 .../inprocessing/adversarial_debiasing.py     | 10 +--
 aif360/sklearn/metrics/metrics.py             | 58 +++++++-------
 aif360/sklearn/preprocessing/reweighing.py    | 15 ++--
 aif360/sklearn/utils.py                       | 35 +++++++--
 docs/source/conf.py                           |  4 +-
 docs/source/index.rst                         | 10 ++-
 docs/source/modules/algorithms.rst            | 78 +++++++++++++++----
 docs/source/modules/datasets.rst              | 44 ++++++-----
 docs/source/modules/explainers.rst            | 21 ++---
 docs/source/modules/inprocessing.rst          | 35 ---------
 docs/source/modules/metrics.rst               | 53 +++++++------
 docs/source/modules/postprocessing.rst        | 22 ------
 docs/source/modules/preprocessing.rst         | 30 -------
 docs/source/modules/sklearn.rst               | 42 +++++-----
 docs/source/modules/standard_datasets.rst     | 32 --------
 docs/source/static/style.css                  |  6 +-
 20 files changed, 271 insertions(+), 285 deletions(-)
 delete mode 100644 docs/source/modules/inprocessing.rst
 delete mode 100644 docs/source/modules/postprocessing.rst
 delete mode 100644 docs/source/modules/preprocessing.rst
 delete mode 100644 docs/source/modules/standard_datasets.rst

diff --git a/aif360/datasets/structured_dataset.py b/aif360/datasets/structured_dataset.py
index c36e2308..3f03b94a 100644
--- a/aif360/datasets/structured_dataset.py
+++ b/aif360/datasets/structured_dataset.py
@@ -411,14 +411,25 @@ def import_dataset(self, import_metadata=False):
         return None
 
     def split(self, num_or_size_splits, shuffle=False, seed=None):
-        """Split the dataset into multiple datasets
+        """Split this dataset into multiple partitions.
+
         Args:
-            num_or_size_splits (list or int):
-            shuffle (bool):
-            seed (int or array_like): takes the same argument as `numpy.random.seed()`
-            function
+            num_or_size_splits (array or int): If `num_or_size_splits` is an
+                int, *k*, the value is the number of equal-sized folds to make
+                (if *k* does not evenly divide the dataset these folds are
+                approximately equal-sized). If `num_or_size_splits` is an array
+                of type int, the values are taken as the indices at which to
+                split the dataset. If the values are floats (< 1.), they are
+                considered to be fractional proportions of the dataset at which
+                to split.
+            shuffle (bool, optional): Randomly shuffle the dataset before
+                splitting.
+            seed (int or array_like): Takes the same argument as
+                :func:`numpy.random.seed()`.
+
         Returns:
-            list: Each element of this list is a dataset obtained during the split
+            list: Splits. Contains *k* or `len(num_or_size_splits) + 1`
+            datasets depending on `num_or_size_splits`.
         """
 
         # Set seed
diff --git a/aif360/sklearn/datasets/compas_dataset.py b/aif360/sklearn/datasets/compas_dataset.py
index 76a0d9df..81578ef5 100644
--- a/aif360/sklearn/datasets/compas_dataset.py
+++ b/aif360/sklearn/datasets/compas_dataset.py
@@ -17,11 +17,11 @@ def fetch_compas(data_home=None, binary_race=False,
                  dropcols=[], numeric_only=False, dropna=True):
     """Load the COMPAS Recidivism Risk Scores dataset.
 
-    Optionally binarizes 'race' to 'Caucasian' (privileged) or 'African-American'
-    (unprivileged). The other protected attribute is 'sex' ('Male' is
-    *unprivileged* and 'Female' is *privileged*). The outcome variable is
-    'no recid.' (favorable) if the person was not accused of a crime within two
-    years or 'did recid.' (unfavorable) if they were.
+    Optionally binarizes 'race' to 'Caucasian' (privileged) or
+    'African-American' (unprivileged). The other protected attribute is 'sex'
+    ('Male' is *unprivileged* and 'Female' is *privileged*). The outcome
+    variable is 'no recid.' (favorable) if the person was not accused of a crime
+    within two years or 'did recid.' (unfavorable) if they were.
 
     Args:
         data_home (string, optional): Specify another download and cache folder
@@ -59,11 +59,14 @@ def fetch_compas(data_home=None, binary_race=False,
     for col in ['sex', 'age_cat', 'race', 'c_charge_degree', 'c_charge_desc']:
         df[col] = df[col].astype('category')
 
-    df.two_year_recid = df.two_year_recid.replace({0: 'no recid.', 1: 'did recid.'}).astype('category').cat.as_ordered()  # 'did recid' < 'no recid'
+    # 'did recid' < 'no recid'
+    df.two_year_recid = df.two_year_recid.replace({0: 'no recid.',
+            1: 'did recid.'}).astype('category').cat.as_ordered()
 
     if binary_race:
+        # 'African-American' < 'Caucasian'
         df.race = df.race.cat.set_categories(['African-American', 'Caucasian'],
-                                             ordered=True)  # 'African-American' < 'Caucasian'
+                                             ordered=True)
 
     df.sex = df.sex.astype('category').cat.as_ordered()  # 'Female' < 'Male'
 
diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 6decfcb7..1bfa24e7 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -99,10 +99,9 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
 
     Protected attributes are 'sex' ('male' is privileged and 'female' is
     unprivileged) and 'age' (binarized by default as recommended by
-    [#kamiran09]_: ``age >= 25`` is considered privileged and ``age < 25`` is
-    considered unprivileged; see the ``binary_age`` flag to keep this
-    continuous). The outcome variable is 'credit-risk': 'good' (favorable) or
-    'bad' (unfavorable).
+    [#kamiran09]_: age >= 25 is considered privileged and age < 25 is considered
+    unprivileged; see the binary_age flag to keep this continuous). The outcome
+    variable is 'credit-risk': 'good' (favorable) or 'bad' (unfavorable).
 
     References:
         .. [#kamiran09] `F. Kamiran and T. Calders, "Classifying without
@@ -115,8 +114,8 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
             for the datasets. By default all AIF360 datasets are stored in
             'aif360/sklearn/data/raw' subfolders.
         binary_age (bool, optional): If ``True``, split protected attribute,
-            ``age``, into 'aged' (privileged) and 'youth' (unprivileged). The
-            ``age`` feature remains continuous.
+            'age', into 'aged' (privileged) and 'youth' (unprivileged). The
+            'age' feature remains continuous.
         usecols (single label or list-like, optional): Column name(s) to keep.
             All others are dropped.
         dropcols (single label or list-like, optional): Column name(s) to drop.
diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index db88ea46..a39d5fb3 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -16,15 +16,15 @@ def check_already_dropped(labels, dropped_cols, name, dropped_by='numeric_only',
     Args:
         labels (single label or list-like): Column labels to check.
         dropped_cols (set or pandas.Index): Columns that were already dropped.
-        name (str): Original arg that triggered the check (e.g. ``dropcols``).
-        dropped_by (str, optional): Original arg that caused ``dropped_cols``
-            (e.g. ``numeric_only``).
+        name (str): Original arg that triggered the check (e.g. dropcols).
+        dropped_by (str, optional): Original arg that caused dropped_cols
+            (e.g. numeric_only).
         warn (bool, optional): If ``True``, produces a
             :class:`ColumnAlreadyDroppedWarning` if there are columns in the
-            intersection of ``dropped_cols`` and ``labels``.
+            intersection of dropped_cols and labels.
 
     Returns:
-        list: Columns in ``labels`` which are not in ``dropped_cols``.
+        list: Columns in labels which are not in dropped_cols.
     """
     if not is_list_like(labels):
         labels = [labels]
diff --git a/aif360/sklearn/inprocessing/adversarial_debiasing.py b/aif360/sklearn/inprocessing/adversarial_debiasing.py
index 66014d6c..ca3de37d 100644
--- a/aif360/sklearn/inprocessing/adversarial_debiasing.py
+++ b/aif360/sklearn/inprocessing/adversarial_debiasing.py
@@ -85,11 +85,11 @@ def fit(self, X, y):
         given training data.
 
         Args:
-            X (array-like): Training samples.
+            X (pandas.DataFrame): Training samples.
             y (array-like): Training labels.
 
         Returns:
-            AdversarialDebiasing: self.
+            self
         """
         X, y, _ = check_inputs(X, y)
         rng = check_random_state(self.random_state)
@@ -268,7 +268,7 @@ def decision_function(self, X):
         """Soft prediction scores.
 
         Args:
-            X (array-like): Test samples.
+            X (pandas.DataFrame): Test samples.
 
         Returns:
             numpy.ndarray: Confidence scores per (sample, class) combination. In
@@ -309,7 +309,7 @@ def predict_proba(self, X):
         classes.
 
         Args:
-            X (array-like): Test samples.
+            X (pandas.DataFrame): Test samples.
 
         Returns:
             numpy.ndarray: Returns the probability of the sample for each class
@@ -328,7 +328,7 @@ def predict(self, X):
         """Predict class labels for the given samples.
 
         Args:
-            X (array-like): Test samples.
+            X (pandas.DataFrame): Test samples.
 
         Returns:
             numpy.ndarray: Predicted class label per sample.
diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index f100a012..7b954bf5 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -36,21 +36,21 @@ def difference(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
     arbitrary metric.
 
     Note: The optimal value of a difference is 0. To make it a scorer, one must
-    take the absolute value and set ``greater_is_better`` to False.
+    take the absolute value and set greater_is_better to False.
 
     Unprivileged group is taken to be the inverse of the privileged group.
 
     Args:
         func (function): A metric function from :mod:`sklearn.metrics` or
             :mod:`aif360.sklearn.metrics.metrics`.
-        y (array-like): Outcome vector with protected attributes as index.
-        *args: Additional positional args to be passed through to ``func``.
+        y (pandas.Series): Outcome vector with protected attributes as index.
+        *args: Additional positional args to be passed through to func.
         prot_attr (array-like, keyword-only): Protected attribute(s). If
-            ``None``, all protected attributes in ``y`` are used.
+            ``None``, all protected attributes in y are used.
         priv_group (scalar, optional): The label of the privileged group.
         sample_weight (array-like, optional): Sample weights passed through to
-            ``func``.
-        **kwargs: Additional keyword args to be passed through to ``func``.
+            func.
+        **kwargs: Additional keyword args to be passed through to func.
 
     Returns:
         scalar: Difference in metric value for unprivileged and privileged
@@ -85,14 +85,14 @@ def ratio(func, y, *args, prot_attr=None, priv_group=1, sample_weight=None,
     Args:
         func (function): A metric function from :mod:`sklearn.metrics` or
             :mod:`aif360.sklearn.metrics.metrics`.
-        y (array-like): Outcome vector with protected attributes as index.
-        *args: Additional positional args to be passed through to ``func``.
+        y (pandas.Series): Outcome vector with protected attributes as index.
+        *args: Additional positional args to be passed through to func.
         prot_attr (array-like, keyword-only): Protected attribute(s). If
-            ``None``, all protected attributes in ``y`` are used.
+            ``None``, all protected attributes in y are used.
         priv_group (scalar, optional): The label of the privileged group.
         sample_weight (array-like, optional): Sample weights passed through to
-            ``func``.
-        **kwargs: Additional keyword args to be passed through to ``func``.
+            func.
+        **kwargs: Additional keyword args to be passed through to func.
 
     Returns:
         scalar: Ratio of metric values for unprivileged and privileged groups.
@@ -123,7 +123,7 @@ def make_difference_scorer(diff_func):
     :func:`statistical_parity_difference`).
 
     Since the optimal value of a difference metric is 0, this function takes the
-    absolute value and sets ``greater_is_better`` to ``False``.
+    absolute value and sets greater_is_better to ``False``.
 
     See also:
         :func:`~sklearn.metrics.make_scorer`
@@ -214,7 +214,7 @@ def generalized_fpr(y_true, probas_pred, pos_label=1, sample_weight=None):
 
     Returns:
         float: Generalized false positive rate. If there are no negative samples
-        in ``y_true``, this will raise an
+        in y_true, this will raise an
         :class:`~sklearn.exceptions.UndefinedMetricWarning` and return 0.
     """
     idx = (y_true != pos_label)
@@ -241,7 +241,7 @@ def generalized_fnr(y_true, probas_pred, pos_label=1, sample_weight=None):
 
     Returns:
         float: Generalized false negative rate. If there are no positive samples
-        in ``y_true``, this will raise an
+        in y_true, this will raise an
         :class:`~sklearn.exceptions.UndefinedMetricWarning` and return 0.
     """
     idx = (y_true == pos_label)
@@ -264,16 +264,16 @@ def statistical_parity_difference(*y, prot_attr=None, priv_group=1, pos_label=1,
         - Pr(\hat{Y} = \text{pos_label} | D = \text{privileged})
 
     Note:
-        If only ``y_true`` is provided, this will return the difference in base
+        If only y_true is provided, this will return the difference in base
         rates (statistical parity difference of the original dataset).
 
     Args:
-        y_true (array-like): Ground truth (correct) target values. If ``y_pred``
+        y_true (pandas.Series): Ground truth (correct) target values. If y_pred
             is provided, this is ignored.
         y_pred (array-like, optional): Estimated targets as returned by a
             classifier.
         prot_attr (array-like, keyword-only): Protected attribute(s). If
-            ``None``, all protected attributes in ``y_true`` are used.
+            ``None``, all protected attributes in y_true are used.
         priv_group (scalar, optional): The label of the privileged group.
         pos_label (scalar, optional): The label of the positive class.
         sample_weight (array-like, optional): Sample weights.
@@ -294,16 +294,16 @@ def disparate_impact_ratio(*y, prot_attr=None, priv_group=1, pos_label=1,
         {Pr(\hat{Y} = \text{pos_label} | D = \text{privileged})}
 
     Note:
-        If only ``y_true`` is provided, this will return the ratio of base rates
+        If only y_true is provided, this will return the ratio of base rates
         (disparate impact of the original dataset).
 
     Args:
-        y_true (array-like): Ground truth (correct) target values. If ``y_pred``
+        y_true (pandas.Series): Ground truth (correct) target values. If y_pred
             is provided, this is ignored.
         y_pred (array-like, optional): Estimated targets as returned by a
             classifier.
         prot_attr (array-like, keyword-only): Protected attribute(s). If
-            ``None``, all protected attributes in ``y_true`` are used.
+            ``None``, all protected attributes in y_true are used.
         priv_group (scalar, optional): The label of the privileged group.
         pos_label (scalar, optional): The label of the positive class.
         sample_weight (array-like, optional): Sample weights.
@@ -323,10 +323,10 @@ def equal_opportunity_difference(y_true, y_pred, prot_attr=None, priv_group=1,
     privileged groups. A value of 0 indicates equality of opportunity.
 
     Args:
-        y_true (array-like): Ground truth (correct) target values.
+        y_true (pandas.Series): Ground truth (correct) target values.
         y_pred (array-like): Estimated targets as returned by a classifier.
         prot_attr (array-like, keyword-only): Protected attribute(s). If
-            ``None``, all protected attributes in ``y_true`` are used.
+            ``None``, all protected attributes in y_true are used.
         priv_group (scalar, optional): The label of the privileged group.
         pos_label (scalar, optional): The label of the positive class.
         sample_weight (array-like, optional): Sample weights.
@@ -353,10 +353,10 @@ def average_odds_difference(y_true, y_pred, prot_attr=None, priv_group=1,
     A value of 0 indicates equality of odds.
 
     Args:
-        y_true (array-like): Ground truth (correct) target values.
+        y_true (pandas.Series): Ground truth (correct) target values.
         y_pred (array-like): Estimated targets as returned by a classifier.
         prot_attr (array-like, keyword-only): Protected attribute(s). If
-            ``None``, all protected attributes in ``y_true`` are used.
+            ``None``, all protected attributes in y_true are used.
         priv_group (scalar, optional): The label of the privileged group.
         pos_label (scalar, optional): The label of the positive class.
         sample_weight (array-like, optional): Sample weights.
@@ -387,10 +387,10 @@ def average_odds_error(y_true, y_pred, prot_attr=None, priv_group=1,
     A value of 0 indicates equality of odds.
 
     Args:
-        y_true (array-like): Ground truth (correct) target values.
+        y_true (pandas.Series): Ground truth (correct) target values.
         y_pred (array-like): Estimated targets as returned by a classifier.
         prot_attr (array-like, keyword-only): Protected attribute(s). If
-            ``None``, all protected attributes in ``y_true`` are used.
+            ``None``, all protected attributes in y_true are used.
         priv_group (scalar, optional): The label of the privileged group.
         pos_label (scalar, optional): The label of the positive class.
         sample_weight (array-like, optional): Sample weights.
@@ -473,14 +473,14 @@ def between_group_generalized_entropy_error(y_true, y_pred, prot_attr=None,
     generalized entropy index decomposes to.
 
     Args:
-        y_true (array-like): Ground truth (correct) target values.
+        y_true (pandas.Series): Ground truth (correct) target values.
         y_pred (array-like): Estimated targets as returned by a classifier.
         prot_attr (array-like, optional): Protected attribute(s). If ``None``,
-            all protected attributes in ``y_true`` are used.
+            all protected attributes in y_true are used.
         priv_group (scalar, optional): The label of the privileged group. If
             provided, the index will be computed between only the privileged and
             unprivileged groups. Otherwise, the index will be computed between
-            all groups defined by the ``prot_attr``.
+            all groups defined by the prot_attr.
         alpha (scalar, optional): Parameter that regulates the weight given to
             distances between values at different parts of the distribution. A
             value of 0 is equivalent to the mean log deviation, 1 is the Theil
diff --git a/aif360/sklearn/preprocessing/reweighing.py b/aif360/sklearn/preprocessing/reweighing.py
index dcb1d906..d4f782b0 100644
--- a/aif360/sklearn/preprocessing/reweighing.py
+++ b/aif360/sklearn/preprocessing/reweighing.py
@@ -48,7 +48,7 @@ def __init__(self, prot_attr=None):
         self.prot_attr = prot_attr
 
     def fit(self, X, y, sample_weight=None):
-        """Only ``fit_transform`` is allowed for this algorithm."""
+        """Only :meth:`fit_transform` is allowed for this algorithm."""
         self.fit_transform(X, y, sample_weight=sample_weight)
         return self
 
@@ -57,15 +57,16 @@ def fit_transform(self, X, y, sample_weight=None):
         sample weights.
 
         Args:
-            X (array-like): Training samples.
+            X (pandas.DataFrame): Training samples.
             y (array-like): Training labels.
             sample_weight (array-like, optional): Sample weights.
 
         Returns:
             tuple:
+                Samples and their weights.
 
-                **X** -- Unchanged samples.
-                **sample_weight** -- Transformed sample weights.
+                * **X** -- Unchanged samples.
+                * **sample_weight** -- Transformed sample weights.
         """
         X, y, sample_weight = check_inputs(X, y, sample_weight)
 
@@ -120,12 +121,12 @@ def fit(self, X, y, sample_weight=None):
         samples.
 
         Args:
-            X (array-like): Training samples.
+            X (pandas.DataFrame): Training samples.
             y (array-like): Training labels.
             sample_weight (array-like, optional): Sample weights.
 
         Returns:
-            ReweighingMeta: self.
+            self
         """
         if not has_fit_parameter(self.estimator, 'sample_weight'):
             raise TypeError("`estimator` (type: {}) does not have fit parameter"
@@ -191,7 +192,7 @@ def score(self, X, y, sample_weight=None):
 
         Args:
             X (array-like): Test samples.
-            y (array-like): True labels for ``X``.
+            y (array-like): True labels for X.
             sample_weight (array-like, optional): Sample weights.
 
         Returns:
diff --git a/aif360/sklearn/utils.py b/aif360/sklearn/utils.py
index 28db1e61..13ad3820 100644
--- a/aif360/sklearn/utils.py
+++ b/aif360/sklearn/utils.py
@@ -1,14 +1,28 @@
 import numpy as np
+import pandas as pd
 from pandas.core.dtypes.common import is_list_like
 from sklearn.utils import check_consistent_length
 from sklearn.utils.validation import column_or_1d
 
 
-def check_inputs(X, y, sample_weight=None):
-    if not hasattr(X, 'index'):
-        raise TypeError("Expected `DataFrame`, got {} instead.".format(
-            type(X).__name__))
-    y = column_or_1d(y)
+def check_inputs(X, y, sample_weight=None, ensure_2d=True):
+    """Input validation for debiasing algorithms.
+
+    Checks all inputs for consistent length, validates shapes (optional for X),
+    and returns an array of all ones if sample_weight is ``None``.
+
+    Args:
+        X (array-like): Input data.
+        y (array-like, shape = (n_samples,)): Target values.
+        sample_weight (array-like, optional): Sample weights.
+        ensure_2d (bool, optional): Whether to raise a ValueError if X is not
+            2D.
+    """
+    if ensure_2d and X.ndim != 2:
+        raise ValueError("Expected X to be 2D, got ndim == {} instead.".format(
+                X.ndim))
+    if not isinstance(y, pd.Series):  # don't cast Series -> ndarray
+        y = column_or_1d(y)
     if sample_weight is not None:
         sample_weight = column_or_1d(sample_weight)
     else:
@@ -17,13 +31,18 @@ def check_inputs(X, y, sample_weight=None):
     return X, y, sample_weight
 
 def check_groups(arr, prot_attr, ensure_binary=False):
-    """Validates ``arr`` and returns ``groups`` and ``prot_attr``.
+    """Get groups from the index of arr.
+
+    If there are multiple protected attributes provided, the index is flattened
+    to be a 1-D Index of tuples. If ensure_binary is ``True``, raises a
+    ValueError if there are not exactly two unique groups. Also checks that all
+    provided protected attributes are in the index.
 
     Args:
         arr (`pandas.Series` or `pandas.DataFrame`): A Pandas object containing
             protected attribute information in the index.
         prot_attr (single label or list-like): Protected attribute(s). If
-            ``None``, all protected attributes in ``arr`` are used.
+            ``None``, all protected attributes in arr are used.
         ensure_binary (bool): Raise an error if the resultant groups are not
             binary.
 
@@ -31,7 +50,7 @@ def check_groups(arr, prot_attr, ensure_binary=False):
         tuple:
 
             * **groups** (`pandas.Index`) -- Label (or tuple of labels) of
-              protected attribute for each sample in ``arr``.
+              protected attribute for each sample in arr.
             * **prot_attr** (list-like) -- Modified input. If input is a single
               label, returns single-item list. If input is ``None`` returns list
               of all protected attributes.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 0f850880..b6695bc9 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -59,7 +59,7 @@
 
 autodoc_default_options = {
     'members': True,
-    'inherited-members': True
+    # 'inherited-members': True
 }
 
 # The suffix(es) of source filenames.
@@ -102,7 +102,7 @@
 default_role = 'literal'
 
 # If true, '()' will be appended to :func: etc. cross-reference text.
-add_function_parentheses = False
+add_function_parentheses = True
 
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = 'sphinx'
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 37ba7078..532c0eb5 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -3,17 +3,21 @@
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
 
-Welcome to AI Fairness 360's documentation!
-===========================================
+AI Fairness 360 documentation
+=============================
 
 .. toctree::
-   :maxdepth: 3
+   :maxdepth: 2
    :caption: Modules
 
    modules/algorithms
    modules/datasets
    modules/explainers
    modules/metrics
+
+.. toctree::
+   :maxdepth: 3
+
    modules/sklearn
 
 
diff --git a/docs/source/modules/algorithms.rst b/docs/source/modules/algorithms.rst
index 16d70441..6842064c 100644
--- a/docs/source/modules/algorithms.rst
+++ b/docs/source/modules/algorithms.rst
@@ -1,25 +1,71 @@
-:mod:`aif360.algorithms`
-========================
+==========
+Algorithms
+==========
 
-.. automodule:: aif360.algorithms
+:mod:`aif360.algorithms.preprocessing`
+======================================
+
+.. automodule:: aif360.algorithms.preprocessing
+    :no-members:
+    :no-inherited-members:
+
+.. currentmodule:: aif360
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   algorithms.preprocessing.DisparateImpactRemover
+   algorithms.preprocessing.LFR
+   algorithms.preprocessing.OptimPreproc
+   algorithms.preprocessing.Reweighing
+
+:mod:`aif360.algorithms.inprocessing`
+=====================================
+
+.. automodule:: aif360.algorithms.inprocessing
+    :no-members:
+    :no-inherited-members:
 
-.. toctree::
-   :maxdepth: 2
+.. currentmodule:: aif360
 
-   preprocessing
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
 
-.. toctree::
-   :maxdepth: 2
+   algorithms.inprocessing.AdversarialDebiasing
+   algorithms.inprocessing.ARTClassifier
+   algorithms.inprocessing.MetaFairClassifier
+   algorithms.inprocessing.PrejudiceRemover
 
-   inprocessing
+:mod:`aif360.algorithms.postprocessing`
+=======================================
 
-.. toctree::
-   :maxdepth: 2
+.. automodule:: aif360.algorithms.postprocessing
+    :no-members:
+    :no-inherited-members:
+
+.. currentmodule:: aif360
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   algorithms.postprocessing.CalibratedEqOddsPostprocessing
+   algorithms.postprocessing.EqOddsPostprocessing
+   algorithms.postprocessing.RejectOptionClassification
+
+:mod:`aif360.algorithms`
+========================
+
+.. automodule:: aif360.algorithms
+   :no-members:
+   :no-inherited-members:
 
-   postprocessing
+.. currentmodule:: aif360
 
-Base Class
-----------
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
 
-.. autoclass:: Transformer
-   :members:
+   algorithms.Transformer
diff --git a/docs/source/modules/datasets.rst b/docs/source/modules/datasets.rst
index 4ef46368..140ef25b 100644
--- a/docs/source/modules/datasets.rst
+++ b/docs/source/modules/datasets.rst
@@ -1,30 +1,38 @@
+========
+Datasets
+========
+
 :mod:`aif360.datasets`
 ======================
 
 .. automodule:: aif360.datasets
+    :no-members:
+    :no-inherited-members:
 
-Base Class
-----------
-
-.. autoclass:: Dataset
-    :members:
+Base classes
+------------
 
-Structured Dataset
-------------------
+.. currentmodule:: aif360
 
-.. autoclass:: StructuredDataset
-    :members:
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
 
-Binary Label Dataset
---------------------
+   datasets.Dataset
+   datasets.StructuredDataset
+   datasets.BinaryLabelDataset
+   datasets.StandardDataset
 
-.. autoclass:: BinaryLabelDataset
-    :members:
+Common datasets
+---------------
 
-Standard Datasets
------------------
+.. currentmodule:: aif360
 
-.. toctree::
-   :maxdepth: 2
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
 
-   standard_datasets
+   datasets.AdultDataset
+   datasets.BankDataset
+   datasets.CompasDataset
+   datasets.GermanDataset
diff --git a/docs/source/modules/explainers.rst b/docs/source/modules/explainers.rst
index c0053e81..f5e14832 100644
--- a/docs/source/modules/explainers.rst
+++ b/docs/source/modules/explainers.rst
@@ -1,16 +1,19 @@
+==========
+Explainers
+==========
+
 :mod:`aif360.explainers`
 ========================
 
 .. automodule:: aif360.explainers
+    :no-members:
+    :no-inherited-members:
 
-Metric Text Explainer
----------------------
-
-.. autoclass:: MetricTextExplainer
-    :members:
+.. currentmodule:: aif360
 
-Metric JSON Explainer
----------------------
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
 
-.. autoclass:: MetricJSONExplainer
-    :members:
+   explainers.MetricTextExplainer
+   explainers.MetricJSONExplainer
diff --git a/docs/source/modules/inprocessing.rst b/docs/source/modules/inprocessing.rst
deleted file mode 100644
index 1ae7ce3d..00000000
--- a/docs/source/modules/inprocessing.rst
+++ /dev/null
@@ -1,35 +0,0 @@
-:mod:`aif360.algorithms.inprocessing`
-=====================================
-
-.. automodule:: aif360.algorithms.inprocessing
-
-Adversarial Debiasing
----------------------
-
-.. autoclass:: AdversarialDebiasing
-   :members:
-   :inherited-members:
-   :exclude-members: transform, fit_transform
-
-ART Classifier
---------------
-
-.. autoclass:: ARTClassifier
-   :members:
-   :inherited-members:
-   :exclude-members: transform, fit_transform
-
-Meta Fair Classifier
---------------------
-.. autoclass:: MetaFairClassifier
-   :members:
-   :inherited-members:
-   :exclude-members: transform, fit_transform
-
-Prejudice Remover
------------------
-
-.. autoclass:: PrejudiceRemover
-   :members:
-   :inherited-members:
-   :exclude-members: transform, fit_transform
diff --git a/docs/source/modules/metrics.rst b/docs/source/modules/metrics.rst
index ea5171f0..4be7ed49 100644
--- a/docs/source/modules/metrics.rst
+++ b/docs/source/modules/metrics.rst
@@ -1,36 +1,41 @@
+================
+Fairness Metrics
+================
+
 :mod:`aif360.metrics`
 =====================
 
 .. automodule:: aif360.metrics
+    :no-members:
+    :no-inherited-members:
 
-Dataset Metric
---------------
-
-.. autoclass:: DatasetMetric
-    :members:
-    :exclude-members: difference, ratio
-
-Binary Label Dataset Metric
----------------------------
+.. currentmodule:: aif360
 
-.. autoclass:: BinaryLabelDatasetMetric
-    :members:
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
 
-Classification Metric
----------------------
+   metrics.DatasetMetric
+   metrics.BinaryLabelDatasetMetric
+   metrics.ClassificationMetric
+   metrics.SampleDistortionMetric
 
-.. autoclass:: ClassificationMetric
-    :private-members:
-    :members:
+:mod:`aif360.metrics.utils`
+===========================
 
-Sample Distortion Metric
-------------------------
+.. automodule:: aif360.metrics.utils
+    :no-members:
+    :no-inherited-members:
 
-.. autoclass:: SampleDistortionMetric
-    :members:
+.. currentmodule:: aif360
 
-Utility Functions
------------------
+.. autosummary::
+   :toctree: generated/
+   :template: base.rst
 
-.. automodule:: aif360.metrics.utils
-    :members:
+   metrics.utils.compute_boolean_conditioning_vector
+   metrics.utils.compute_num_instances
+   metrics.utils.compute_num_pos_neg
+   metrics.utils.compute_num_TF_PN
+   metrics.utils.compute_num_gen_TF_PN
+   metrics.utils.compute_distance
\ No newline at end of file
diff --git a/docs/source/modules/postprocessing.rst b/docs/source/modules/postprocessing.rst
deleted file mode 100644
index 18b924db..00000000
--- a/docs/source/modules/postprocessing.rst
+++ /dev/null
@@ -1,22 +0,0 @@
-:mod:`aif360.algorithms.postprocessing`
-=======================================
-
-.. automodule:: aif360.algorithms.postprocessing
-
-Calibrated Equality of Odds
----------------------------
-
-.. autoclass:: CalibratedEqOddsPostprocessing
-   :members:
-
-Equality of Odds
-----------------
-
-.. autoclass:: EqOddsPostprocessing
-   :members:
-
-Reject Option Classification
-----------------------------
-
-.. autoclass:: RejectOptionClassification
-   :members:
diff --git a/docs/source/modules/preprocessing.rst b/docs/source/modules/preprocessing.rst
deleted file mode 100644
index a99006f0..00000000
--- a/docs/source/modules/preprocessing.rst
+++ /dev/null
@@ -1,30 +0,0 @@
-:mod:`aif360.algorithms.preprocessing`
-======================================
-
-.. automodule:: aif360.algorithms.preprocessing
-
-Disparate Impact Remover
-------------------------
-
-.. autoclass:: DisparateImpactRemover
-   :members:
-
-Learning Fair Representations
------------------------------
-
-.. autoclass:: LFR
-   :members:
-
-Optimized Preprocessing
------------------------
-
-.. autoclass:: OptimPreproc
-   :members:
-
-Reweighing
-----------
-
-.. autoclass:: Reweighing
-   :members:
-   :inherited-members:
-   :exclude-members: predict, fit_predict
diff --git a/docs/source/modules/sklearn.rst b/docs/source/modules/sklearn.rst
index 757c0ef8..a61283de 100644
--- a/docs/source/modules/sklearn.rst
+++ b/docs/source/modules/sklearn.rst
@@ -4,7 +4,7 @@
 
 This is the class and function reference for the `scikit-learn`-compatible
 version of the AIF360 API. It is functionally equivalent to the normal API but
-it uses scikit-learn paradigms (where possible) and Pandas `DataFrames` for
+it uses scikit-learn paradigms (where possible) and :class:`pandas.DataFrame` for
 datasets. Not all functionality from AIF360 is supported yet. See
 `Getting Started <https://github.com/IBM/AIF360/aif360/sklearn/examples/Getting%20Started.ipynb>`_
 for a demo of the capabilities.
@@ -27,14 +27,12 @@ Utils
 .. autosummary::
    :toctree: generated/
    :template: class.rst
-   :nosignatures:
 
    datasets.ColumnAlreadyDroppedWarning
 
 .. autosummary::
    :toctree: generated/
    :template: base.rst
-   :nosignatures:
 
    datasets.check_already_dropped
    datasets.standardize_dataset
@@ -46,7 +44,6 @@ Loaders
 .. autosummary::
    :toctree: generated/
    :template: base.rst
-   :nosignatures:
 
    datasets.fetch_adult
    datasets.fetch_german
@@ -67,7 +64,6 @@ Meta-metrics
 .. autosummary::
    :toctree: generated/
    :template: base.rst
-   :nosignatures:
 
    metrics.difference
    metrics.ratio
@@ -79,7 +75,6 @@ Scorers
 .. autosummary::
    :toctree: generated/
    :template: base.rst
-   :nosignatures:
 
    metrics.make_difference_scorer
    metrics.make_ratio_scorer
@@ -91,7 +86,6 @@ Generic metrics
 .. autosummary::
    :toctree: generated/
    :template: base.rst
-   :nosignatures:
 
    metrics.specificity_score
    metrics.sensitivity_score
@@ -107,7 +101,6 @@ Group fairness metrics
 .. autosummary::
    :toctree: generated/
    :template: base.rst
-   :nosignatures:
 
    metrics.statistical_parity_difference
    metrics.mean_difference
@@ -124,7 +117,6 @@ Individual fairness metrics
 .. autosummary::
    :toctree: generated/
    :template: base.rst
-   :nosignatures:
 
    metrics.generalized_entropy_index
    metrics.generalized_entropy_error
@@ -132,7 +124,7 @@ Individual fairness metrics
    metrics.coefficient_of_variation
    metrics.consistency_score
 
-:mod:`aif360.sklearn.preprocessing`: Pre-processing Algorithms
+:mod:`aif360.sklearn.preprocessing`: Pre-processing algorithms
 ==============================================================
 
 .. automodule:: aif360.sklearn.preprocessing
@@ -146,7 +138,6 @@ Pre-processors
 .. autosummary::
    :toctree: generated/
    :template: class.rst
-   :nosignatures:
 
    preprocessing.Reweighing
 
@@ -157,11 +148,10 @@ Meta-Estimator
 .. autosummary::
    :toctree: generated/
    :template: class.rst
-   :nosignatures:
 
    preprocessing.ReweighingMeta
 
-:mod:`aif360.sklearn.inprocessing`: In-processing Algorithms
+:mod:`aif360.sklearn.inprocessing`: In-processing algorithms
 ============================================================
 
 .. automodule:: aif360.sklearn.inprocessing
@@ -175,11 +165,10 @@ In-processors
 .. autosummary::
    :toctree: generated/
    :template: class.rst
-   :nosignatures:
 
    inprocessing.AdversarialDebiasing
 
-:mod:`aif360.sklearn.postprocessing`: Post-processing Algorithms
+:mod:`aif360.sklearn.postprocessing`: Post-processing algorithms
 ================================================================
 
 .. automodule:: aif360.sklearn.postprocessing
@@ -193,7 +182,6 @@ Post-processors
 .. autosummary::
    :toctree: generated/
    :template: class.rst
-   :nosignatures:
 
    postprocessing.CalibratedEqualizedOdds
 
@@ -204,6 +192,24 @@ Meta-Estimator
 .. autosummary::
    :toctree: generated/
    :template: class.rst
-   :nosignatures:
 
-   postprocessing.PostProcessingMeta
\ No newline at end of file
+   postprocessing.PostProcessingMeta
+
+:mod:`aif360.sklearn.utils`: Utility functions
+==============================================
+
+.. automodule:: aif360.sklearn.utils
+    :no-members:
+    :no-inherited-members:
+
+Validation
+----------
+
+.. currentmodule:: aif360.sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: base.rst
+
+   utils.check_inputs
+   utils.check_groups
\ No newline at end of file
diff --git a/docs/source/modules/standard_datasets.rst b/docs/source/modules/standard_datasets.rst
deleted file mode 100644
index 3f6f5622..00000000
--- a/docs/source/modules/standard_datasets.rst
+++ /dev/null
@@ -1,32 +0,0 @@
-.. module:: aif360.datasets
-    :noindex:
-
-Base Class
-----------
-
-.. autoclass:: StandardDataset
-    :members:
-
-Adult Dataset
--------------
-
-.. autoclass:: AdultDataset
-    :members:
-
-Bank Dataset
-------------
-
-.. autoclass:: BankDataset
-    :members:
-
-Compas Dataset
---------------
-
-.. autoclass:: CompasDataset
-    :members:
-
-German Dataset
---------------
-
-.. autoclass:: GermanDataset
-    :members:
diff --git a/docs/source/static/style.css b/docs/source/static/style.css
index 460fdebc..db0cc5f8 100644
--- a/docs/source/static/style.css
+++ b/docs/source/static/style.css
@@ -1,6 +1,6 @@
-/* .wy-nav-content {
-    max-width: 1000px !important;
-} */
+.wy-nav-content {
+    max-width: 900px !important;
+}
 
 /* override table width restrictions */
 .wy-table-responsive table td, .wy-table-responsive table th {

From 8d108935cb2761be33b79dbdf9c430c46ec3bb60 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 19 Dec 2019 12:03:24 -0500
Subject: [PATCH 49/61] postprocessor takes DataFrame if use_proba

added additional tests to check this
---
 .../calibrated_eq_odds_postprocessing.py      | 10 ++--
 aif360/sklearn/postprocessing/__init__.py     | 38 +++++++--------
 .../calibrated_equalized_odds.py              | 46 +++++++++----------
 .../sklearn/test_calibrated_equalized_odds.py | 46 ++++++++++++-------
 4 files changed, 77 insertions(+), 63 deletions(-)

diff --git a/aif360/algorithms/postprocessing/calibrated_eq_odds_postprocessing.py b/aif360/algorithms/postprocessing/calibrated_eq_odds_postprocessing.py
index 4bae2ed9..ba240d72 100644
--- a/aif360/algorithms/postprocessing/calibrated_eq_odds_postprocessing.py
+++ b/aif360/algorithms/postprocessing/calibrated_eq_odds_postprocessing.py
@@ -171,16 +171,16 @@ def predict(self, dataset, threshold=0.5):
             dataset.protected_attribute_names,
             self.unprivileged_groups)
 
-        priv_indices = (np.random.random(sum(cond_vec_priv))
-                     <= self.priv_mix_rate)
-        priv_new_pred = dataset.scores[cond_vec_priv].copy()
-        priv_new_pred[priv_indices] = self.base_rate_priv
-
         unpriv_indices = (np.random.random(sum(cond_vec_unpriv))
                        <= self.unpriv_mix_rate)
         unpriv_new_pred = dataset.scores[cond_vec_unpriv].copy()
         unpriv_new_pred[unpriv_indices] = self.base_rate_unpriv
 
+        priv_indices = (np.random.random(sum(cond_vec_priv))
+                     <= self.priv_mix_rate)
+        priv_new_pred = dataset.scores[cond_vec_priv].copy()
+        priv_new_pred[priv_indices] = self.base_rate_priv
+
         dataset_new = dataset.copy(deepcopy=True)
 
         dataset_new.scores = np.zeros_like(dataset.scores, dtype=np.float64)
diff --git a/aif360/sklearn/postprocessing/__init__.py b/aif360/sklearn/postprocessing/__init__.py
index acc63020..d1b0465f 100644
--- a/aif360/sklearn/postprocessing/__init__.py
+++ b/aif360/sklearn/postprocessing/__init__.py
@@ -26,8 +26,8 @@ class PostProcessingMeta(BaseEstimator, MetaEstimatorMixin):
         be used as the input to this meta-estimator not the other way around.
 
     Attributes:
-        estimator_: Cloned ``estimator``.
-        postprocessor_: Cloned ``postprocessor``.
+        estimator_: Fitted estimator.
+        postprocessor_: Fitted postprocessor.
         use_proba_ (bool): Determined depending on the postprocessor type if
             `use_proba` is None.
     """
@@ -49,7 +49,7 @@ def __init__(self, estimator, postprocessor=CalibratedEqualizedOdds(),
             **options: Keyword options passed through to
                 :func:`~sklearn.model_selection.train_test_split`.
                 Note: 'train_size' and 'test_size' will be ignored in favor of
-                ``val_size``.
+                'val_size'.
         """
         self.estimator = estimator
         self.postprocessor = postprocessor
@@ -70,14 +70,14 @@ def fit(self, X, y, sample_weight=None, **fit_params):
 
         Args:
             X (array-like): Training samples.
-            y (array-like): Training labels.
+            y (pandas.Series): Training labels.
             sample_weight (array-like, optional): Sample weights.
-            **fit_params: Parameters passed to the post-processor ``fit``
+            **fit_params: Parameters passed to the post-processor ``fit()``
                 method. Note: these do not need to be prefixed with ``__``
                 notation.
 
         Returns:
-            PostProcessingMeta: self.
+            self
         """
         self.use_proba_ = (self.use_proba if self.use_proba is not None else
                 isinstance(self.postprocessor, CalibratedEqualizedOdds))
@@ -115,26 +115,26 @@ def fit(self, X, y, sample_weight=None, **fit_params):
     def predict(self, X):
         """Predict class labels for the given samples.
 
-        First, runs ``self.estimator_.predict`` (or ``predict_proba`` if
+        First, runs ``self.estimator_.predict()`` (or ``predict_proba()`` if
         ``self.use_proba_`` is ``True``) then returns the post-processed output
         from those predictions.
 
         Args:
-            X (array-like): Test samples.
+            X (pandas.DataFrame): Test samples.
 
         Returns:
             numpy.ndarray: Predicted class label per sample.
         """
         y_pred = (self.estimator_.predict(X) if not self.use_proba_ else
                   self.estimator_.predict_proba(X))
-        y_pred = pd.Series(y_pred, index=X.index)
+        y_pred = pd.DataFrame(y_pred, index=X.index).squeeze('columns')
         return self.postprocessor_.predict(y_pred)
 
     @if_delegate_has_method('postprocessor_')
     def predict_proba(self, X):
         """Probability estimates.
 
-        First, runs ``self.estimator_.predict`` (or ``predict_proba`` if
+        First, runs ``self.estimator_.predict()`` (or ``predict_proba()`` if
         ``self.use_proba_`` is ``True``) then returns the post-processed output
         from those predictions.
 
@@ -142,7 +142,7 @@ def predict_proba(self, X):
         classes.
 
         Args:
-            X (array-like): Test samples.
+            X (pandas.DataFrame): Test samples.
 
         Returns:
             numpy.ndarray: Returns the probability of the sample for each class
@@ -151,14 +151,14 @@ def predict_proba(self, X):
         """
         y_pred = (self.estimator_.predict(X) if not self.use_proba_ else
                   self.estimator_.predict_proba(X))
-        y_pred = pd.Series(y_pred, index=X.index)
+        y_pred = pd.DataFrame(y_pred, index=X.index).squeeze('columns')
         return self.postprocessor_.predict_proba(y_pred)
 
     @if_delegate_has_method('postprocessor_')
     def predict_log_proba(self, X):
         """Log of probability estimates.
 
-        First, runs ``self.estimator_.predict`` (or ``predict_proba`` if
+        First, runs ``self.estimator_.predict()`` (or ``predict_proba()`` if
         ``self.use_proba_`` is ``True``) then returns the post-processed output
         from those predictions.
 
@@ -166,7 +166,7 @@ def predict_log_proba(self, X):
         classes.
 
         Args:
-            X (array-like): Test samples.
+            X (pandas.DataFrame): Test samples.
 
         Returns:
             array: Returns the log-probability of the sample for each class in
@@ -175,7 +175,7 @@ def predict_log_proba(self, X):
         """
         y_pred = (self.estimator_.predict(X) if not self.use_proba_ else
                   self.estimator_.predict_proba(X))
-        y_pred = pd.Series(y_pred, index=X.index)
+        y_pred = pd.DataFrame(y_pred, index=X.index).squeeze('columns')
         return self.postprocessor_.predict_log_proba(y_pred)
 
     @if_delegate_has_method('postprocessor_')
@@ -183,13 +183,13 @@ def score(self, X, y, sample_weight=None):
         """Returns the output of the post-processor's score function on the
         given test data and labels.
 
-        First, runs ``self.estimator_.predict`` (or ``predict_proba`` if
+        First, runs ``self.estimator_.predict()`` (or ``predict_proba()`` if
         ``self.use_proba_`` is ``True``) then gets the post-processed output
         from those predictions and scores it.
 
         Args:
-            X (array-like): Test samples.
-            y (array-like): True labels for ``X``.
+            X (pandas.DataFrame): Test samples.
+            y (array-like): True labels for X.
             sample_weight (array-like, optional): Sample weights.
 
         Returns:
@@ -197,7 +197,7 @@ def score(self, X, y, sample_weight=None):
         """
         y_pred = (self.estimator_.predict(X) if not self.use_proba_ else
                   self.estimator_.predict_proba(X))
-        y_pred = pd.Series(y_pred, index=X.index)
+        y_pred = pd.DataFrame(y_pred, index=X.index).squeeze('columns')
         return self.postprocessor_.score(y_pred, y, sample_weight=sample_weight)
 
 
diff --git a/aif360/sklearn/postprocessing/calibrated_equalized_odds.py b/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
index 088a84a3..a648d13b 100644
--- a/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
+++ b/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
@@ -5,7 +5,7 @@
 
 from aif360.sklearn.metrics import difference, base_rate
 from aif360.sklearn.metrics import generalized_fnr, generalized_fpr
-from aif360.sklearn.utils import check_groups
+from aif360.sklearn.utils import check_inputs, check_groups
 
 
 class CalibratedEqualizedOdds(BaseEstimator, ClassifierMixin):
@@ -16,9 +16,9 @@ class CalibratedEqualizedOdds(BaseEstimator, ClassifierMixin):
     change output labels with an equalized odds objective [#pleiss17]_.
 
     Note:
-        This breaks the sckit-learn API by requiring fit params ``y_true``,
-        ``y_pred``, and ``pos_label`` and predict param ``y_pred``. See
-        :class:`PostProcessingMeta` for a workaround.
+        This breaks the scikit-learn API by requiring fit params y_true, y_pred,
+        and pos_label and predict param y_pred. See :class:`PostProcessingMeta`
+        for a workaround.
 
     References:
         .. [#pleiss17] `G. Pleiss, M. Raghavan, F. Wu, J. Kleinberg, and
@@ -85,17 +85,20 @@ def fit(self, y_pred, y_true, labels=None, pos_label=1, sample_weight=None):
         Args:
             y_pred (array-like): Probability estimates of the targets as
                 returned by a ``predict_proba()`` call or equivalent.
-            y_true (array-like): Ground-truth (correct) target values.
+            y_true (pandas.Series): Ground-truth (correct) target values.
             labels (list, optional): The ordered set of labels values. Must
-                match the order of columns in ``y_pred`` if provided. By
-                default, all labels in ``y_true`` are used in sorted order.
+                match the order of columns in y_pred if provided. By default,
+                all labels in y_true are used in sorted order.
             pos_label (scalar, optional): The label of the positive class.
             sample_weight (array-like, optional): Sample weights.
 
         Returns:
-            CalibratedEqualizedOdds: self.
+            self
         """
-        groups, self.prot_attr_ = check_groups(y_true, self.prot_attr)
+        y_pred, y_true, sample_weight = check_inputs(y_pred, y_true,
+                                                     sample_weight)
+        groups, self.prot_attr_ = check_groups(y_true, self.prot_attr,
+                                               ensure_binary=True)
         self.classes_ = labels if labels is not None else np.unique(y_true)
         self.groups_ = np.unique(groups)
         self.pos_label_ = pos_label
@@ -107,11 +110,6 @@ def fit(self, y_pred, y_true, labels=None, pos_label=1, sample_weight=None):
             raise ValueError('pos_label={} is not in the set of labels. The '
                     'valid values are:\n{}'.format(pos_label, self.classes_))
 
-        if len(self.groups_) != 2:
-            raise ValueError('prot_attr={}\nyielded {} groups:\n{}\nbut this '
-                    'algorithm requires a binary division of the data.'.format(
-                            self.prot_attr_, len(self.groups_), self.groups_))
-
         y_pred = y_pred[:, np.nonzero(self.classes_ == self.pos_label_)[0][0]]
 
         # local function to return corresponding args for metric evaluation
@@ -119,8 +117,7 @@ def _args(grp_idx, triv=False):
             idx = (groups == self.groups_[grp_idx])
             pred = (np.full_like(y_pred, self.base_rates_[grp_idx]) if triv else
                     y_pred)
-            return [y_true[idx], pred[idx], pos_label,
-                    sample_weight[idx] if sample_weight is not None else None]
+            return [y_true[idx], pred[idx], pos_label, sample_weight[idx]]
 
         self.base_rates_ = [base_rate(*_args(i)) for i in range(2)]
 
@@ -138,8 +135,9 @@ def predict_proba(self, y_pred):
         classes.
 
         Args:
-            y_pred (array-like): Probability estimates of the targets as
-                returned by a ``predict_proba()`` call or equivalent.
+            y_pred (pandas.DataFrame): Probability estimates of the targets as
+                returned by a ``predict_proba()`` call or equivalent. Note: must
+                include protected attributes in the index.
 
         Returns:
             numpy.ndarray: Returns the probability of the sample for each class
@@ -156,7 +154,7 @@ def predict_proba(self, y_pred):
                                      np.unique(groups), self.groups_))
 
         pos_idx = np.nonzero(self.classes_ == self.pos_label_)[0][0]
-        y_pred = y_pred[:, pos_idx]
+        y_pred = y_pred.iloc[:, pos_idx]
 
         yt = np.empty_like(y_pred)
         for grp_idx in range(2):
@@ -172,8 +170,9 @@ def predict(self, y_pred):
         """Predict class labels for the given scores.
 
         Args:
-            y_pred (array-like): Probability estimates of the targets as
-                returned by a ``predict_proba()`` call or equivalent.
+            y_pred (pandas.DataFrame): Probability estimates of the targets as
+                returned by a ``predict_proba()`` call or equivalent. Note: must
+                include protected attributes in the index.
 
         Returns:
             numpy.ndarray: Predicted class label per sample.
@@ -185,8 +184,9 @@ def score(self, y_pred, y_true, sample_weight=None):
         """Score the predictions according to the cost constraint specified.
 
         Args:
-            y_pred (array-like): Probability estimates of the targets as
-                returned by a ``predict_proba()`` call or equivalent.
+            y_pred (pandas.DataFrame): Probability estimates of the targets as
+                returned by a ``predict_proba()`` call or equivalent. Note: must
+                include protected attributes in the index.
             y_true (array-like): Ground-truth (correct) target values.
             sample_weight (array-like, optional): Sample weights.
 
diff --git a/tests/sklearn/test_calibrated_equalized_odds.py b/tests/sklearn/test_calibrated_equalized_odds.py
index 1cba4391..3352b548 100644
--- a/tests/sklearn/test_calibrated_equalized_odds.py
+++ b/tests/sklearn/test_calibrated_equalized_odds.py
@@ -13,7 +13,7 @@
         features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss',
                           'hours-per-week'], features_to_drop=[])
 
-def test_calib_eq_odds_sex():
+def test_calib_eq_odds_sex_weighted():
     logreg = LogisticRegression(solver='lbfgs', max_iter=500)
     y_pred = logreg.fit(X, y, sample_weight=sample_weight).predict_proba(X)
     adult_pred = adult.copy()
@@ -27,31 +27,45 @@ def test_calib_eq_odds_sex():
     assert np.isclose(orig_cal_eq_odds.priv_mix_rate, cal_eq_odds.mix_rates_[1])
     assert np.isclose(orig_cal_eq_odds.unpriv_mix_rate, cal_eq_odds.mix_rates_[0])
 
-def test_split():
-    adult_est, adult_post = adult.split([0.75], shuffle=False)
-    X_est, X_post, y_est, y_post = train_test_split(X, y, shuffle=False)
+def test_postprocessingmeta_fnr():
+    adult_train, adult_test = adult.split([0.9], shuffle=False)
+    X_tr, X_te, y_tr, _, sw_tr, _ = train_test_split(X, y, sample_weight,
+                train_size=0.9, shuffle=False)
 
-    assert np.all(adult_est.features == X_est)
-    assert np.all(adult_est.labels.ravel() == y_est)
-    assert np.all(adult_post.features == X_post)
-    assert np.all(adult_post.labels.ravel() == y_post)
+    assert np.all(adult_train.features == X_tr)
+    assert np.all(adult_test.features == X_te)
+    assert np.all(adult_train.labels.ravel() == y_tr)
+
+    adult_est, adult_post = adult_train.split([0.75], shuffle=False)
 
-def test_postprocessingmeta():
     logreg = LogisticRegression(solver='lbfgs', max_iter=500)
+    logreg.fit(adult_est.features, adult_est.labels.ravel(),
+               sample_weight=adult_est.instance_weights)
+    probas_pred = logreg.predict_proba(adult_post.features)[:, 1]
 
-    adult_est, adult_post = adult.split([0.75], shuffle=False)
-    logreg.fit(adult_est.features, adult_est.labels.ravel())
-    y_pred = logreg.predict_proba(adult_post.features)[:, 1]
     adult_pred = adult_post.copy()
-    adult_pred.scores = y_pred
+    adult_pred.scores = probas_pred
+
     orig_cal_eq_odds = CalibratedEqOddsPostprocessing(
-            unprivileged_groups=[{'sex': 0}], privileged_groups=[{'sex': 1}])
+            unprivileged_groups=[{'sex': 0}], privileged_groups=[{'sex': 1}],
+            cost_constraint='fnr', seed=0)
     orig_cal_eq_odds.fit(adult_post, adult_pred)
 
     cal_eq_odds = PostProcessingMeta(estimator=logreg,
-            postprocessor=CalibratedEqualizedOdds('sex'), shuffle=False)
-    cal_eq_odds.fit(X, y, sample_weight=sample_weight)
+            postprocessor=CalibratedEqualizedOdds('sex', cost_constraint='fnr', random_state=0),
+            shuffle=False)
+    cal_eq_odds.fit(X_tr, y_tr, sample_weight=sw_tr)
+
+    assert np.allclose(logreg.coef_, cal_eq_odds.estimator_.coef_)
 
     assert np.allclose([orig_cal_eq_odds.unpriv_mix_rate,
                         orig_cal_eq_odds.priv_mix_rate],
                        cal_eq_odds.postprocessor_.mix_rates_)
+
+    adult_test_pred = adult_test.copy()
+    adult_test_pred.scores = logreg.predict_proba(adult_test.features)[:, 1]
+    adult_test_pred = orig_cal_eq_odds.predict(adult_test_pred)
+
+    y_test_pred = cal_eq_odds.predict_proba(X_te)
+
+    assert np.allclose(adult_test_pred.scores, y_test_pred[:, 1])

From e0ff2b66b5aa4b866a15ccfe51ad57816e85d704 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 19 Dec 2019 13:04:31 -0500
Subject: [PATCH 50/61] readme changes overwritten in the merge

---
 aif360/sklearn/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/aif360/sklearn/README.md b/aif360/sklearn/README.md
index 98497eb9..f895d171 100644
--- a/aif360/sklearn/README.md
+++ b/aif360/sklearn/README.md
@@ -28,18 +28,18 @@ objects with sample properties (protected attributes) as the index
   - [x] Adversarial debiasing
   - [ ] **[External]** `get_feature_names()` from data preprocessing
   steps that would remove DataFrame formatting
-    - [ ] SLEP007/8
+    - [ ] [SLEP007](https://github.com/scikit-learn/enhancement_proposals/pull/17)/[SLEP008](https://github.com/scikit-learn/enhancement_proposals/pull/18) - feature names
   - [ ] Prejudice remover
   - [ ] Meta-fair classifier
 - [ ] Make preprocessing algorithms compatible as sklearn `Transformer`s
   - [ ] **[External]** Add functionality to modify X and y
-    - [ ] [SLEP001](https://github.com/scikit-learn/enhancement_proposals/blob/master/slep001/proposal.rst)
+    - [ ] [SLEP005](https://github.com/scikit-learn/enhancement_proposals/pull/15) - Resampler API (see discussion; meta-estimator workaround may be enough)
   - [ ] Disparate impact remover
   - [ ] Learning fair representations
   - [ ] Optimized preprocessing
   - [X] Reweighing
     - [X] Meta-estimator workaround
-    - [ ] **[External]** SLEP006 - Sample properties
+    - [ ] **[External]** [SLEP006](https://github.com/scikit-learn/enhancement_proposals/pull/16) - Sample properties (meta-estimator works but would be very nice to have)
 - [ ] Make postprocessing algorithms compatible
   - [x] Calibrated equalized odds postprocessing
     - [x] Meta-estimator workaround again

From a2cd77ee4369f7b70de153edd96c678a67070e1a Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 19 Dec 2019 14:56:37 -0500
Subject: [PATCH 51/61] train, test were swapped for adult

---
 examples/demo_disparate_impact_remover.ipynb | 2 +-
 tests/test_differential_fairness.py          | 4 ++--
 tests/test_disparate_impact_remover.py       | 2 +-
 tests/test_meta_classifier.py                | 2 +-
 tests/test_standard_datasets.py              | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/examples/demo_disparate_impact_remover.ipynb b/examples/demo_disparate_impact_remover.ipynb
index 6d6a8bbd..ba5948fd 100644
--- a/examples/demo_disparate_impact_remover.ipynb
+++ b/examples/demo_disparate_impact_remover.ipynb
@@ -64,7 +64,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "train, test = ad.split([32561])\n",
+    "test, train = ad.split([16281])\n",
     "train.features = scaler.fit_transform(train.features)\n",
     "test.features = scaler.fit_transform(test.features)\n",
     "\n",
diff --git a/tests/test_differential_fairness.py b/tests/test_differential_fairness.py
index 26d1a489..b586c1d5 100644
--- a/tests/test_differential_fairness.py
+++ b/tests/test_differential_fairness.py
@@ -10,7 +10,7 @@
                   categorical_features=['workclass', 'education',
                           'marital-status', 'occupation', 'relationship'],
                   custom_preprocessing=lambda df: df.fillna('Unknown'))
-adult_train, adult_test = ad.split([32561], shuffle=False)
+adult_test, adult_train = ad.split([16281], shuffle=False)
 
 scaler = StandardScaler()
 X = scaler.fit_transform(adult_train.features)
@@ -58,7 +58,7 @@ def custom_preprocessing(df):
     nonbinary_ad.features = np.delete(nonbinary_ad.features, index, axis=1)
     nonbinary_ad.feature_names = np.delete(nonbinary_ad.feature_names, index)
 
-    _, nonbinary_test = nonbinary_ad.split([32561], shuffle=False)
+    nonbinary_test, _ = nonbinary_ad.split([16281], shuffle=False)
     dataset_metric = BinaryLabelDatasetMetric(nonbinary_test)
     eps_data = dataset_metric.smoothed_empirical_differential_fairness()
     assert eps_data == 2.063813731996515  # verified with reference implementation
diff --git a/tests/test_disparate_impact_remover.py b/tests/test_disparate_impact_remover.py
index 3f74ff0d..0b3d4973 100644
--- a/tests/test_disparate_impact_remover.py
+++ b/tests/test_disparate_impact_remover.py
@@ -28,7 +28,7 @@ def test_adult():
     scaler = MinMaxScaler(copy=False)
     # ad.features = scaler.fit_transform(ad.features)
 
-    train, test = ad.split([32561])
+    test, train = ad.split([16281])
     assert np.any(test.labels)
 
     train.features = scaler.fit_transform(train.features)
diff --git a/tests/test_meta_classifier.py b/tests/test_meta_classifier.py
index 8195650e..45931cdd 100644
--- a/tests/test_meta_classifier.py
+++ b/tests/test_meta_classifier.py
@@ -19,7 +19,7 @@ def test_adult():
     #scaler = MinMaxScaler(copy=False)
     # ad.features = scaler.fit_transform(ad.features)
 
-    train, test = ad.split([32561])
+    test, train = ad.split([16281])
 
     biased_model = MetaFairClassifier(tau=0, sensitive_attr=protected)
     biased_model.fit(train)
diff --git a/tests/test_standard_datasets.py b/tests/test_standard_datasets.py
index 3aa7b984..0f09d0aa 100644
--- a/tests/test_standard_datasets.py
+++ b/tests/test_standard_datasets.py
@@ -24,8 +24,8 @@ def test_german():
 
 def test_adult_test_set():
     ad = AdultDataset()
-    # train, test = ad.split([32561])
-    train, test = ad.split([30162])
+    # test, train = ad.split([16281])
+    test, train = ad.split([15060])
     assert np.any(test.labels)
 
 def test_adult():

From ee7f23c90697095628934814f0d37bb582a1c40a Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 19 Dec 2019 15:20:23 -0500
Subject: [PATCH 52/61] remove branch mentions

---
 .travis.yml              | 4 ++--
 aif360/sklearn/README.md | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index a9c99eda..9aa44262 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,7 +9,7 @@ env:
 
 branches:
   only:
-    - sklearn-compat
+    - master
 
 install:
   - pip install -r requirements.txt
@@ -28,4 +28,4 @@ before_script:
 script:
   # stop the build if there are Python syntax errors or undefined names
   - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics
-  - travis_wait python -m pytest tests/sklearn
+  - travis_wait pytest tests
diff --git a/aif360/sklearn/README.md b/aif360/sklearn/README.md
index f895d171..0bc7f189 100644
--- a/aif360/sklearn/README.md
+++ b/aif360/sklearn/README.md
@@ -1,7 +1,5 @@
 ## `aif360.sklearn`
 
-[![Build Status](https://travis-ci.org/IBM/AIF360.svg?branch=sklearn-compat)](https://travis-ci.org/IBM/AIF360)
-
 This is a wholly separate interface for interacting with data, viewing metrics,
 and running debiasing algorithms than the main AIF360 package. The purpose of
 this sub-package is to match scikit-learn paradigms/APIs for easier integration

From c8154ec0969896cc6d9889c1d2bebf8161d76f95 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Fri, 20 Dec 2019 10:47:34 -0500
Subject: [PATCH 53/61] remove "attributes" line if none present

---
 docs/source/templates/class.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/source/templates/class.rst b/docs/source/templates/class.rst
index 5f46cabb..9ce9f9e6 100644
--- a/docs/source/templates/class.rst
+++ b/docs/source/templates/class.rst
@@ -20,10 +20,12 @@
    {% endblock %}
 
    {% block attributes %}
+   {% if attributes %}
    .. rubric:: Attributes
 
    .. autosummary::
    {% for item in attributes %}
       ~{{ name }}.{{ item }}
    {%- endfor %}
+   {%- endif %}
    {% endblock %}

From 7ef94e71ccba3ee16da718d1465dc035245d7785 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Fri, 27 Dec 2019 19:06:14 -0500
Subject: [PATCH 54/61] moved example to main folder

---
 docs/source/modules/sklearn.rst                            | 2 +-
 examples/README.md                                         | 7 +++++--
 .../sklearn/demo_new_features.ipynb                        | 0
 3 files changed, 6 insertions(+), 3 deletions(-)
 rename aif360/sklearn/examples/Getting Started.ipynb => examples/sklearn/demo_new_features.ipynb (100%)

diff --git a/docs/source/modules/sklearn.rst b/docs/source/modules/sklearn.rst
index a61283de..1401e2eb 100644
--- a/docs/source/modules/sklearn.rst
+++ b/docs/source/modules/sklearn.rst
@@ -6,7 +6,7 @@ This is the class and function reference for the `scikit-learn`-compatible
 version of the AIF360 API. It is functionally equivalent to the normal API but
 it uses scikit-learn paradigms (where possible) and :class:`pandas.DataFrame` for
 datasets. Not all functionality from AIF360 is supported yet. See
-`Getting Started <https://github.com/IBM/AIF360/aif360/sklearn/examples/Getting%20Started.ipynb>`_
+`Getting Started <https://github.com/IBM/AIF360/blob/master/examples/sklearn/demo_new_features.ipynb>`_
 for a demo of the capabilities.
 
 Note: This is under active development. Visit our
diff --git a/examples/README.md b/examples/README.md
index a9bc4f59..026e0ade 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,7 +1,7 @@
 # AI Fairness 360 Examples (Tutorials and Demos)
 
 This directory contains a diverse collection of jupyter notebooks that use [AI Fairness 360](http://aif360.mybluemix.net/) in various ways.
-Both tutorials and demos illustrate working code using AIF360.  Tutorials provide additional discussion that walks 
+Both tutorials and demos illustrate working code using AIF360.  Tutorials provide additional discussion that walks
 the user through the various steps of the notebook.
 
 ## Tutorials
@@ -13,6 +13,9 @@ Data from the Medical Expenditure Panel Survey ([2015](https://meps.ahrq.gov/mep
 ## Demos
 Below is a list of additional notebooks that demonstrate the use of AIF360.
 
+**NEW:** [sklearn/demo_new_features.ipynb](sklearn/demo_new_features.ipynb): highlights the
+features of the new `scikit-learn`-compatible API
+
 [demo_optim_data_preproc.ipynb](demo_optim_data_preproc.ipynb): demonstrates a generalization of the credit scoring tutorial that  shows the full machine learning workflow for the optimized data pre-processing algorithm for bias mitigation on several datasets
 
 [demo_adversarial_debiasing.ipynb](demo_adversarial_debiasing.ipynb): demonstrates the use of the adversarial debiasing in-processing algorithm to learn a fair classifier
@@ -21,7 +24,7 @@ Below is a list of additional notebooks that demonstrate the use of AIF360.
 
 [demo_disparate_impact_remover.ipynb](demo_calibrated_eqodds_postprocessing.ipynb): demonstrates the use of a disparate impact remover pre-processing algorithm for bias mitigiation
 
-[demo_json_explainers.ipynb](demo_json_explainers.ipynb): 
+[demo_json_explainers.ipynb](demo_json_explainers.ipynb):
 
 [demo_lfr.ipynb](demo_lfr.ipynb):  demonstrates the use of the learning fair representations algorithm for bias mitigation
 
diff --git a/aif360/sklearn/examples/Getting Started.ipynb b/examples/sklearn/demo_new_features.ipynb
similarity index 100%
rename from aif360/sklearn/examples/Getting Started.ipynb
rename to examples/sklearn/demo_new_features.ipynb

From c5af6479b82193f93dd417098b442a9fbbcc7adb Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Fri, 31 Jan 2020 16:40:01 -0500
Subject: [PATCH 55/61] use_proba -> needs_proba

---
 aif360/sklearn/postprocessing/__init__.py | 32 +++++++++++------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/aif360/sklearn/postprocessing/__init__.py b/aif360/sklearn/postprocessing/__init__.py
index d1b0465f..9af0db10 100644
--- a/aif360/sklearn/postprocessing/__init__.py
+++ b/aif360/sklearn/postprocessing/__init__.py
@@ -28,17 +28,17 @@ class PostProcessingMeta(BaseEstimator, MetaEstimatorMixin):
     Attributes:
         estimator_: Fitted estimator.
         postprocessor_: Fitted postprocessor.
-        use_proba_ (bool): Determined depending on the postprocessor type if
-            `use_proba` is None.
+        needs_proba_ (bool): Determined depending on the postprocessor type if
+            `needs_proba` is None.
     """
 
     def __init__(self, estimator, postprocessor=CalibratedEqualizedOdds(),
-                 use_proba=None, val_size=0.25, **options):
+                 needs_proba=None, val_size=0.25, **options):
         """
         Args:
             estimator (sklearn.BaseEstimator): Original estimator.
             postprocessor: Post-processing algorithm.
-            use_proba (bool): Use ``self.estimator_.predict_proba()`` instead of
+            needs_proba (bool): Use ``self.estimator_.predict_proba()`` instead of
                 ``self.estimator_.predict()`` as input to postprocessor. If
                 ``None``, defaults to ``True`` if the postprocessor supports it.
             val_size (int or float): Size of validation set used to fit the
@@ -53,7 +53,7 @@ def __init__(self, estimator, postprocessor=CalibratedEqualizedOdds(),
         """
         self.estimator = estimator
         self.postprocessor = postprocessor
-        self.use_proba = use_proba
+        self.needs_proba = needs_proba
         self.val_size = val_size
         self.options = options
 
@@ -79,9 +79,9 @@ def fit(self, X, y, sample_weight=None, **fit_params):
         Returns:
             self
         """
-        self.use_proba_ = (self.use_proba if self.use_proba is not None else
+        self.needs_proba_ = (self.needs_proba if self.needs_proba is not None else
                 isinstance(self.postprocessor, CalibratedEqualizedOdds))
-        if self.use_proba_ and not hasattr(self.estimator, 'predict_proba'):
+        if self.needs_proba_ and not hasattr(self.estimator, 'predict_proba'):
             raise TypeError("`estimator` (type: {}) does not implement method "
                             "`predict_proba()`.".format(type(self.estimator)))
 
@@ -103,7 +103,7 @@ def fit(self, X, y, sample_weight=None, **fit_params):
             X_est, X_post, y_est, y_post = train_test_split(X, y, **options_)
             self.estimator_.fit(X_est, y_est)
 
-        y_pred = (self.estimator_.predict(X_post) if not self.use_proba_ else
+        y_pred = (self.estimator_.predict(X_post) if not self.needs_proba_ else
                   self.estimator_.predict_proba(X_post))
         # fit_params = fit_params.copy().update(labels=self.estimator_.classes_)
         self.postprocessor_.fit(y_pred, y_post, sample_weight=sw_post
@@ -116,7 +116,7 @@ def predict(self, X):
         """Predict class labels for the given samples.
 
         First, runs ``self.estimator_.predict()`` (or ``predict_proba()`` if
-        ``self.use_proba_`` is ``True``) then returns the post-processed output
+        ``self.needs_proba_`` is ``True``) then returns the post-processed output
         from those predictions.
 
         Args:
@@ -125,7 +125,7 @@ def predict(self, X):
         Returns:
             numpy.ndarray: Predicted class label per sample.
         """
-        y_pred = (self.estimator_.predict(X) if not self.use_proba_ else
+        y_pred = (self.estimator_.predict(X) if not self.needs_proba_ else
                   self.estimator_.predict_proba(X))
         y_pred = pd.DataFrame(y_pred, index=X.index).squeeze('columns')
         return self.postprocessor_.predict(y_pred)
@@ -135,7 +135,7 @@ def predict_proba(self, X):
         """Probability estimates.
 
         First, runs ``self.estimator_.predict()`` (or ``predict_proba()`` if
-        ``self.use_proba_`` is ``True``) then returns the post-processed output
+        ``self.needs_proba_`` is ``True``) then returns the post-processed output
         from those predictions.
 
         The returned estimates for all classes are ordered by the label of
@@ -149,7 +149,7 @@ def predict_proba(self, X):
             in the model, where classes are ordered as they are in
             ``self.classes_``.
         """
-        y_pred = (self.estimator_.predict(X) if not self.use_proba_ else
+        y_pred = (self.estimator_.predict(X) if not self.needs_proba_ else
                   self.estimator_.predict_proba(X))
         y_pred = pd.DataFrame(y_pred, index=X.index).squeeze('columns')
         return self.postprocessor_.predict_proba(y_pred)
@@ -159,7 +159,7 @@ def predict_log_proba(self, X):
         """Log of probability estimates.
 
         First, runs ``self.estimator_.predict()`` (or ``predict_proba()`` if
-        ``self.use_proba_`` is ``True``) then returns the post-processed output
+        ``self.needs_proba_`` is ``True``) then returns the post-processed output
         from those predictions.
 
         The returned estimates for all classes are ordered by the label of
@@ -173,7 +173,7 @@ def predict_log_proba(self, X):
             the model, where classes are ordered as they are in
             ``self.classes_``.
         """
-        y_pred = (self.estimator_.predict(X) if not self.use_proba_ else
+        y_pred = (self.estimator_.predict(X) if not self.needs_proba_ else
                   self.estimator_.predict_proba(X))
         y_pred = pd.DataFrame(y_pred, index=X.index).squeeze('columns')
         return self.postprocessor_.predict_log_proba(y_pred)
@@ -184,7 +184,7 @@ def score(self, X, y, sample_weight=None):
         given test data and labels.
 
         First, runs ``self.estimator_.predict()`` (or ``predict_proba()`` if
-        ``self.use_proba_`` is ``True``) then gets the post-processed output
+        ``self.needs_proba_`` is ``True``) then gets the post-processed output
         from those predictions and scores it.
 
         Args:
@@ -195,7 +195,7 @@ def score(self, X, y, sample_weight=None):
         Returns:
             float: Score value.
         """
-        y_pred = (self.estimator_.predict(X) if not self.use_proba_ else
+        y_pred = (self.estimator_.predict(X) if not self.needs_proba_ else
                   self.estimator_.predict_proba(X))
         y_pred = pd.DataFrame(y_pred, index=X.index).squeeze('columns')
         return self.postprocessor_.score(y_pred, y, sample_weight=sample_weight)

From 042bb1215fd0c85221266d70f33e5647a845211d Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Fri, 31 Jan 2020 18:10:57 -0500
Subject: [PATCH 56/61] fixed/renamed/reordered/added some attributes

* fixed German 'age' from being dropped
* renamed two_year_recid labels to 'Survived' and 'Recidivated' to match ProPublica article
* reordered COMPAS categories to 'Male' < 'Female'
* added 'foreign_worker' protected attribute for German
---
 aif360/sklearn/datasets/compas_dataset.py  | 19 +++++++++++++------
 aif360/sklearn/datasets/openml_datasets.py | 14 ++++++++------
 aif360/sklearn/datasets/utils.py           |  8 ++++----
 3 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/aif360/sklearn/datasets/compas_dataset.py b/aif360/sklearn/datasets/compas_dataset.py
index 81578ef5..c909692d 100644
--- a/aif360/sklearn/datasets/compas_dataset.py
+++ b/aif360/sklearn/datasets/compas_dataset.py
@@ -20,8 +20,12 @@ def fetch_compas(data_home=None, binary_race=False,
     Optionally binarizes 'race' to 'Caucasian' (privileged) or
     'African-American' (unprivileged). The other protected attribute is 'sex'
     ('Male' is *unprivileged* and 'Female' is *privileged*). The outcome
-    variable is 'no recid.' (favorable) if the person was not accused of a crime
-    within two years or 'did recid.' (unfavorable) if they were.
+    variable is 'Survived' (favorable) if the person was not accused of a crime
+    within two years or 'Recidivated' (unfavorable) if they were.
+
+    Note:
+        The values for the 'sex' variable if numeric_only is ``True`` are 1 for
+        'Female' and 0 for 'Male' -- opposite the convention of other datasets.
 
     Args:
         data_home (string, optional): Specify another download and cache folder
@@ -59,16 +63,19 @@ def fetch_compas(data_home=None, binary_race=False,
     for col in ['sex', 'age_cat', 'race', 'c_charge_degree', 'c_charge_desc']:
         df[col] = df[col].astype('category')
 
-    # 'did recid' < 'no recid'
-    df.two_year_recid = df.two_year_recid.replace({0: 'no recid.',
-            1: 'did recid.'}).astype('category').cat.as_ordered()
+    # 'Survived' < 'Recidivated'
+    cats = ['Survived', 'Recidivated']
+    df.two_year_recid = df.two_year_recid.replace([0, 1], cats).astype('category')
+    df.two_year_recid = df.two_year_recid.cat.set_categories(cats, ordered=True)
 
     if binary_race:
         # 'African-American' < 'Caucasian'
         df.race = df.race.cat.set_categories(['African-American', 'Caucasian'],
                                              ordered=True)
 
-    df.sex = df.sex.astype('category').cat.as_ordered()  # 'Female' < 'Male'
+    # 'Male' < 'Female'
+    df.sex = df.sex.astype('category').cat.reorder_categories(
+            ['Male', 'Female'], ordered=True)
 
     return standardize_dataset(df, prot_attr=['sex', 'race'],
                                target='two_year_recid', usecols=usecols,
diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 1bfa24e7..2e6f73d6 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -36,8 +36,8 @@ def fetch_adult(subset='all', data_home=None, binary_race=True, usecols=[],
                 dropcols=[], numeric_only=False, dropna=True):
     """Load the Adult Census Income Dataset.
 
-    Binarizes 'race' to 'White' (privileged) or 'Non-white' (unprivileged).
-    The other protected attribute is 'sex' ('Male' is privileged and 'Female' is
+    Binarizes 'race' to 'White' (privileged) or 'Non-white' (unprivileged). The
+    other protected attribute is 'sex' ('Male' is privileged and 'Female' is
     unprivileged). The outcome variable is 'annual-income': '>50K' (favorable)
     or '<=50K' (unfavorable).
 
@@ -151,7 +151,8 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
     df['credit-risk'] = df['credit-risk'].cat.as_ordered()  # 'bad' < 'good'
 
     # binarize protected attribute (but not corresponding feature)
-    age = (pd.cut(df.age, [0, 25, 100], labels=numeric_only and ['young', 'aged'])
+    age = (pd.cut(df.age, [0, 25, 100],
+                  labels=False if numeric_only else ['young', 'aged'])
            if binary_age else 'age')
 
     # Note: marital_status directly implies sex. i.e. 'div/dep/mar' => 'female'
@@ -161,9 +162,10 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
     df = df.join(personal_status.astype('category'))
     df.sex = df.sex.cat.as_ordered()  # 'female' < 'male'
 
-    return standardize_dataset(df, prot_attr=['sex', age], target='credit-risk',
-                               usecols=usecols, dropcols=dropcols,
-                               numeric_only=numeric_only, dropna=dropna)
+    return standardize_dataset(df, prot_attr=['sex', age, 'foreign_worker'],
+                               target='credit-risk', usecols=usecols,
+                               dropcols=dropcols, numeric_only=numeric_only,
+                               dropna=dropna)
 
 def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
                numeric_only=False, dropna=False):
diff --git a/aif360/sklearn/datasets/utils.py b/aif360/sklearn/datasets/utils.py
index a39d5fb3..a776ff16 100644
--- a/aif360/sklearn/datasets/utils.py
+++ b/aif360/sklearn/datasets/utils.py
@@ -28,13 +28,13 @@ def check_already_dropped(labels, dropped_cols, name, dropped_by='numeric_only',
     """
     if not is_list_like(labels):
         labels = [labels]
-    labels = [c for c in labels if isinstance(c, str)]
-    already_dropped = dropped_cols.intersection(labels)
-    if warn and already_dropped.any():
+    str_labels = [c for c in labels if isinstance(c, str)]
+    already_dropped = dropped_cols.intersection(str_labels)
+    if warn and any(already_dropped):
         warnings.warn("Some column labels from `{}` were already dropped by "
                 "`{}`:\n{}".format(name, dropped_by, already_dropped.tolist()),
                 ColumnAlreadyDroppedWarning, stacklevel=2)
-    return [c for c in labels if c not in already_dropped]
+    return [c for c in labels if not isinstance(c, str) or c not in already_dropped]
 
 def standardize_dataset(df, prot_attr, target, sample_weight=None, usecols=[],
                        dropcols=[], numeric_only=False, dropna=True):

From ff9e70c0170c3a6178e00e094aeb370f6daa9535 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Wed, 5 Feb 2020 18:39:29 -0500
Subject: [PATCH 57/61] fixed sample_weight=None bug and classes_ typo

---
 aif360/sklearn/postprocessing/calibrated_equalized_odds.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/aif360/sklearn/postprocessing/calibrated_equalized_odds.py b/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
index a648d13b..94f8d5ef 100644
--- a/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
+++ b/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
@@ -64,7 +64,8 @@ def __init__(self, prot_attr=None, cost_constraint='weighted',
         self.cost_constraint = cost_constraint
         self.random_state = random_state
 
-    def _weighted_cost(self, y_true, probas_pred, pos_label, sample_weight):
+    def _weighted_cost(self, y_true, probas_pred, pos_label=1,
+                       sample_weight=None):
         """Evaluates the cost function specified by ``self.cost_constraint``."""
         fpr = generalized_fpr(y_true, probas_pred, pos_label, sample_weight)
         fnr = generalized_fnr(y_true, probas_pred, pos_label, sample_weight)
@@ -178,7 +179,7 @@ def predict(self, y_pred):
             numpy.ndarray: Predicted class label per sample.
         """
         scores = self.predict_proba(y_pred)
-        return self.classes[scores.argmax(axis=1)]
+        return self.classes_[scores.argmax(axis=1)]
 
     def score(self, y_pred, y_true, sample_weight=None):
         """Score the predictions according to the cost constraint specified.
@@ -201,4 +202,4 @@ def score(self, y_pred, y_true, sample_weight=None):
 
         return abs(difference(self._weighted_cost, y_true, probas_pred,
                 prot_attr=self.prot_attr_, priv_group=self.groups_[1],
-                sample_weight=sample_weight))
+                pos_label=self.pos_label_, sample_weight=sample_weight))

From 57b2ab51efa5725396818cdb67070aafdbd80b37 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 6 Feb 2020 12:54:00 -0500
Subject: [PATCH 58/61] improved specificity_score and added fpr/fnr error

---
 aif360/sklearn/metrics/metrics.py | 44 ++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index 7b954bf5..4fda5c67 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 from sklearn.metrics import make_scorer, recall_score
+from sklearn.metrics import multilabel_confusion_matrix
 from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_X_y
 from sklearn.exceptions import UndefinedMetricWarning
@@ -26,7 +27,8 @@
     'between_group_generalized_entropy_error', 'theil_index',
     'coefficient_of_variation', 'consistency_score',
     # aliases
-    'sensitivity_score', 'mean_difference',
+    'sensitivity_score', 'mean_difference', 'false_negative_rate_error',
+    'false_positive_rate_error'
 ]
 
 # ============================= META-METRICS ===================================
@@ -155,19 +157,24 @@ def score_fn(y, y_pred, **kwargs):
 
 
 # ================================ HELPERS =====================================
-# TODO: make this more general
-def specificity_score(y_true, y_pred, neg_label=0, sample_weight=None):
+def specificity_score(y_true, y_pred, pos_label=1, sample_weight=None):
     """Compute the specificity or true negative rate.
 
     Args:
         y_true (array-like): Ground truth (correct) target values.
         y_pred (array-like): Estimated targets as returned by a classifier.
-        neg_label (scalar, optional): The label of the negative class. Note:
-            the data should be binary.
+        pos_label (scalar, optional): The label of the positive class.
         sample_weight (array-like, optional): Sample weights.
     """
-    return recall_score(y_true, y_pred, pos_label=neg_label,
-                        sample_weight=sample_weight)
+    MCM = multilabel_confusion_matrix(y_true, y_pred, labels=[pos_label],
+                                      sample_weight=sample_weight)
+    tn, fp, fn, tp = MCM.ravel()
+    negs = tn + fp
+    if negs == 0:
+        warnings.warn('specificity_score is ill-defined and being set to 0.0 '
+                      'due to no negative samples.', UndefinedMetricWarning)
+        return 0.
+    return tn / negs
 
 def base_rate(y_true, y_pred=None, pos_label=1, sample_weight=None):
     r"""Compute the base rate, :math:`Pr(Y = \text{pos_label}) = \frac{P}{P+N}`.
@@ -339,7 +346,7 @@ def equal_opportunity_difference(y_true, y_pred, prot_attr=None, priv_group=1,
                       sample_weight=sample_weight)
 
 def average_odds_difference(y_true, y_pred, prot_attr=None, priv_group=1,
-                            pos_label=1, neg_label=0, sample_weight=None):
+                            pos_label=1, sample_weight=None):
     r"""A relaxed version of equality of odds.
 
     Returns the average of the difference in FPR and TPR for the unprivileged
@@ -366,14 +373,14 @@ def average_odds_difference(y_true, y_pred, prot_attr=None, priv_group=1,
     """
     fpr_diff = -difference(specificity_score, y_true, y_pred,
                            prot_attr=prot_attr, priv_group=priv_group,
-                           neg_label=neg_label, sample_weight=sample_weight)
+                           pos_label=pos_label, sample_weight=sample_weight)
     tpr_diff = difference(recall_score, y_true, y_pred, prot_attr=prot_attr,
                           priv_group=priv_group, pos_label=pos_label,
                           sample_weight=sample_weight)
     return (tpr_diff + fpr_diff) / 2
 
-def average_odds_error(y_true, y_pred, prot_attr=None, priv_group=1,
-                       pos_label=1, neg_label=0, sample_weight=None):
+def average_odds_error(y_true, y_pred, prot_attr=None, pos_label=1,
+                       sample_weight=None):
     r"""A relaxed version of equality of odds.
 
     Returns the average of the absolute difference in FPR and TPR for the
@@ -398,9 +405,10 @@ def average_odds_error(y_true, y_pred, prot_attr=None, priv_group=1,
     Returns:
         float: Average odds error.
     """
+    priv_group = check_groups(y_true, prot_attr=prot_attr)[0][0]
     fpr_diff = -difference(specificity_score, y_true, y_pred,
                            prot_attr=prot_attr, priv_group=priv_group,
-                           neg_label=neg_label, sample_weight=sample_weight)
+                           pos_label=pos_label, sample_weight=sample_weight)
     tpr_diff = difference(recall_score, y_true, y_pred, prot_attr=prot_attr,
                           priv_group=priv_group, pos_label=pos_label,
                           sample_weight=sample_weight)
@@ -561,13 +569,13 @@ def sensitivity_score(y_true, y_pred, pos_label=1, sample_weight=None):
     return recall_score(y_true, y_pred, pos_label=pos_label,
                         sample_weight=sample_weight)
 
-# def false_negative_rate_error(y_true, y_pred, pos_label=1, sample_weight=None):
-#     return 1 - recall_score(y_true, y_pred, pos_label=pos_label,
-#                             sample_weight=sample_weight)
+def false_negative_rate_error(y_true, y_pred, pos_label=1, sample_weight=None):
+    return 1 - recall_score(y_true, y_pred, pos_label=pos_label,
+                            sample_weight=sample_weight)
 
-# def false_positive_rate_error(y_true, y_pred, neg_label=0, sample_weight=None):
-#     return 1 - specificity_score(y_true, y_pred, neg_label=neg_label,
-#                                  sample_weight=sample_weight)
+def false_positive_rate_error(y_true, y_pred, pos_label=1, sample_weight=None):
+    return 1 - specificity_score(y_true, y_pred, pos_label=pos_label,
+                                 sample_weight=sample_weight)
 
 def mean_difference(*y, prot_attr=None, priv_group=1, pos_label=1,
                     sample_weight=None):

From 8fdd6dc1460ff0c891b1ff4bd04bb7e3a3d18a1c Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Thu, 6 Feb 2020 12:56:43 -0500
Subject: [PATCH 59/61] made foreign_worker and education (bank) ordered

---
 aif360/sklearn/datasets/openml_datasets.py | 6 ++++++
 tests/sklearn/test_datasets.py             | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 2e6f73d6..16d3165f 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -162,6 +162,9 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
     df = df.join(personal_status.astype('category'))
     df.sex = df.sex.cat.as_ordered()  # 'female' < 'male'
 
+    # 'no' < 'yes'
+    df.foreign_worker = df.foreign_worker.astype('category').cat.as_ordered()
+
     return standardize_dataset(df, prot_attr=['sex', age, 'foreign_worker'],
                                target='credit-risk', usecols=usecols,
                                dropcols=dropcols, numeric_only=numeric_only,
@@ -215,6 +218,9 @@ def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
     # replace 'unknown' marker with NaN
     df.apply(lambda s: s.cat.remove_categories('unknown', inplace=True)
              if hasattr(s, 'cat') and 'unknown' in s.cat.categories else s)
+    # 'primary' < 'secondary' < 'tertiary'
+    df.education = df.education.astype('category').cat.as_ordered()
+
     return standardize_dataset(df, prot_attr='age', target='deposit',
                                usecols=usecols, dropcols=dropcols,
                                numeric_only=numeric_only, dropna=dropna)
diff --git a/tests/sklearn/test_datasets.py b/tests/sklearn/test_datasets.py
index 0cd13a6c..1d2ec6a0 100644
--- a/tests/sklearn/test_datasets.py
+++ b/tests/sklearn/test_datasets.py
@@ -65,14 +65,14 @@ def test_fetch_german():
     german = fetch_german()
     assert len(german) == 2
     assert german.X.shape == (1000, 21)
-    assert fetch_german(numeric_only=True).X.shape == (1000, 8)
+    assert fetch_german(numeric_only=True).X.shape == (1000, 9)
 
 def test_fetch_bank():
     bank = fetch_bank()
     assert len(bank) == 2
     assert bank.X.shape == (45211, 15)
     assert fetch_bank(dropcols=[]).X.shape == (45211, 16)
-    assert fetch_bank(numeric_only=True).X.shape == (45211, 6)
+    assert fetch_bank(numeric_only=True).X.shape == (45211, 7)
 
 @pytest.mark.filterwarnings('error', category=ColumnAlreadyDroppedWarning)
 def test_fetch_compas():

From 2cf455f186181d72fc6bcd155d730dc42eeb5062 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Wed, 19 Feb 2020 15:29:32 -0500
Subject: [PATCH 60/61] various fixes to address PR comments

* added one-hot encoding example and random_states to demo notebook
* added 'prefit' option to PostProcessingMeta
* multiple fixes to docstring wordings
* added additional links/disclaimers in docstrings
* renamed CalibratedEqualizedOdds args to X and y
---
 aif360/sklearn/datasets/openml_datasets.py    |  37 +-
 .../inprocessing/adversarial_debiasing.py     |   2 +-
 aif360/sklearn/metrics/metrics.py             |  35 +-
 aif360/sklearn/postprocessing/__init__.py     |  81 ++-
 .../calibrated_equalized_odds.py              |  69 +--
 aif360/sklearn/preprocessing/reweighing.py    |   3 +
 aif360/sklearn/utils.py                       |  27 +-
 examples/sklearn/demo_new_features.ipynb      | 557 +++++-------------
 8 files changed, 324 insertions(+), 487 deletions(-)

diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py
index 16d3165f..f4c78e67 100644
--- a/aif360/sklearn/datasets/openml_datasets.py
+++ b/aif360/sklearn/datasets/openml_datasets.py
@@ -41,6 +41,10 @@ def fetch_adult(subset='all', data_home=None, binary_race=True, usecols=[],
     unprivileged). The outcome variable is 'annual-income': '>50K' (favorable)
     or '<=50K' (unfavorable).
 
+    Note:
+        By default, the data is downloaded from OpenML. See the `adult
+        <https://www.openml.org/d/1590>`_ page for details.
+
     Args:
         subset ({'train', 'test', or 'all'}, optional): Select the dataset to
             load: 'train' for the training set, 'test' for the test set, 'all'
@@ -60,6 +64,9 @@ def fetch_adult(subset='all', data_home=None, binary_race=True, usecols=[],
         namedtuple: Tuple containing X, y, and sample_weights for the Adult
         dataset accessible by index or name.
 
+    See also:
+        :func:`sklearn.datasets.fetch_openml`
+
     Examples:
         >>> adult = fetch_adult()
         >>> adult.X.shape
@@ -103,11 +110,9 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
     unprivileged; see the binary_age flag to keep this continuous). The outcome
     variable is 'credit-risk': 'good' (favorable) or 'bad' (unfavorable).
 
-    References:
-        .. [#kamiran09] `F. Kamiran and T. Calders, "Classifying without
-           discriminating," 2nd International Conference on Computer,
-           Control and Communication, 2009.
-           <https://ieeexplore.ieee.org/abstract/document/4909197>`_
+    Note:
+        By default, the data is downloaded from OpenML. See the `credit-g
+        <https://www.openml.org/d/31>`_ page for details.
 
     Args:
         data_home (string, optional): Specify another download and cache folder
@@ -126,6 +131,15 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
         namedtuple: Tuple containing X and y for the German dataset accessible
         by index or name.
 
+    See also:
+        :func:`sklearn.datasets.fetch_openml`
+
+    References:
+        .. [#kamiran09] `F. Kamiran and T. Calders, "Classifying without
+           discriminating," 2nd International Conference on Computer,
+           Control and Communication, 2009.
+           <https://ieeexplore.ieee.org/abstract/document/4909197>`_
+
     Examples:
         >>> german = fetch_german()
         >>> german.X.shape
@@ -142,7 +156,6 @@ def fetch_german(data_home=None, binary_age=True, usecols=[], dropcols=[],
         >>> disparate_impact_ratio(y, y_pred, prot_attr='age', priv_group=True,
         ... pos_label='good')
         0.9483094846144106
-
     """
     df = to_dataframe(fetch_openml(data_id=31, target_column=None,
                                    data_home=data_home or DATA_HOME_DEFAULT))
@@ -175,7 +188,11 @@ def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
     """Load the Bank Marketing Dataset.
 
     The protected attribute is 'age' (left as continuous). The outcome variable
-    is 'deposit': ``True`` or ``False``.
+    is 'deposit': 'yes' or 'no'.
+
+    Note:
+        By default, the data is downloaded from OpenML. See the `bank-marketing
+        <https://www.openml.org/d/1461>`_ page for details.
 
     Args:
         data_home (string, optional): Specify another download and cache folder
@@ -193,6 +210,9 @@ def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
         namedtuple: Tuple containing X and y for the Bank dataset accessible by
         index or name.
 
+    See also:
+        :func:`sklearn.datasets.fetch_openml`
+
     Examples:
         >>> bank = fetch_bank()
         >>> bank.X.shape
@@ -214,7 +234,8 @@ def fetch_bank(data_home=None, percent10=False, usecols=[], dropcols='duration',
                   'housing', 'loan', 'contact', 'day', 'month', 'duration',
                   'campaign', 'pdays', 'previous', 'poutcome', 'deposit']
     # remap target
-    df.deposit = df.deposit.map({'1': False, '2': True}).astype('bool')
+    df.deposit = df.deposit.map({'1': 'no', '2': 'yes'}).astype('category')
+    df.deposit = df.deposit.cat.as_ordered()  # 'no' < 'yes'
     # replace 'unknown' marker with NaN
     df.apply(lambda s: s.cat.remove_categories('unknown', inplace=True)
              if hasattr(s, 'cat') and 'unknown' in s.cat.categories else s)
diff --git a/aif360/sklearn/inprocessing/adversarial_debiasing.py b/aif360/sklearn/inprocessing/adversarial_debiasing.py
index ca3de37d..e2328e00 100644
--- a/aif360/sklearn/inprocessing/adversarial_debiasing.py
+++ b/aif360/sklearn/inprocessing/adversarial_debiasing.py
@@ -67,7 +67,7 @@ def __init__(self, prot_attr=None, scope_name='classifier',
                 adversary.
             verbose (bool, optional): If ``True``, print losses every 200 steps.
             random_state (int or numpy.RandomState, optional): Seed of pseudo-
-                random number generator for shuffling data.
+                random number generator for shuffling data and seeding weights.
         """
 
         self.prot_attr = prot_attr
diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py
index 4fda5c67..956621c0 100644
--- a/aif360/sklearn/metrics/metrics.py
+++ b/aif360/sklearn/metrics/metrics.py
@@ -210,8 +210,8 @@ def generalized_fpr(y_true, probas_pred, pos_label=1, sample_weight=None):
     r"""Return the ratio of generalized false positives to negative examples in
     the dataset, :math:`GFPR = \tfrac{GFP}{N}`.
 
-    The generalized confusion matrix is calculated by summing the probabilities
-    of the positive class instead of the hard predictions.
+    Generalized confusion matrix measures such as this are calculated by summing
+    the probabilities of the positive class instead of the hard predictions.
 
     Args:
         y_true (array-like): Ground-truth (correct) target values.
@@ -237,8 +237,8 @@ def generalized_fnr(y_true, probas_pred, pos_label=1, sample_weight=None):
     r"""Return the ratio of generalized false negatives to positive examples in
     the dataset, :math:`GFNR = \tfrac{GFN}{P}`.
 
-    The generalized confusion matrix is calculated by summing the probabilities
-    of the positive class instead of the hard predictions.
+    Generalized confusion matrix measures such as this are calculated by summing
+    the probabilities of the positive class instead of the hard predictions.
 
     Args:
         y_true (array-like): Ground-truth (correct) target values.
@@ -272,7 +272,8 @@ def statistical_parity_difference(*y, prot_attr=None, priv_group=1, pos_label=1,
 
     Note:
         If only y_true is provided, this will return the difference in base
-        rates (statistical parity difference of the original dataset).
+        rates (statistical parity difference of the original dataset). If both
+        y_true and y_pred are provided, only y_pred is used.
 
     Args:
         y_true (pandas.Series): Ground truth (correct) target values. If y_pred
@@ -287,6 +288,9 @@ def statistical_parity_difference(*y, prot_attr=None, priv_group=1, pos_label=1,
 
     Returns:
         float: Statistical parity difference.
+
+    See also:
+        :func:`selection_rate`, :func:`base_rate`
     """
     rate = base_rate if len(y) == 1 or y[1] is None else selection_rate
     return difference(rate, *y, prot_attr=prot_attr, priv_group=priv_group,
@@ -302,7 +306,8 @@ def disparate_impact_ratio(*y, prot_attr=None, priv_group=1, pos_label=1,
 
     Note:
         If only y_true is provided, this will return the ratio of base rates
-        (disparate impact of the original dataset).
+        (disparate impact of the original dataset). If both y_true and y_pred
+        are provided, only y_pred is used.
 
     Args:
         y_true (pandas.Series): Ground truth (correct) target values. If y_pred
@@ -317,6 +322,9 @@ def disparate_impact_ratio(*y, prot_attr=None, priv_group=1, pos_label=1,
 
     Returns:
         float: Disparate impact.
+
+    See also:
+        :func:`selection_rate`, :func:`base_rate`
     """
     rate = base_rate if len(y) == 1 or y[1] is None else selection_rate
     return ratio(rate, *y, prot_attr=prot_attr, priv_group=priv_group,
@@ -340,6 +348,9 @@ def equal_opportunity_difference(y_true, y_pred, prot_attr=None, priv_group=1,
 
     Returns:
         float: Equal opportunity difference.
+
+    See also:
+        :func:`~sklearn.metrics.recall_score`
     """
     return difference(recall_score, y_true, y_pred, prot_attr=prot_attr,
                       priv_group=priv_group, pos_label=pos_label,
@@ -461,6 +472,9 @@ def generalized_entropy_error(y_true, y_pred, alpha=2, pos_label=1):
             index, and 2 is half the squared coefficient of variation.
         pos_label (scalar, optional): The label of the positive class.
 
+    See also:
+        :func:`generalized_entropy_index`
+
     References:
         .. [#speicher18] `T. Speicher, H. Heidari, N. Grgic-Hlaca,
            K. P. Gummadi, A. Singla, A. Weller, and M. B. Zafar, "A Unified
@@ -495,6 +509,9 @@ def between_group_generalized_entropy_error(y_true, y_pred, prot_attr=None,
             index, and 2 is half the squared coefficient of variation.
         pos_label (scalar, optional): The label of the positive class.
 
+    See also:
+        :func:`generalized_entropy_index`
+
     References:
         .. [#speicher18] `T. Speicher, H. Heidari, N. Grgic-Hlaca,
            K. P. Gummadi, A. Singla, A. Weller, and M. B. Zafar, "A Unified
@@ -518,6 +535,9 @@ def theil_index(b):
 
     Args:
         b (array-like): Parameter over which to calculate the entropy index.
+
+    See also:
+        :func:`generalized_entropy_index`
     """
     return generalized_entropy_index(b, alpha=1)
 
@@ -527,6 +547,9 @@ def coefficient_of_variation(b):
 
     Args:
         b (array-like): Parameter over which to calculate the entropy index.
+
+    See also:
+        :func:`generalized_entropy_index`
     """
     return 2 * np.sqrt(generalized_entropy_index(b, alpha=2))
 
diff --git a/aif360/sklearn/postprocessing/__init__.py b/aif360/sklearn/postprocessing/__init__.py
index 9af0db10..c45f4e4b 100644
--- a/aif360/sklearn/postprocessing/__init__.py
+++ b/aif360/sklearn/postprocessing/__init__.py
@@ -33,14 +33,16 @@ class PostProcessingMeta(BaseEstimator, MetaEstimatorMixin):
     """
 
     def __init__(self, estimator, postprocessor=CalibratedEqualizedOdds(),
-                 needs_proba=None, val_size=0.25, **options):
+                 needs_proba=None, prefit=False, val_size=0.25, **options):
         """
         Args:
             estimator (sklearn.BaseEstimator): Original estimator.
             postprocessor: Post-processing algorithm.
-            needs_proba (bool): Use ``self.estimator_.predict_proba()`` instead of
-                ``self.estimator_.predict()`` as input to postprocessor. If
+            needs_proba (bool): Use ``self.estimator_.predict_proba()`` instead
+                of ``self.estimator_.predict()`` as input to postprocessor. If
                 ``None``, defaults to ``True`` if the postprocessor supports it.
+            prefit (bool): If ``True``, ``estimator`` is assumed to be already
+                fitted and all data is used to train the postprocessor.
             val_size (int or float): Size of validation set used to fit the
                 postprocessor. The estimator fits on the remainder of the
                 training set.
@@ -54,6 +56,7 @@ def __init__(self, estimator, postprocessor=CalibratedEqualizedOdds(),
         self.estimator = estimator
         self.postprocessor = postprocessor
         self.needs_proba = needs_proba
+        self.prefit = prefit
         self.val_size = val_size
         self.options = options
 
@@ -79,14 +82,28 @@ def fit(self, X, y, sample_weight=None, **fit_params):
         Returns:
             self
         """
-        self.needs_proba_ = (self.needs_proba if self.needs_proba is not None else
-                isinstance(self.postprocessor, CalibratedEqualizedOdds))
+        self.needs_proba_ = (self.needs_proba if self.needs_proba is not None
+                else isinstance(self.postprocessor, CalibratedEqualizedOdds))
         if self.needs_proba_ and not hasattr(self.estimator, 'predict_proba'):
             raise TypeError("`estimator` (type: {}) does not implement method "
                             "`predict_proba()`.".format(type(self.estimator)))
 
+        if self.prefit:
+            if len(self.options):
+                warning("Splitting options were passed but prefit is True so "
+                        "these are ignored.")
+            self.estimator_ = self.estimator  # already fitted; used by predict
+            self.postprocessor_ = clone(self.postprocessor)
+            y_score = (self.estimator_.predict(X) if not self.needs_proba_ else
+                       self.estimator_.predict_proba(X))
+            fit_params = dict(fit_params, labels=self.estimator_.classes_)
+            self.postprocessor_.fit(y_score, y, sample_weight=sample_weight,
+                                    **fit_params)
+            return self
+
         if 'train_size' in self.options or 'test_size' in self.options:
-            warning("'train_size' and 'test_size' are ignored in favor of 'val_size'")
+            warning("'train_size' and 'test_size' are ignored in favor of "
+                    "'val_size'")
         options_ = self.options.copy()
         options_['test_size'] = self.val_size
         if 'train_size' in options_:
@@ -103,10 +120,11 @@ def fit(self, X, y, sample_weight=None, **fit_params):
             X_est, X_post, y_est, y_post = train_test_split(X, y, **options_)
             self.estimator_.fit(X_est, y_est)
 
-        y_pred = (self.estimator_.predict(X_post) if not self.needs_proba_ else
+        y_score = (self.estimator_.predict(X_post) if not self.needs_proba_ else
                   self.estimator_.predict_proba(X_post))
-        # fit_params = fit_params.copy().update(labels=self.estimator_.classes_)
-        self.postprocessor_.fit(y_pred, y_post, sample_weight=sw_post
+        fit_params = fit_params.copy()
+        fit_params.update(labels=self.estimator_.classes_)
+        self.postprocessor_.fit(y_score, y_post, sample_weight=sw_post
                                 if sample_weight is not None else None,
                                 **fit_params)
         return self
@@ -116,8 +134,8 @@ def predict(self, X):
         """Predict class labels for the given samples.
 
         First, runs ``self.estimator_.predict()`` (or ``predict_proba()`` if
-        ``self.needs_proba_`` is ``True``) then returns the post-processed output
-        from those predictions.
+        ``self.needs_proba_`` is ``True``) then returns the post-processed
+        output from those predictions.
 
         Args:
             X (pandas.DataFrame): Test samples.
@@ -125,18 +143,18 @@ def predict(self, X):
         Returns:
             numpy.ndarray: Predicted class label per sample.
         """
-        y_pred = (self.estimator_.predict(X) if not self.needs_proba_ else
-                  self.estimator_.predict_proba(X))
-        y_pred = pd.DataFrame(y_pred, index=X.index).squeeze('columns')
-        return self.postprocessor_.predict(y_pred)
+        y_score = (self.estimator_.predict(X) if not self.needs_proba_ else
+                   self.estimator_.predict_proba(X))
+        y_score = pd.DataFrame(y_score, index=X.index).squeeze('columns')
+        return self.postprocessor_.predict(y_score)
 
     @if_delegate_has_method('postprocessor_')
     def predict_proba(self, X):
         """Probability estimates.
 
         First, runs ``self.estimator_.predict()`` (or ``predict_proba()`` if
-        ``self.needs_proba_`` is ``True``) then returns the post-processed output
-        from those predictions.
+        ``self.needs_proba_`` is ``True``) then returns the post-processed
+        output from those predictions.
 
         The returned estimates for all classes are ordered by the label of
         classes.
@@ -149,18 +167,18 @@ def predict_proba(self, X):
             in the model, where classes are ordered as they are in
             ``self.classes_``.
         """
-        y_pred = (self.estimator_.predict(X) if not self.needs_proba_ else
-                  self.estimator_.predict_proba(X))
-        y_pred = pd.DataFrame(y_pred, index=X.index).squeeze('columns')
-        return self.postprocessor_.predict_proba(y_pred)
+        y_score = (self.estimator_.predict(X) if not self.needs_proba_ else
+                   self.estimator_.predict_proba(X))
+        y_score = pd.DataFrame(y_score, index=X.index).squeeze('columns')
+        return self.postprocessor_.predict_proba(y_score)
 
     @if_delegate_has_method('postprocessor_')
     def predict_log_proba(self, X):
         """Log of probability estimates.
 
         First, runs ``self.estimator_.predict()`` (or ``predict_proba()`` if
-        ``self.needs_proba_`` is ``True``) then returns the post-processed output
-        from those predictions.
+        ``self.needs_proba_`` is ``True``) then returns the post-processed
+        output from those predictions.
 
         The returned estimates for all classes are ordered by the label of
         classes.
@@ -173,10 +191,10 @@ def predict_log_proba(self, X):
             the model, where classes are ordered as they are in
             ``self.classes_``.
         """
-        y_pred = (self.estimator_.predict(X) if not self.needs_proba_ else
-                  self.estimator_.predict_proba(X))
-        y_pred = pd.DataFrame(y_pred, index=X.index).squeeze('columns')
-        return self.postprocessor_.predict_log_proba(y_pred)
+        y_score = (self.estimator_.predict(X) if not self.needs_proba_ else
+                   self.estimator_.predict_proba(X))
+        y_score = pd.DataFrame(y_score, index=X.index).squeeze('columns')
+        return self.postprocessor_.predict_log_proba(y_score)
 
     @if_delegate_has_method('postprocessor_')
     def score(self, X, y, sample_weight=None):
@@ -195,10 +213,11 @@ def score(self, X, y, sample_weight=None):
         Returns:
             float: Score value.
         """
-        y_pred = (self.estimator_.predict(X) if not self.needs_proba_ else
-                  self.estimator_.predict_proba(X))
-        y_pred = pd.DataFrame(y_pred, index=X.index).squeeze('columns')
-        return self.postprocessor_.score(y_pred, y, sample_weight=sample_weight)
+        y_score = (self.estimator_.predict(X) if not self.needs_proba_ else
+                   self.estimator_.predict_proba(X))
+        y_score = pd.DataFrame(y_score, index=X.index).squeeze('columns')
+        return self.postprocessor_.score(y_score, y,
+                                         sample_weight=sample_weight)
 
 
 __all__ = [
diff --git a/aif360/sklearn/postprocessing/calibrated_equalized_odds.py b/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
index 94f8d5ef..0b3bdf01 100644
--- a/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
+++ b/aif360/sklearn/postprocessing/calibrated_equalized_odds.py
@@ -16,15 +16,18 @@ class CalibratedEqualizedOdds(BaseEstimator, ClassifierMixin):
     change output labels with an equalized odds objective [#pleiss17]_.
 
     Note:
-        This breaks the sckit-learn API by requiring fit params y_true, y_pred,
-        and pos_label and predict param y_pred. See :class:`PostProcessingMeta`
-        for a workaround.
+        A :class:`~sklearn.pipeline.Pipeline` expects a single estimation step
+        but this class requires an estimator's predictions as input. See
+        :class:`PostProcessingMeta` for a workaround.
+
+    See also:
+        :class:`PostProcessingMeta`
 
     References:
         .. [#pleiss17] `G. Pleiss, M. Raghavan, F. Wu, J. Kleinberg, and
            K. Q. Weinberger, "On Fairness and Calibration," Conference on Neural
            Information Processing Systems, 2017.
-           <https://arxiv.org/pdf/1709.02012.pdf>`_
+           <http://papers.nips.cc/paper/7151-on-fairness-and-calibration.pdf>`_
 
     Adapted from:
     https://github.com/gpleiss/equalized_odds_and_calibration/blob/master/calib_eq_odds.py
@@ -58,7 +61,7 @@ def __init__(self, prot_attr=None, cost_constraint='weighted',
                 generalized false negative rate ('fnr'), or a weighted
                 combination of both ('weighted').
             random_state (int or numpy.RandomState, optional): Seed of pseudo-
-                random number generator for shuffling data.
+                random number generator for picking predictions to replace.
         """
         self.prot_attr = prot_attr
         self.cost_constraint = cost_constraint
@@ -80,27 +83,26 @@ def _weighted_cost(self, y_true, probas_pred, pos_label=1,
             raise ValueError("`cost_constraint` must be one of: 'fpr', 'fnr', "
                              "or 'weighted'")
 
-    def fit(self, y_pred, y_true, labels=None, pos_label=1, sample_weight=None):
+    def fit(self, X, y, labels=None, pos_label=1, sample_weight=None):
         """Compute the mixing rates required to satisfy the cost constraint.
 
         Args:
-            y_pred (array-like): Probability estimates of the targets as
-                returned by a ``predict_proba()`` call or equivalent.
-            y_true (pandas.Series): Ground-truth (correct) target values.
+            X (array-like): Probability estimates of the targets as returned by
+                a ``predict_proba()`` call or equivalent.
+            y (pandas.Series): Ground-truth (correct) target values.
             labels (list, optional): The ordered set of labels values. Must
-                match the order of columns in y_pred if provided. By default,
-                all labels in y_true are used in sorted order.
+                match the order of columns in X if provided. By default,
+                all labels in y are used in sorted order.
             pos_label (scalar, optional): The label of the positive class.
             sample_weight (array-like, optional): Sample weights.
 
         Returns:
             self
         """
-        y_pred, y_true, sample_weight = check_inputs(y_pred, y_true,
-                                                     sample_weight)
-        groups, self.prot_attr_ = check_groups(y_true, self.prot_attr,
+        X, y, sample_weight = check_inputs(X, y, sample_weight)
+        groups, self.prot_attr_ = check_groups(y, self.prot_attr,
                                                ensure_binary=True)
-        self.classes_ = labels if labels is not None else np.unique(y_true)
+        self.classes_ = labels if labels is not None else np.unique(y)
         self.groups_ = np.unique(groups)
         self.pos_label_ = pos_label
 
@@ -111,14 +113,13 @@ def fit(self, y_pred, y_true, labels=None, pos_label=1, sample_weight=None):
             raise ValueError('pos_label={} is not in the set of labels. The '
                     'valid values are:\n{}'.format(pos_label, self.classes_))
 
-        y_pred = y_pred[:, np.nonzero(self.classes_ == self.pos_label_)[0][0]]
+        X = X[:, np.nonzero(self.classes_ == self.pos_label_)[0][0]]
 
         # local function to return corresponding args for metric evaluation
         def _args(grp_idx, triv=False):
             idx = (groups == self.groups_[grp_idx])
-            pred = (np.full_like(y_pred, self.base_rates_[grp_idx]) if triv else
-                    y_pred)
-            return [y_true[idx], pred[idx], pos_label, sample_weight[idx]]
+            pred = np.full_like(X, self.base_rates_[grp_idx]) if triv else X
+            return [y[idx], pred[idx], pos_label, sample_weight[idx]]
 
         self.base_rates_ = [base_rate(*_args(i)) for i in range(2)]
 
@@ -131,12 +132,12 @@ def _args(grp_idx, triv=False):
 
         return self
 
-    def predict_proba(self, y_pred):
+    def predict_proba(self, X):
         """The returned estimates for all classes are ordered by the label of
         classes.
 
         Args:
-            y_pred (pandas.DataFrame): Probability estimates of the targets as
+            X (pandas.DataFrame): Probability estimates of the targets as
                 returned by a ``predict_proba()`` call or equivalent. Note: must
                 include protected attributes in the index.
 
@@ -148,47 +149,47 @@ def predict_proba(self, y_pred):
         check_is_fitted(self, 'mix_rates_')
         rng = check_random_state(self.random_state)
 
-        groups, _ = check_groups(y_pred, self.prot_attr_)
+        groups, _ = check_groups(X, self.prot_attr_)
         if not set(np.unique(groups)) <= set(self.groups_):
-            raise ValueError('The protected groups from y_pred:\n{}\ndo not '
+            raise ValueError('The protected groups from X:\n{}\ndo not '
                              'match those from the training set:\n{}'.format(
                                      np.unique(groups), self.groups_))
 
         pos_idx = np.nonzero(self.classes_ == self.pos_label_)[0][0]
-        y_pred = y_pred.iloc[:, pos_idx]
+        X = X.iloc[:, pos_idx]
 
-        yt = np.empty_like(y_pred)
+        yt = np.empty_like(X)
         for grp_idx in range(2):
             i = (groups == self.groups_[grp_idx])
             to_replace = (rng.rand(sum(i)) < self.mix_rates_[grp_idx])
-            new_preds = y_pred[i].copy()
+            new_preds = X[i].copy()
             new_preds[to_replace] = self.base_rates_[grp_idx]
             yt[i] = new_preds
 
         return np.c_[1 - yt, yt] if pos_idx == 1 else np.c_[yt, 1 - yt]
 
-    def predict(self, y_pred):
+    def predict(self, X):
         """Predict class labels for the given scores.
 
         Args:
-            y_pred (pandas.DataFrame): Probability estimates of the targets as
+            X (pandas.DataFrame): Probability estimates of the targets as
                 returned by a ``predict_proba()`` call or equivalent. Note: must
                 include protected attributes in the index.
 
         Returns:
             numpy.ndarray: Predicted class label per sample.
         """
-        scores = self.predict_proba(y_pred)
+        scores = self.predict_proba(X)
         return self.classes_[scores.argmax(axis=1)]
 
-    def score(self, y_pred, y_true, sample_weight=None):
+    def score(self, X, y, sample_weight=None):
         """Score the predictions according to the cost constraint specified.
 
         Args:
-            y_pred (pandas.DataFrame): Probability estimates of the targets as
+            X (pandas.DataFrame): Probability estimates of the targets as
                 returned by a ``predict_proba()`` call or equivalent. Note: must
                 include protected attributes in the index.
-            y_true (array-like): Ground-truth (correct) target values.
+            y (array-like): Ground-truth (correct) target values.
             sample_weight (array-like, optional): Sample weights.
 
         Returns:
@@ -198,8 +199,8 @@ def score(self, y_pred, y_true, sample_weight=None):
         """
         check_is_fitted(self, ['classes_', 'pos_label_'])
         pos_idx = np.nonzero(self.classes_ == self.pos_label_)[0][0]
-        probas_pred = self.predict_proba(y_pred)[:, pos_idx]
+        probas_pred = self.predict_proba(X)[:, pos_idx]
 
-        return abs(difference(self._weighted_cost, y_true, probas_pred,
+        return abs(difference(self._weighted_cost, y, probas_pred,
                 prot_attr=self.prot_attr_, priv_group=self.groups_[1],
                 pos_label=self.pos_label_, sample_weight=sample_weight))
diff --git a/aif360/sklearn/preprocessing/reweighing.py b/aif360/sklearn/preprocessing/reweighing.py
index d4f782b0..f29653ae 100644
--- a/aif360/sklearn/preprocessing/reweighing.py
+++ b/aif360/sklearn/preprocessing/reweighing.py
@@ -17,6 +17,9 @@ class Reweighing(BaseEstimator):
         This breaks the scikit-learn API by returning new sample weights from
         ``fit_transform()``. See :class:`ReweighingMeta` for a workaround.
 
+    See also:
+        :class:`ReweighingMeta`
+
     References:
         .. [#kamiran12] `F. Kamiran and T. Calders,  "Data Preprocessing
            Techniques for Classification without Discrimination," Knowledge and
diff --git a/aif360/sklearn/utils.py b/aif360/sklearn/utils.py
index 13ad3820..604b1202 100644
--- a/aif360/sklearn/utils.py
+++ b/aif360/sklearn/utils.py
@@ -14,9 +14,20 @@ def check_inputs(X, y, sample_weight=None, ensure_2d=True):
     Args:
         X (array-like): Input data.
         y (array-like, shape = (n_samples,)): Target values.
-        sample_weight (array-like): Sample weights.
+        sample_weight (array-like, optional): Sample weights.
         ensure_2d (bool, optional): Whether to raise a ValueError if X is not
             2D.
+
+    Returns:
+        tuple:
+
+            * **X** (`array-like`) -- Validated X. Unchanged.
+
+            * **y** (`array-like`) -- Validated y. Possibly converted to 1D if
+              not a :class:`pandas.Series`.
+            * **sample_weight** (`array-like`) -- Validated sample_weight. If no
+              sample_weight is provided, returns a consistent-length array of
+              ones.
     """
     if ensure_2d and X.ndim != 2:
         raise ValueError("Expected X to be 2D, got ndim == {} instead.".format(
@@ -39,8 +50,8 @@ def check_groups(arr, prot_attr, ensure_binary=False):
     provided protected attributes are in the index.
 
     Args:
-        arr (`pandas.Series` or `pandas.DataFrame`): A Pandas object containing
-            protected attribute information in the index.
+        arr (:class:`pandas.Series` or :class:`pandas.DataFrame`): A Pandas
+            object containing protected attribute information in the index.
         prot_attr (single label or list-like): Protected attribute(s). If
             ``None``, all protected attributes in arr are used.
         ensure_binary (bool): Raise an error if the resultant groups are not
@@ -49,11 +60,11 @@ def check_groups(arr, prot_attr, ensure_binary=False):
     Returns:
         tuple:
 
-            * **groups** (`pandas.Index`) -- Label (or tuple of labels) of
-              protected attribute for each sample in arr.
-            * **prot_attr** (list-like) -- Modified input. If input is a single
-              label, returns single-item list. If input is ``None`` returns list
-              of all protected attributes.
+            * **groups** (:class:`pandas.Index`) -- Label (or tuple of labels)
+              of protected attribute for each sample in arr.
+            * **prot_attr** (`list-like`) -- Modified input. If input is a
+              single label, returns single-item list. If input is ``None``
+              returns list of all protected attributes.
     """
     if not hasattr(arr, 'index'):
         raise TypeError(
diff --git a/examples/sklearn/demo_new_features.ipynb b/examples/sklearn/demo_new_features.ipynb
index 026bf790..34a6c087 100644
--- a/examples/sklearn/demo_new_features.ipynb
+++ b/examples/sklearn/demo_new_features.ipynb
@@ -18,15 +18,20 @@
     "import numpy as np\n",
     "import pandas as pd\n",
     "import tensorflow as tf\n",
+    "tf.logging.set_verbosity(tf.logging.ERROR)\n",
+    "\n",
+    "from sklearn.compose import make_column_transformer\n",
     "from sklearn.linear_model import LogisticRegression\n",
     "from sklearn.metrics import accuracy_score\n",
     "from sklearn.model_selection import GridSearchCV, train_test_split\n",
+    "from sklearn.preprocessing import OneHotEncoder\n",
     "\n",
     "from aif360.sklearn.preprocessing import ReweighingMeta\n",
     "from aif360.sklearn.inprocessing import AdversarialDebiasing\n",
     "from aif360.sklearn.postprocessing import CalibratedEqualizedOdds, PostProcessingMeta\n",
     "from aif360.sklearn.datasets import fetch_adult\n",
-    "from aif360.sklearn.metrics import disparate_impact_ratio, average_odds_error, generalized_fpr, generalized_fnr"
+    "from aif360.sklearn.metrics import disparate_impact_ratio, average_odds_error, generalized_fpr\n",
+    "from aif360.sklearn.metrics import generalized_fnr, difference"
    ]
   },
   {
@@ -52,188 +57,8 @@
    "outputs": [
     {
      "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th>age</th>\n",
-       "      <th>workclass</th>\n",
-       "      <th>education</th>\n",
-       "      <th>education-num</th>\n",
-       "      <th>marital-status</th>\n",
-       "      <th>occupation</th>\n",
-       "      <th>relationship</th>\n",
-       "      <th>race</th>\n",
-       "      <th>sex</th>\n",
-       "      <th>capital-gain</th>\n",
-       "      <th>capital-loss</th>\n",
-       "      <th>hours-per-week</th>\n",
-       "      <th>native-country</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th></th>\n",
-       "      <th>race</th>\n",
-       "      <th>sex</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <th>Non-white</th>\n",
-       "      <th>Male</th>\n",
-       "      <td>25.0</td>\n",
-       "      <td>Private</td>\n",
-       "      <td>11th</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>Never-married</td>\n",
-       "      <td>Machine-op-inspct</td>\n",
-       "      <td>Own-child</td>\n",
-       "      <td>Non-white</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>40.0</td>\n",
-       "      <td>United-States</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <th>White</th>\n",
-       "      <th>Male</th>\n",
-       "      <td>38.0</td>\n",
-       "      <td>Private</td>\n",
-       "      <td>HS-grad</td>\n",
-       "      <td>9.0</td>\n",
-       "      <td>Married-civ-spouse</td>\n",
-       "      <td>Farming-fishing</td>\n",
-       "      <td>Husband</td>\n",
-       "      <td>White</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>50.0</td>\n",
-       "      <td>United-States</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <th>White</th>\n",
-       "      <th>Male</th>\n",
-       "      <td>28.0</td>\n",
-       "      <td>Local-gov</td>\n",
-       "      <td>Assoc-acdm</td>\n",
-       "      <td>12.0</td>\n",
-       "      <td>Married-civ-spouse</td>\n",
-       "      <td>Protective-serv</td>\n",
-       "      <td>Husband</td>\n",
-       "      <td>White</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>40.0</td>\n",
-       "      <td>United-States</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <th>Non-white</th>\n",
-       "      <th>Male</th>\n",
-       "      <td>44.0</td>\n",
-       "      <td>Private</td>\n",
-       "      <td>Some-college</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>Married-civ-spouse</td>\n",
-       "      <td>Machine-op-inspct</td>\n",
-       "      <td>Husband</td>\n",
-       "      <td>Non-white</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>7688.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>40.0</td>\n",
-       "      <td>United-States</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <th>White</th>\n",
-       "      <th>Male</th>\n",
-       "      <td>34.0</td>\n",
-       "      <td>Private</td>\n",
-       "      <td>10th</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>Never-married</td>\n",
-       "      <td>Other-service</td>\n",
-       "      <td>Not-in-family</td>\n",
-       "      <td>White</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>United-States</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                   age  workclass     education  education-num  \\\n",
-       "  race      sex                                                  \n",
-       "0 Non-white Male  25.0    Private          11th            7.0   \n",
-       "1 White     Male  38.0    Private       HS-grad            9.0   \n",
-       "2 White     Male  28.0  Local-gov    Assoc-acdm           12.0   \n",
-       "3 Non-white Male  44.0    Private  Some-college           10.0   \n",
-       "5 White     Male  34.0    Private          10th            6.0   \n",
-       "\n",
-       "                      marital-status         occupation   relationship  \\\n",
-       "  race      sex                                                          \n",
-       "0 Non-white Male       Never-married  Machine-op-inspct      Own-child   \n",
-       "1 White     Male  Married-civ-spouse    Farming-fishing        Husband   \n",
-       "2 White     Male  Married-civ-spouse    Protective-serv        Husband   \n",
-       "3 Non-white Male  Married-civ-spouse  Machine-op-inspct        Husband   \n",
-       "5 White     Male       Never-married      Other-service  Not-in-family   \n",
-       "\n",
-       "                       race   sex  capital-gain  capital-loss  hours-per-week  \\\n",
-       "  race      sex                                                                 \n",
-       "0 Non-white Male  Non-white  Male           0.0           0.0            40.0   \n",
-       "1 White     Male      White  Male           0.0           0.0            50.0   \n",
-       "2 White     Male      White  Male           0.0           0.0            40.0   \n",
-       "3 Non-white Male  Non-white  Male        7688.0           0.0            40.0   \n",
-       "5 White     Male      White  Male           0.0           0.0            30.0   \n",
-       "\n",
-       "                 native-country  \n",
-       "  race      sex                  \n",
-       "0 Non-white Male  United-States  \n",
-       "1 White     Male  United-States  \n",
-       "2 White     Male  United-States  \n",
-       "3 Non-white Male  United-States  \n",
-       "5 White     Male  United-States  "
-      ]
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th></th>\n      <th></th>\n      <th>age</th>\n      <th>workclass</th>\n      <th>education</th>\n      <th>education-num</th>\n      <th>marital-status</th>\n      <th>occupation</th>\n      <th>relationship</th>\n      <th>race</th>\n      <th>sex</th>\n      <th>capital-gain</th>\n      <th>capital-loss</th>\n      <th>hours-per-week</th>\n      <th>native-country</th>\n    </tr>\n    <tr>\n      <th></th>\n      <th>race</th>\n      <th>sex</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <th>Non-white</th>\n      <th>Male</th>\n      <td>25.0</td>\n      <td>Private</td>\n      <td>11th</td>\n      <td>7.0</td>\n      <td>Never-married</td>\n      <td>Machine-op-inspct</td>\n      <td>Own-child</td>\n      <td>Non-white</td>\n      <td>Male</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>40.0</td>\n      <td>United-States</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <th>White</th>\n      <th>Male</th>\n      <td>38.0</td>\n      <td>Private</td>\n      <td>HS-grad</td>\n      <td>9.0</td>\n      <td>Married-civ-spouse</td>\n      <td>Farming-fishing</td>\n      <td>Husband</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>50.0</td>\n      <td>United-States</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <th>White</th>\n      <th>Male</th>\n      <td>28.0</td>\n      
<td>Local-gov</td>\n      <td>Assoc-acdm</td>\n      <td>12.0</td>\n      <td>Married-civ-spouse</td>\n      <td>Protective-serv</td>\n      <td>Husband</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>40.0</td>\n      <td>United-States</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <th>Non-white</th>\n      <th>Male</th>\n      <td>44.0</td>\n      <td>Private</td>\n      <td>Some-college</td>\n      <td>10.0</td>\n      <td>Married-civ-spouse</td>\n      <td>Machine-op-inspct</td>\n      <td>Husband</td>\n      <td>Non-white</td>\n      <td>Male</td>\n      <td>7688.0</td>\n      <td>0.0</td>\n      <td>40.0</td>\n      <td>United-States</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <th>White</th>\n      <th>Male</th>\n      <td>34.0</td>\n      <td>Private</td>\n      <td>10th</td>\n      <td>6.0</td>\n      <td>Never-married</td>\n      <td>Other-service</td>\n      <td>Not-in-family</td>\n      <td>White</td>\n      <td>Male</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>30.0</td>\n      <td>United-States</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
+      "text/plain": "                   age  workclass     education  education-num  \\\n  race      sex                                                  \n0 Non-white Male  25.0    Private          11th            7.0   \n1 White     Male  38.0    Private       HS-grad            9.0   \n2 White     Male  28.0  Local-gov    Assoc-acdm           12.0   \n3 Non-white Male  44.0    Private  Some-college           10.0   \n5 White     Male  34.0    Private          10th            6.0   \n\n                      marital-status         occupation   relationship  \\\n  race      sex                                                          \n0 Non-white Male       Never-married  Machine-op-inspct      Own-child   \n1 White     Male  Married-civ-spouse    Farming-fishing        Husband   \n2 White     Male  Married-civ-spouse    Protective-serv        Husband   \n3 Non-white Male  Married-civ-spouse  Machine-op-inspct        Husband   \n5 White     Male       Never-married      Other-service  Not-in-family   \n\n                       race   sex  capital-gain  capital-loss  hours-per-week  \\\n  race      sex                                                                 \n0 Non-white Male  Non-white  Male           0.0           0.0            40.0   \n1 White     Male      White  Male           0.0           0.0            50.0   \n2 White     Male      White  Male           0.0           0.0            40.0   \n3 Non-white Male  Non-white  Male        7688.0           0.0            40.0   \n5 White     Male      White  Male           0.0           0.0            30.0   \n\n                 native-country  \n  race      sex                  \n0 Non-white Male  United-States  \n1 White     Male  United-States  \n2 White     Male  United-States  \n3 Non-white Male  United-States  \n5 White     Male  United-States  "
      },
      "execution_count": 2,
      "metadata": {},
@@ -249,150 +74,81 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We can also easily load a version of the dataset which only contains numeric or binary columns and split it with scikit-learn:"
+    "We can then map the protected attributes to integers,"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 3,
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "X.index = pd.MultiIndex.from_arrays(X.index.codes, names=X.index.names)\n",
+    "y.index = pd.MultiIndex.from_arrays(y.index.codes, names=y.index.names)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "and the target classes to 0/1,"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y = pd.Series(y.factorize(sort=True)[0], index=y.index)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "split the dataset,"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(X_train, X_test,\n",
+    " y_train, y_test) = train_test_split(X, y, train_size=0.7, random_state=1234567)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "and finally, one-hot encode the categorical features:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
    "outputs": [
     {
      "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th>age</th>\n",
-       "      <th>education-num</th>\n",
-       "      <th>race</th>\n",
-       "      <th>sex</th>\n",
-       "      <th>capital-gain</th>\n",
-       "      <th>capital-loss</th>\n",
-       "      <th>hours-per-week</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th></th>\n",
-       "      <th>race</th>\n",
-       "      <th>sex</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <th>0</th>\n",
-       "      <th>1</th>\n",
-       "      <td>25.0</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>40.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <th>1</th>\n",
-       "      <th>1</th>\n",
-       "      <td>38.0</td>\n",
-       "      <td>9.0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>50.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <th>1</th>\n",
-       "      <th>1</th>\n",
-       "      <td>28.0</td>\n",
-       "      <td>12.0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>40.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <th>0</th>\n",
-       "      <th>1</th>\n",
-       "      <td>44.0</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>7688.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>40.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <th>1</th>\n",
-       "      <th>0</th>\n",
-       "      <td>18.0</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>30.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "             age  education-num  race  sex  capital-gain  capital-loss  \\\n",
-       "  race sex                                                               \n",
-       "0 0    1    25.0            7.0     0    1           0.0           0.0   \n",
-       "1 1    1    38.0            9.0     1    1           0.0           0.0   \n",
-       "2 1    1    28.0           12.0     1    1           0.0           0.0   \n",
-       "3 0    1    44.0           10.0     0    1        7688.0           0.0   \n",
-       "4 1    0    18.0           10.0     1    0           0.0           0.0   \n",
-       "\n",
-       "            hours-per-week  \n",
-       "  race sex                  \n",
-       "0 0    1              40.0  \n",
-       "1 1    1              50.0  \n",
-       "2 1    1              40.0  \n",
-       "3 0    1              40.0  \n",
-       "4 1    0              30.0  "
-      ]
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th></th>\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n      <th>2</th>\n      <th>3</th>\n      <th>4</th>\n      <th>5</th>\n      <th>6</th>\n      <th>7</th>\n      <th>8</th>\n      <th>9</th>\n      <th>...</th>\n      <th>90</th>\n      <th>91</th>\n      <th>92</th>\n      <th>93</th>\n      <th>94</th>\n      <th>95</th>\n      <th>96</th>\n      <th>97</th>\n      <th>98</th>\n      <th>99</th>\n    </tr>\n    <tr>\n      <th></th>\n      <th>race</th>\n      <th>sex</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>30149</th>\n      <th>1</th>\n      <th>1</th>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>58.0</td>\n      <td>11.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>42.0</td>\n    </tr>\n    <tr>\n      <th>12028</th>\n      <th>1</th>\n      <th>0</th>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      
<td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>51.0</td>\n      <td>12.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>30.0</td>\n    </tr>\n    <tr>\n      <th>36374</th>\n      <th>1</th>\n      <th>1</th>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>26.0</td>\n      <td>14.0</td>\n      <td>0.0</td>\n      <td>1887.0</td>\n      <td>40.0</td>\n    </tr>\n    <tr>\n      <th>8055</th>\n      <th>1</th>\n      <th>1</th>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>44.0</td>\n      <td>3.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>40.0</td>\n    </tr>\n    <tr>\n      <th>38108</th>\n      <th>1</th>\n      <th>1</th>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>1.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>33.0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>40.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 100 columns</p>\n</div>",
+      "text/plain": "                 0    1    2    3    4    5    6    7    8    9   ...   90  \\\n      race sex                                                    ...        \n30149 1    1    0.0  0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  ...  0.0   \n12028 1    0    0.0  0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  ...  0.0   \n36374 1    1    0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0   \n8055  1    1    0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0   \n38108 1    1    0.0  0.0  1.0  0.0  0.0  0.0  0.0  1.0  0.0  0.0  ...  0.0   \n\n                 91   92   93   94    95    96   97      98    99  \n      race sex                                                     \n30149 1    1    0.0  1.0  0.0  0.0  58.0  11.0  0.0     0.0  42.0  \n12028 1    0    0.0  0.0  0.0  0.0  51.0  12.0  0.0     0.0  30.0  \n36374 1    1    0.0  1.0  0.0  0.0  26.0  14.0  0.0  1887.0  40.0  \n8055  1    1    0.0  0.0  0.0  0.0  44.0   3.0  0.0     0.0  40.0  \n38108 1    1    0.0  1.0  0.0  0.0  33.0   6.0  0.0     0.0  40.0  \n\n[5 rows x 100 columns]"
      },
-     "execution_count": 3,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "X, y, sample_weight = fetch_adult(numeric_only=True)\n",
-    "(X_train, X_test,\n",
-    " y_train, y_test) = train_test_split(X, y, train_size=0.7, shuffle=False)\n",
+    "ohe = make_column_transformer(\n",
+    "        (OneHotEncoder(sparse=False), X_train.dtypes == 'category'),\n",
+    "        remainder='passthrough')\n",
+    "X_train  = pd.DataFrame(ohe.fit_transform(X_train), index=X_train.index)\n",
+    "X_test = pd.DataFrame(ohe.transform(X_test), index=X_test.index)\n",
+    "\n",
     "X_train.head()"
    ]
   },
@@ -400,27 +156,47 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "the protected attribute information is replicated in the labels:"
+    "Note: the column names are lost in this transformation. The same encoding can be done with pandas (`pd.get_dummies`), which keeps the column names, but that approach cannot be combined with other preprocessing steps in a scikit-learn `Pipeline`."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th></th>\n      <th></th>\n      <th>age</th>\n      <th>education-num</th>\n      <th>capital-gain</th>\n      <th>capital-loss</th>\n      <th>hours-per-week</th>\n      <th>workclass_Federal-gov</th>\n      <th>workclass_Local-gov</th>\n      <th>workclass_Private</th>\n      <th>workclass_Self-emp-inc</th>\n      <th>workclass_Self-emp-not-inc</th>\n      <th>...</th>\n      <th>native-country_Portugal</th>\n      <th>native-country_Puerto-Rico</th>\n      <th>native-country_Scotland</th>\n      <th>native-country_South</th>\n      <th>native-country_Taiwan</th>\n      <th>native-country_Thailand</th>\n      <th>native-country_Trinadad&amp;Tobago</th>\n      <th>native-country_United-States</th>\n      <th>native-country_Vietnam</th>\n      <th>native-country_Yugoslavia</th>\n    </tr>\n    <tr>\n      <th></th>\n      <th>race</th>\n      <th>sex</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <th>0</th>\n      <th>1</th>\n      <td>25.0</td>\n      <td>7.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>40.0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n     
 <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <th>1</th>\n      <th>1</th>\n      <td>38.0</td>\n      <td>9.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>50.0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <th>1</th>\n      <th>1</th>\n      <td>28.0</td>\n      <td>12.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>40.0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <th>0</th>\n      <th>1</th>\n      <td>44.0</td>\n      <td>10.0</td>\n      <td>7688.0</td>\n      <td>0.0</td>\n      <td>40.0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <th>1</th>\n      <th>1</th>\n      <td>34.0</td>\n      <td>6.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>30.0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n  
</tbody>\n</table>\n<p>5 rows × 100 columns</p>\n</div>",
+      "text/plain": "             age  education-num  capital-gain  capital-loss  hours-per-week  \\\n  race sex                                                                    \n0 0    1    25.0            7.0           0.0           0.0            40.0   \n1 1    1    38.0            9.0           0.0           0.0            50.0   \n2 1    1    28.0           12.0           0.0           0.0            40.0   \n3 0    1    44.0           10.0        7688.0           0.0            40.0   \n5 1    1    34.0            6.0           0.0           0.0            30.0   \n\n            workclass_Federal-gov  workclass_Local-gov  workclass_Private  \\\n  race sex                                                                  \n0 0    1                        0                    0                  1   \n1 1    1                        0                    0                  1   \n2 1    1                        0                    1                  0   \n3 0    1                        0                    0                  1   \n5 1    1                        0                    0                  1   \n\n            workclass_Self-emp-inc  workclass_Self-emp-not-inc  ...  \\\n  race sex                                                      ...   \n0 0    1                         0                           0  ...   \n1 1    1                         0                           0  ...   \n2 1    1                         0                           0  ...   \n3 0    1                         0                           0  ...   \n5 1    1                         0                           0  ...   
\n\n            native-country_Portugal  native-country_Puerto-Rico  \\\n  race sex                                                        \n0 0    1                          0                           0   \n1 1    1                          0                           0   \n2 1    1                          0                           0   \n3 0    1                          0                           0   \n5 1    1                          0                           0   \n\n            native-country_Scotland  native-country_South  \\\n  race sex                                                  \n0 0    1                          0                     0   \n1 1    1                          0                     0   \n2 1    1                          0                     0   \n3 0    1                          0                     0   \n5 1    1                          0                     0   \n\n            native-country_Taiwan  native-country_Thailand  \\\n  race sex                                                   \n0 0    1                        0                        0   \n1 1    1                        0                        0   \n2 1    1                        0                        0   \n3 0    1                        0                        0   \n5 1    1                        0                        0   \n\n            native-country_Trinadad&Tobago  native-country_United-States  \\\n  race sex                                                                 \n0 0    1                                 0                             1   \n1 1    1                                 0                             1   \n2 1    1                                 0                             1   \n3 0    1                                 0                             1   \n5 1    1                                 0                             1   \n\n            native-country_Vietnam  native-country_Yugoslavia  \n  race sex                   
                                  \n0 0    1                         0                          0  \n1 1    1                         0                          0  \n2 1    1                         0                          0  \n3 0    1                         0                          0  \n5 1    1                         0                          0  \n\n[5 rows x 100 columns]"
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# the 'Never-worked' category is unused because all of its rows were removed by dropna\n",
+    "X.workclass.cat.remove_unused_categories(inplace=True)\n",
+    "pd.get_dummies(X).head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The protected attribute information is also replicated in the labels:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "text/plain": [
-       "   race  sex\n",
-       "0  0     1      0\n",
-       "1  1     1      0\n",
-       "2  1     1      1\n",
-       "3  0     1      1\n",
-       "4  1     0      0\n",
-       "Name: annual-income, dtype: int64"
-      ]
+      "text/plain": "       race  sex\n30149  1     1      0\n12028  1     0      1\n36374  1     1      1\n8055   1     1      0\n38108  1     1      0\ndtype: int64"
      },
-     "execution_count": 4,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -445,22 +221,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "text/plain": [
-       "0.823858595509452"
-      ]
+      "text/plain": "0.8375469890174688"
      },
-     "execution_count": 5,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "y_pred = LogisticRegression(solver='liblinear').fit(X_train, y_train).predict(X_test)\n",
+    "y_pred = LogisticRegression(solver='lbfgs').fit(X_train, y_train).predict(X_test)\n",
     "accuracy_score(y_test, y_pred)"
    ]
   },
@@ -473,16 +247,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "text/plain": [
-       "0.19826239080897468"
-      ]
+      "text/plain": "0.2905425926727236"
      },
-     "execution_count": 6,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -499,22 +271,19 @@
     "\n",
     "`average_odds_error()` computes the (unweighted) average of the absolute values of the true positive rate (TPR) difference and false positive rate (FPR) difference, i.e.:\n",
     "\n",
-    "$\\tfrac{1}{2}\\left(|FPR_{D = \\text{unprivileged}} - FPR_{D = \\text{privileged}}|\n",
-    "           + |TPR_{D = \\text{unprivileged}} - TPR_{D = \\text{privileged}}|\\right)$"
+    "$$ \\tfrac{1}{2}\\left(|FPR_{D = \\text{unprivileged}} - FPR_{D = \\text{privileged}}| + |TPR_{D = \\text{unprivileged}} - TPR_{D = \\text{privileged}}|\\right) $$"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "text/plain": [
-       "0.12427040384779571"
-      ]
+      "text/plain": "0.09372170954260936"
      },
-     "execution_count": 7,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -539,22 +308,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "scrolled": false
-   },
+   "execution_count": 12,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
-     "text": [
-      "0.8147819559134648\n",
-      "{'estimator__C': 10, 'reweigher__prot_attr': 'sex'}\n"
-     ]
+     "text": "0.8279649148669566\n{'estimator__C': 10, 'reweigher__prot_attr': 'sex'}\n"
     }
    ],
    "source": [
-    "rew = ReweighingMeta(estimator=LogisticRegression(solver='liblinear'))\n",
+    "rew = ReweighingMeta(estimator=LogisticRegression(solver='lbfgs'))\n",
     "\n",
     "params = {'estimator__C': [1, 10], 'reweigher__prot_attr': ['sex']}\n",
     "\n",
@@ -566,16 +330,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "text/plain": [
-       "0.639237550613212"
-      ]
+      "text/plain": "0.5676803237673037"
      },
-     "execution_count": 9,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -593,47 +355,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "WARNING:tensorflow:From /anaconda/envs/aif360/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
-      "Instructions for updating:\n",
-      "Colocations handled automatically by placer.\n"
-     ]
-    },
     {
      "data": {
-      "text/plain": [
-       "0.8218794786050638"
-      ]
+      "text/plain": "0.8399056534237488"
      },
-     "execution_count": 10,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "adv_deb = AdversarialDebiasing(prot_attr='sex')\n",
+    "adv_deb = AdversarialDebiasing(prot_attr='sex', random_state=1234567)\n",
     "adv_deb.fit(X_train, y_train)\n",
     "adv_deb.score(X_test, y_test)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "text/plain": [
-       "0.022611763594614448"
-      ]
+      "text/plain": "0.060623189820735834"
      },
-     "execution_count": 11,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -651,7 +400,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -669,24 +418,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "text/plain": [
-       "0.7676926226711254"
-      ]
+      "text/plain": "0.8163190093609494"
      },
-     "execution_count": 13,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "cal_eq_odds = CalibratedEqualizedOdds('sex', cost_constraint='fnr')\n",
-    "log_reg = LogisticRegression(solver='liblinear')\n",
-    "postproc = PostProcessingMeta(estimator=log_reg, postprocessor=cal_eq_odds)\n",
+    "cal_eq_odds = CalibratedEqualizedOdds('sex', cost_constraint='fnr', random_state=1234567)\n",
+    "log_reg = LogisticRegression(solver='lbfgs')\n",
+    "postproc = PostProcessingMeta(estimator=log_reg, postprocessor=cal_eq_odds, random_state=1234567)\n",
     "\n",
     "postproc.fit(X_train, y_train)\n",
     "accuracy_score(y_test, postproc.predict(X_test))"
@@ -694,15 +441,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfUAAAEKCAYAAAALjMzdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3Xdck1f7P/DPCXsLiGwRgRDCcFEUR92K/VkUseJotY66H63Vjm+X1qq1j9papFq11Yrax1W1rmprK9hqawVF2UslArIEwoaEnN8fSWyAAEEJCXjer1dekHvlusM4Oec+93URSikYhmEYhun8OJoOgGEYhmGY9sEadYZhGIbpIlijzjAMwzBdBGvUGYZhGKaLYI06wzAMw3QRrFFnGIZhmC5CrY06ISSIEJJKCMkghLynZH1PQsgVQshtQshdQshLCuv+T7ZfKiFkvDrjZBiGYZiugKjrPnVCiA6ANABjAWQDuAlgBqU0SWGbPQBuU0p3EUL4AC5QSnvJvv8fgAAADgAuA+BSSuvVEizDMAzDdAHq7KkHAMiglN6jlNYBOAJgUqNtKABz2fcWAHJl308CcIRSWkspvQ8gQ3Y8hmEYhmGaoavGYzsCeKjwPBvAwEbbrAPwCyHkPwBMAIxR2PfvRvs6Nn4BQshCAAsBwMTEZACPx2uXwDUtIQEwNgZ699Z0JAzTUGxsbBGl1EbTcTAMo5w6G3VVzADwPaV0GyEkEMBBQoiPqjtTSvcA2AMA/v7+NCYmRk1hdhyJRNqgv/EG8Pnnmo6GYRoihGRpOgaGYZqnzkY9B4CzwnMn2TJF8wEEAQCl9C9CiCGA7iru2yUVFgK1tUDPnpqOhGEYhuls1HlN/SYAD0KIKyFEH8B0AGcabSMAMBoACCFeAAwBFMq2m04IMSCEuALwAPCPGmPVGlmyfhBr1BmGYZi2UltPnVIqJoQsB3AJgA6AfZTSRELIegAxlNIzAFYD2EsIWQXppLnXqXQ6fiIh5BiAJABiAMuel5nvAoH0q4uLZuNgGIZhOh+1XlOnlF4AcKHRso8Vvk8CMKSZfTcC2KjO+LSRvFFnPXWmK4mNje2hq6v7LQAfsKRXDPO0JAASxGLxggEDBhQo20DTE+WYRrKyADMzwMJC05EwTPvR1dX91s7OzsvGxqaEw+GoJzkGw3RxEomEFBYW8vPy8r4FEKxsG/aJWcsIBNKhd0I0HQnDtCsfGxubMtagM8zT43A41MbGRgjpiJfybTowHkYFAgEbeme6JA5r0Bnm2cn+jpptu1mjrmVYo84wDMM8Ldaoa5HKSqCoiDXqDMMwzNNhjboWeShLqstuZ2MY9Th48GA3QsiA27dvG8qXpaam6nt4eHgDwLlz58xGjhzp/qyvExoa2mv//v2WABAWFuYSGxtrCADGxsb9nuW4586dM/v1119N2rqfo6Oj76NHj1SaGB0eHm49e/bsdutaDB8+3L2oqEgHADZs2NCjd+/e3sHBwa6HDx+2eP/99+3a63XkJBIJBg0axC0uLuYAgI6OzgAej8eXP1JTU/Xb+zXlnva9y83N1R02bJhHe8TAZr9rEXY7G8Oo15EjR6z69+9fERkZadWvX7/c1vd4dkePHm1Tal2RSAQ9PT2l637//XczU1PT+rFjx1a2S3AdIDo6OkP+/XfffWdz+fLlNDc3N5FskVDV47T0vig6duyYhbe3d7WVlZUEAAwMDCQpKSlJre2nSQ4ODmJbW1vRL7/8YjJu3Lhn+tmyRl2LsGxyzPNg3jw4JyTAuD2P6eODqn37GhSQakIoFHJu3rxpevny5dTg4GCPL7/8UuVGXSwWY+nSpU5XrlyxIITQOXPmFH3wwQcFa9assb948WK32tpajr+/f8Xhw4ezOJyGA6ABAQGeW7duffjiiy9WAcD8+fOdo6OjzW1sbEQ//vjjPQcHB3FAQICnj49P1T///GMaGhp
a7OnpWbN582Z7kUjEsbS0FB89evReVVUVJzIy0obD4dBjx45Zb9++XeDn51czd+5cl5ycHH0A+OKLLwTjxo2rzMvL0wkNDe2dn5+vP2DAgIrmSmyfOHHC/OOPP3asr68nVlZW4r/++itNcf0PP/xg0TgOZ2dn8fnz501Xr17dEwAIIbh+/XpKWVmZTmhoaO+Kigqd+vp6smPHjqygoKAKR0dH35iYmOTVq1c7ZGdnG0yYMMFj1qxZRZaWlvUxMTEmkZGRgtzcXF1l5/HWW2853Lt3z0AgEBg4OjrWrl279tHcuXNdRSIRkUgk+PHHHzN9fX1rFWM+fPiw1aJFi4pa+3kuW7bM6dq1a2Z1dXXkjTfeKHj77beLzp07Z/bJJ584mJubi1NTU42Dg4OLfX19q3fu3GlbW1tLTp06lent7V3b3Pui+BrNnZOy987S0lIyefLk0sjISOtnbdTZ8LsWEQgAHR3AwUHTkTBM1/PDDz90GzFihNDPz6/W0tJS/Mcff6j8wWLbtm02AoFAPykpKTEtLS1pwYIFjwHg7bffLkhISEhOT09PrK6u5hw5cqTFDBPV1dUcf3//yoyMjMQhQ4aUv/fee0/+2uvq6khCQkLyJ598kj927NiKuLi4lOTk5KSpU6cWr1+/3s7T07Nu9uzZhYsXL85PSUlJCgoKqli0aJHzW2+9lZ+QkJB86tSpzMWLF/cCgPfee88hMDCwIiMjIzEkJKT00aNHTYacc3NzdZcvX97r5MmTmampqUmnT5/ObLyNsjhk74ddeHh4VkpKStLff/+dYmpqKtm3b5/V6NGjhSkpKUnJycmJAwcOrGr0/gt69Oghio6OTlu7dm2DxCnNnQcApKenG169ejX17Nmz93fs2GGzdOnS/JSUlKS7d+8mu7q61jWOOTY21nTIkCFPGsba2lqOfOh97NixbgCwffv27hYWFvUJCQnJd+7cST5w4IBNSkqKPgCkpKQY7du3T5Cenp5w4sQJ67S0NMP4+Pjk1157rWjbtm09WnpfVDknZe8dAAwZMqTyn3/+MVXya9MmrKeuRQQCwNER0GU/FaYLa61HrS7Hjh2zWrFiRQEAhIaGFh88eNBq2LBhVa3tBwC///67+eLFiwvlw7+2trb1APDzzz+bffHFF3Y1NTWc0tJSXT6fX40WhpQ5HA4WLFhQDADz5s17PGXKlCfX72fMmFEs//7+/fv6kydPdiosLNSrq6vjODs71yo73rVr18zT09ON5M8rKip0hEIh5++//zY7efJkBgBMnz5duGjRoiZptqOiokwCAgLKeTxeneI5KWoujkGDBlWsWbPGedq0acUzZswocXNzkwwaNKhy0aJFvUQiEWfq1KklgwcPrm75XW39PAAgKCio1NTUlAJAYGBg5datW+2zs7P1p0+fXtK4lw4AQqFQ19LSUiJ/rmz4/fLly+YpKSnGZ86csQSA8vJynaSkJEN9fX3q6+tb6eLiIgKAnj171k6YMEEIAH369KmOjo42a+l9UeWclL13gHQIvqCg4Jmv97OeuhbJymJD7wyjDvn5+Tp///232bJly1wcHR19IyIi7M6ePWspkUha37kZVVVVZPXq1S4nT57MTEtLS3r11VeLampq2vQ/lShkmTIzM3sSzPLly3suXbq0IC0tLSkiIiKrtrZW6XEppbh161ZySkpKUkpKSlJBQcFdCwuLpz+pRpqLY9OmTXnffvttVnV1NWfYsGG827dvG06YMKHi6tWrqY6OjnXz5s1zjYiIsFb1dVo6DxMTkyfns3jx4uKffvopw8jISDJx4kSPM2fOmDU+lo6ODq2vb7lUCKWUbNu2TSB/vZycnPgpU6aUAYCBgcGTaxUcDgeGhoZU/n19fT1p6X1R5ZyUvXeA9PfJwMDgmX92rFHXIvJscgzDtK+DBw9ahoSEFOfm5sbn5OTE5+Xl3XVycqq7dOmSSsOdo0ePLtu9e3d3kUg6vys/P1+nqqqKAwB2dnZioVD
IOXv2rGVrx5FIJJDPiv/++++tAwICypVtV15ertOzZ0+RfDv5cjMzs/ry8nId+fOhQ4eWffbZZz3kz69fv24EAIMGDSqX73fs2DHzsrIyHTQyYsSIyn/++cdMPuycn5/fZJvm4khMTDQICAio3rhxY56fn19lQkKCYVpamr6Tk5No9erVRbNnzy68deuWypc3mjuPxpKSkvS9vLxqP/zww4Lx48eXxsXFNdnO1dW1Jjk52aCl1xs7dqxw165dNrW1tQQA7t69a1BWVqZye9jc+6LKOSl77wAgISHBkMvlqjy60RzWqGuJ+nogO5v11BlGHY4fP241ZcqUEsVlkyZNKjl06JCVKvuvWrWq0MnJqY7H43l7enryv/vuO6vu3bvXz5o1q9DLy8t75MiR3D59+rQ6wcnIyEjyzz//mHh4eHhfvXrV7LPPPnukbLsPPvggd8aMGW7e3t5e1tbWTyZghYaGlp4/f74bj8fjX7x40XTPnj0Pb926ZcLlcvlubm7eERERNgCwefPm3GvXrpm6u7t7nzx50tLe3r7JtWcHBwdxeHj4g5CQEHdPT09+SEhIb1Xj+O9//9vDw8PDm8vl8vX09OjUqVOFly5dMvPy8vL28vLi//jjj1bvvPNOvirvLQA0dx6NHTp0yIrL5XrzeDx+cnKy0aJFix433mbcuHHCX375pUkPXtGqVauKeDxeja+vr5eHh4f3G2+84SISiVROzt3c+6LKOSl77wDg119/NQsKClL5boDmkOZmRXY2/v7+NCYmRtNhPLXcXOn19F27gMWLNR0NwyhHCImllPq3db87d+486NOnT4szkhmmPWRlZenNmDGj1/Xr19M1HUtb+Pv7e/78888ZNjY2rZYZv3PnTvc+ffr0UraO9dS1BLudjWEY5tm5uLiI5s2bVyRPPtMZ5Obm6q5cuTJflQa9NWyetZaQJ55h19QZhmGezYIFC0pa30p7ODg4iF977bXS9jhWp/kk09XJG3VnZ83GwTAMw3RerFHXEllZQLdugLm5piNhGIZhOiu1NuqEkCBCSCohJIMQ8p6S9V8SQuJkjzRCSKnCunqFdWfUGac2YLezMQzDMM9KbdfUCSE6AL4GMBZANoCbhJAzlNInmX0opasUtv8PAMUKRtWU0r7qik/bsDrqDMMwzLNSZ089AEAGpfQepbQOwBEAk1rYfgaA/6kxHq3GsskxjPqx0qut62qlVwkhAyZNmuQqXy8SiWBpadmntZ/z0/4u1NTUEH9/f095oqKOps7Z745AgxzP2QAGKtuQEOICwBXA7wqLDQkhMQDEADZTSk+rK1BNKysDSkvZ8DvDqBsrvdrxNF161cjISJKammpUUVFBTE1N6alTp8xtbW3V1uIaGhrS4cOHl3377bdWS5YsKW59j/alLRPlpgM4QSlVvEfPRZbkYiaA7YQQt8Y7EUIWEkJiCCExhYWFHRVru3so++jDeurMc2HePGcEBHi262PevFbvG5GXXt2/f/+DU6dOqZRJTk4sFmPhwoVO8kxgGzdu7AEAa9assffx8fHy8PDwnjFjhouyXPIBAQGeV69efZIydf78+c7u7u7egYGB3NzcXF35NvPmzXP28fHx2rBhg+0PP/xg4efnx/Py8uIPHjyY+/DhQ93U1FT9yMhIm2+++cZWnlEuNzdXd/z48W4+Pj5ePj4+Xr/88osJAOTl5ekMGTLEw93d3TssLMylpdKrfD7fy9PTkx8YGMhtvF5ZHABw/vx5U3nlMy8vL35JSQknKytLz9/f35PH4/E9PDy8L168aAr8O0owc+bMnvLSq5988kkPxRGB5s7jrbfecpg8ebJr//79eVOmTHGNiYkx9PX19eLxeHwul8uPj49vkg728OHDViEhIQ1uDxszZozw+PHj3QDgf//7n1VoaOiTxvbKlSvGffv25Xl5efH79evHu3PnTpNjlpWVcV555ZVevr6+Xl5eXvxDhw51A4Dm4pk6dWrpkSNH2vQ71l7
U2ajnAFD8Q3OSLVNmOhoNvVNKc2Rf7wGIQsPr7fJt9lBK/Sml/jY2SrMKdgry29lYo84w6sNKrzb0vJReBYDXXnut+OjRo5ZVVVUkOTnZODAw8Mn6Pn361Ny8eTMlOTk5ae3atTnvvPOOU+Njvv/++/YjR44si4+PT/7jjz9SP/zwQ6eysjJOc/G88MIL1Xfv3m3zZZL2oM7h95sAPAghrpA25tMh7XU3QAjhAbAE8JfCMksAVZTSWkJIdwBDAPxXjbFqFMsmxzxX9u1jpVfBSq+qch7As5deBYCBAwdWZ2dnG+zdu9dqzJgxDX4+xcXFOmFhYa4PHjwwJIRQZTngo6KizC9dutQtPDzcDgBqa2tJRkaGfnPx6OrqQk9Pj5aUlHAax6JuauupU0rFAJYDuAQgGcAxSmkiIWQ9ISRYYdPpAI7QhuNDXgBiCCF3AFyB9Jp6g3q4XYlAAOjpAfb2mo6EYbomVnr16XSl0qtBQUGla9eudZ49e3aD69zvvvuu4/Dhw8vT09MTz549m1FXV6e0jOqJEycy5PE9evQovn///jUtxSMSiYixsXGHF1dR6zV1SukFSimXUupGKd0oW/YxpfSMwjbrKKXvNdrvOqXUl1LaR/b1O3XGqWkCAeDkBHC0ZYYDw3QxrPQqK726ZMmSojVr1uQGBAQ0GEEoKyvTcXJyqgOA3bt3d1f2uiNHjizbtm2brfxD4LVr14xaiicvL0+nW7duYsXa7B2FNSNagN3OxjDqxUqvstKrbm5uog8//LCg8fJ33303b926dU5eXl58sVhpFVVs3rw5VywWEx6Px3d3d/f+8MMPHVuK5+effzZvPMzfUVjpVS3g4gKMGAEcOKDpSBimZaz0KqPttKH06rhx49y2bt2a7efnp3QuxLNipVe1mFgM5OSwnjrDMEx70HTp1ZqaGhIcHFyqrga9Naz0qobl5gL19axRZxiGaS+aLL1qaGhIly9f3uSyQEdhPXUNY3XUGYZhmPbCGnUNY4lnGIZhmPbCGnUNkzfqzq0muWQYhmGYlrFGXcOysgBra8BEIwkFGUY7SSTAb7/BJDIS3X77DSbPkCPmiczMTL3Ro0e7ubi4+Dg7O/vMnTvXuaampkn2MAB48OCBXlBQUJNbvBpTrEDWVm+99ZbDxx9/bKvq9s9a4U3Rf//7Xxt5cpjbt28bynO4JyYmGvTr14/3rMcPCgrqnZSUpA9Ic79zuVy+PFf801SZU1VnrazWnlijrmECAbuezjCKjh6FhYMD/IKDwV26FL1efhlcBwf4HT2KFvOqt0QikWDy5MnuwcHBpVlZWQn3799PqKys5KxcudKx8bYikQi9evUSXbx48V5rx42Ojs7o3r170/RlWu6dd94plE/mOn78eLfg4OCS5OTkJG9v79rbt2+nqHociUSCxtnbYmJiDOvr6wmfz39yb3x0dHSaPBubNlaYU6yspulYnhVr1DVMIGDX0xlG7uhRWMyZg975+dCrqgKnshI61dXg5OdDb84c9H7ahv3s2bNmBgYGkpUrVz4GpLm5v/nmm4dHjx7tXl5ezgkPD7ceNWqU+6BBg7iDBw/2VKyxXl5eznnppZd6u7m5eY8dO9bNz8+PJ6+6Jq9Alpqaqt+7d2/v6dOnu7i7u3sPGTLEo6KiggDAtm3buvv4+Hh5enryx48f71ZeXt7i/92HDx/qjh071s3T05Pv6enZpGcrFAo5gYGBXD6f78Xlcp9UDCsrK+OMGDHC3dPTk+/h4eG9d+9eSwBYunSpo5ubmzeXy+UvXLjQCfh3lODo0aMWe/bssf3+++9tBg4cyAUajgh89NFHtj4+Pl5cLpe/atUqB0Baf75Xr14+ISEhvbhcrndmZmaDYjHff/+99csvv9ygSpoyzR3b1dXVOzQ0tFevXr18goODXU+fPm3Wv39/nouLi8+VK1eMga5XWa09sUZdgyhl2eQYRk4iAVasgEttrfL/S7W14KxcCZenGYqPj4836tO
nT4PiLVZWVhJ7e/u6pKQkAwBITEw0/umnnzJv3ryZqrjdli1bbLp161afmZmZuGnTppykpCSlw8cCgcBwxYoVBRkZGYkWFhb1kZGRlgAwa9askoSEhOTU1NQkT0/P6vDwcKWpSOUWL17cc9iwYeWpqalJiYmJSf37969RXG9sbCw5f/58RlJSUnJ0dHTa+++/7ySRSHDy5ElzOzs7UWpqalJ6enrilClTyvLy8nQuXLhgmZ6enpiWlpa0adOmBhnswsLChPLKbzdu3EhTXHfy5EnzjIwMw7t37yYnJycnxcXFGf/888+msnM1WL58eWFGRkYil8ttkK3uxo0bpoMGDWrwXg8fPpzL4/H4fn5+vNaO/fDhQ8N33303PzMzMyEzM9Pw8OHD1jExMSkbN27M3rhxoz3Q9SqrtSd2n7oGCYVAeTkbfmcYALhyBSYVFWjx+nR5OXSiomAyahTafQh32LBhZcoqlV2/ft105cqVBQDwwgsv1HC5XKWV3RwdHWvllcn69etX9eDBAwMAiI2NNfr4448dy8vLdSorK3WGDx/eYvrQ69evm504ceI+IB1RsLa2bhCTRCIhb775ptPff/9tyuFwUFBQoJ+dna3bv3//6g8++MB5yZIljpMmTRIGBQVViEQiGBgYSMLCwnpNnDixNCwsTOXUpRcvXjS/evWqOZ/P5wNAVVUVJyUlxbB379519vb2daNHj1b6MygsLNSzs7NrcHE6Ojo6zd7e/kkO1paO7ejoWCvPz87lcqtHjRpVxuFw0L9//6oNGzY4AF2vslp7Yj11DWK3szHMv3JyoEcIWsxbTQhodjb02npsHx+f6jt37jQoMFJcXMx59OiRPp/PrwWkPeC2HleRvr7+k9h1dHSoWCwmALBw4ULXiIgIQVpaWtK7776b21zFNVXt3r3b6vHjx7rx8fHJKSkpSdbW1qLq6mqOn59f7a1bt5J8fX2rP/roI8c1a9bY6+npIS4uLnnq1Kkl586d6zZixAgPVV+HUoo333zzkfxauEAgSFi1alUR0PJ7ZWBgIKmurm7xHFs6tuL7yOFwYGhoSAFAR0cH9fX1BOh6ldXaE2vUNYjVUWeYfzk6QiSRQOlsdDlKQZyc0OYpysHBweU1NTUc+YxvsViMpUuXOr/yyitFiiVPlQkMDKw4cuSIJQDExsYapqWlKa0g1pyqqipOz549RbW1tUSVa7ZDhgwp37Jli408zsePHzcYvRAKhTrdu3cXGRgY0LNnz5rl5ubqA9IZ+2ZmZpKlS5cWv/XWW3lxcXHGQqGQI+vVCr/55puHKSkpKldOmzBhQtnBgwe7y+ua379/Xy8nJ6fV0V0PDw+lVdLa49hyXa2yWntiw+8axLLJMcy/Ro5EpZkZ6qurm+9smJmhfsSItg+9czgcnD59OmPhwoUuW7ZssZdIJBg1apQwPDw8p7V933777cJp06b1cnNz83Zzc6txd3evsbS0VHnG+3vvvZcbEBDgZWVlJe7fv39FRUVFi5cYdu3aJXj99ddduFxudw6Hg4iIiKwxY8Y8OecFCxYUT5gwwZ3L5fL9/PyqXF1dawDpMP///d//OXE4HOjq6tKdO3dmlZaW6kycONG9traWAMCnn376UNW4p0yZUpaYmGj4wgsv8ABp7/zw4cP3dXV1W2z0JkyYUPr777+bTZ48WWlZ2Wc5tty7776bt2DBAtfPP//cYezYsUon5W3evDl34cKFPXk8Hl8ikRBnZ+faK1euZBw6dMjq2LFj1rq6utTGxkb06aefPgI0W1mtPbEqbRr07rvAV18BVVWsljrTOai7Spt89ruyyXIGBpAcOIB7YWHo0H+8YrEYdXV1xNjYmCYmJhqMGzeOm5mZmSAfFmYaqqioIEOGDPGMjY1N0dXtPP1GdVdWa08tVWnrPO94FyQQSDPJsQadYaRkDfa9lSvhUl4OHUJAKQUxM0P9V18hq6MbdEB6S9uwYcM8RSIRoZTiyy+/zGINevNMTU3
pxx9/nHv//n19Dw+PJnXctZGmK6u1J9ZT16DBgwEjI+C33zQdCcOopqPqqUskQFQUTLKzoefkBNGIEahkH34ZRor11LWUQACMG6fpKBhG+3A4gDpuW2OYrk6tn30JIUGEkFRCSAYh5D0l678khMTJHmmEkFKFdXMIIemyxxx1xqkJIpG0ljqb+d6FPH4M/PWXNKsQwzCMBqitUSeE6AD4GsAEAHwAMwghfMVtKKWrKKV9KaV9AewAcFK2rxWAtQAGAggAsJYQYqmuWDUhO1v6v5816l3I3r3SayoZGZqOhGGY55Q6e+oBADIopfcopXUAjgCY1ML2MwD8T/b9eAC/UkqLKaUlAH4FEKTGWDscu52tixGLgV27gFGjAA+V83swDMO0K3U26o4AFO+JzJYta4IQ4gLAFcDvbdmXELKQEBJDCIkpLCxsl6A7Cssm18WcPSv9of7nP5qOpGuQ1l41QWRkN/z2mwnaofYqK736r44uvTpgwABPxfU8Ho8vL5jTHMWiOm01ePBgbmFh4VP9XDo7bZlPOh3ACUppm0oYUkr3UEr9KaX+NjY2agpNPeSNulOTMgRMpxQRIf2ENnGipiPp/I4etYCDgx+Cg7lYurQXXn6ZCwcHPxw9ykqvtpOOLr1aWVmpk5GRoQcAt27dMmyn02jWjBkzHm/durVzNQrtRJ2Neg4AZ4XnTrJlykzHv0Pvbd23U8rKAnr0kN7SxnRyiYnA778DS5cCnSjZhlY6etQCc+b0Rn6+HqqqOKis1EF1NQf5+XqYM6f30zbsrPSqZkuvTp48uTgyMtIKACIjI61CQ0OL5etSU1P1BwwY4Mnn8734fL5X4/MFpAmAFi1a5CSPZcuWLd0BICsrS8/f399T3vO/ePGiKQBMnz699OTJk9Ytvc9dlTob9ZsAPAghroQQfUgb7jONNyKE8ABYAvhLYfElAOMIIZayCXLjZMu6DIGAXU/v9OQ9lIgIwMAAmD9fs/F0dtLaqy5oruBJbS0HK1e6PM1QPCu9qtnSqzNmzCg5e/asJQBcunSp25QpU540+g4ODuI//vgjLSkpKfno0aP3Vq1a1eSi5Pbt27tbWFjUJyQkJN+5cyf5wIEDNikpKfr79u2zGj16tDAlJSUpOTlVXsC/AAAgAElEQVQ5ceDAgVUAYGNjU19XV0fy8vKeuyF4tXUrKKViQshySBtjHQD7KKWJhJD1AGIopfIGfjqAI1QhCw6ltJgQ8imkHwwAYD2ltBhdiEAA8PkNl9XXAzrP3a9gJ5WSAgQGAr/8AkRGAjNnAt1b/F/NtObKFRO0khcd5eU6iIoywahRrPRqJyq92qNHj3oLCwvxnj17LN3d3atNTU2ffDKrq6sj8+fPd0lKSjLicDjIyspqUgzm8uXL5ikpKcZnzpyxBIDy8nKdpKQkw0GDBlUuWrSol0gk4kydOrVE/v4DgLW1tVggEOjb2dlVNz5eV6bWa+qU0guUUi6l1I1SulG27GOFBh2U0nWU0ib3sFNK91FK3WWP/eqMs6NRKh1+V5wkl5IibRNSU5vfj9ESlALz5gFlZcC0adLk/cuXazqqzi8nRw+EtHyTPyEU2dms9GonLL06derUknfeecdlxowZDTpoGzdutO3Ro4coOTk5KT4+PkkkEikro0q2bdsmkMeSk5MTP2XKlLIJEyZUXL16NdXR0bFu3rx5rvLJf4C0fvqz/kw7I22ZKPdcKS6WtgPy4XfFNmLePJa7ROv9+CNw9650uDgrC/D0BPr313RUnZ+jowgSSYulV0EpgZMTK73aCUuvzpo1q2TZsmV5U6ZMKWt8Pvb29iIdHR3s3LnTuvHEOwAYO3ascNeuXTbyanN37941KCsr46Slpek7OTmJVq9eXTR79uzCW7duGQPSCXyFhYV6np6enT6Xe1uxWT0a0Ph2thMngNu3pW3ErVvSNmPqVM3Fx7SgogJYvBiolI08Ugrk5Eifmyi91MqoauTISpiZ1UNJL+8JM7N
6jBjBSq92wtKrlpaWko0bN+Y13v7NN98sCA0NdTty5Ij1qFGjhEZGRk0+ZK1atarowYMHBr6+vl6UUmJlZSW6cOFC5qVLl8zCw8PtdHV1qbGxcf3hw4fvA8Cff/5p3K9fv0o9vTYP6nR6rKCLBpw+DYSEADExwIMH0hFcxbk/HA5w/DgwZYrGQmSas3q1NMlMtcJlOkND6cz3bds0F1cHUXtBF/nsd2VD1AYGEhw4cA9tuC7cHljp1bbRhtKrc+fOdZ48eXLppEmTmq3p3pm1VNCFDb9rgLynnpYGhIWhyWReiUTa0F+40PGxMS1ISWnaoANATY10OZsQ8ezCwoQ4cOAebG1FMDaWwMSkHsbGEtjaijTRoAPSW9oCAgJ4np6e/JCQEDdWerVliqVXNRWDj49PdVdt0FvTZXrqrq6udO3atQ2WeXt744UXXoBIJMLhw4eb7NO3b1/07dsXVVVVOHbsWJP1/v7+8PHxgVAoxKlTp5qsDwwMhKenJ4qKinDu3Lkm61988UX07t0beXl5uHjx4pPlmZnSYi43b45GfLwznJ0fYvTopvVX//knCAkJdrh//x6uXr3aZP3EiRPRvXt3pKam4q+//mqyPiQkBBYWFkhISICyUYxp06bB2NgYcXFxiIuLa7J+1qxZ0NPTw82bN5GYmNhk/euvvw4AuH79OtLSGtwNAz09PcyaNQsAEB0djfv37zdYb2xsjGnTpgEALl++jOzs7Abrzc3NMUU2VHHx4kXk5TUctbO2tsbLL78MADh79iweP37cYL2dnR2CgqSZhU+ePImysgaX8eDk5IQxY8YAAI4dO4aqqoYTml1dXTF8+HAAwOHDhyESiaTXSGTH4aalYfD16wCA72XvA8zNgX7SW3y19XdPbvTo0XB2dsbDhw/xm5Lav0FBQbCzs8O9ew1/9+bOndshpVdltVdNkJ2tBycnEUaMqASrvcowAFjpVa1TWwvo6QHCVvocJSXAjRvSJDWMhlVXA+WtfPAvL5duxzIKPTtp7VVWepVh2qjL9NQ70zX1gQOlHb4UFZIx7twJLFmi/piYVlAKDBki/ZSlLPkJhwMMGgT8+SdAWp7A3Zmp/Zp6I2IxS9LHMI2xa+paRiCQFvJqbTSRw3kymstoGiHAvn2AfjOXCQ0MpOu7cIPe0W7fhqGVFfreuYMmt0cxDKMca9Q7WG0tkJcH+Pu3noDMxkbaq2e0BI+nvGCLkZF0OMXTs+k65qlIJMDcuehVUQGd119Hr3Yo0sYwzwXWqHewh7K7RF1cgP37m08Lq6PDOn5aqa6u6Q/F2BhYv14z8XRRBw7AMi0NRpQCqakwjoxEt2c9po6OzgB54Y8JEyb0bq2wijLr16/v8TT7dQZtLXUaGhraa//+/Zbt8dqNS92+/PLLrlwul//JJ5/0ePPNNx1Onz5t9izHP3jwYLc1a9bYA9JiNj169PDj8Xh8Ho/HX7p0qdKS4O1FXvSnrfstXLjQ6cyZM20+7y75y6nN5LezubgAL70EHDvWdBiew5Euf+mljo+PaYFAAJw7B0ya9G+iGRMTYPdulnimHQmF4KxahZ7V1dL/T9XV4Lz5JlzKyp7t/5WBgYEkJSUlKT09PVFPT49u27atzaU5d+/ebVtRUfHUcSgrVcqgQalbgUCge+fOHZO0tLSktWvXFmzfvj23cSKblohETRMOfvHFF3arV68ulD9fvHhxvjzl7M6dO7WyAuiaNWsKPv/8c7u27sca9Q7WOJvclCnAkSP/Tpg2NASOHmWJZ7TSrl3Sr19+Cfj5ST999enDfljtbM0aONTUNPzfVFMDzurVcGiv1xg6dGhFRkaGAQCsW7fO1sPDw9vDw8N7/fr1PQDlZUw3bNjQo6CgQG/48OFceZlSReHh4dajR492CwgI8HRxcfFZvXq1PaC8VOnu3butuFwu38PDw3vJkiVPeoonTpww5/P5Xp6envzAwECuPJZXXnmll6+vr5eXl9eTUqsxMTGGvr6
+Xjwej8/lcvnx8fEGzZVf/eOPP4xfeOEFT29vb6+hQ4d6ZGVl6cmXy0u8fvHFF83eZ/PBBx/Ycblcvqenp9Ke7Zo1a+x9fHy8PDw8vGfMmOEikV0v2bBhQw952deJEyf2BoDz58+bynvJXl5e/JKSEo7iKMGYMWO4BQUF+jwej3/x4kVTxRGB5s4jICDAc968ec4+Pj5eGzZssFWM7e7duwb6+voSe3t7cUu/Ey0de/78+c4+Pj5evXv39o6OjjYeN26cm4uLi8+KFSue/E6OGTPGzdvb28vd3d1769atSi+u7ty500r+M5s5c6aLWCyGWCxGaGhoLw8PD2/56AQAcLncutLSUl2BQNC2Xj6ltNkHpNXVrrS0jbY8BgwYQDuDdesoJYTS2tp/l0kklAYGUsrhUDp4sPQ5o2Wqqym1tqY0JET6PDmZ0m7dKE1J0WxcHQzSCott/vuMi4t7QCmNae1x6xZNMDCgEuntBg0fBgZUEhdH41U5jrKHkZFRPaU0pq6uLmbUqFElmzdvzrp69WqSh4dHlVAovFVaWnrLzc2t+s8//0zcv39/RlhYWKF836KiotuU0hgHB4fa3NzcOGXH/+qrr+5379697tGjR7fLy8tj3d3dq6Ojo5NSUlLuEkLo5cuXkymlMffv379jZ2dXm5OTE1dXVxczcODAssjIyIycnJw4W1vbuuTk5LuU0pi8vLzblNKYZcuWPfr666/vUUpjCgsLb7u4uNQIhcJbs2fPzt+5c+c9SmlMdXV1bHl5eayyuGtqamL79u1bkZOTE0cpjdmzZ0/m1KlTiyilMR4eHlUXLlxIoZTGLFy4MM/d3b268XkdPXo0rW/fvhVlZWW3FOOaMmVK0b59+zIVl1FKYyZNmvT48OHD6ZTSGBsbm7qqqqpYeeyU0piRI0eWXrp0KZlSGlNaWnqrrq4uJiUl5a78tRW/V3ydls7jhRdeKJ81a1aBsp/L9u3b7y9YsCBP/nzVqlW5NjY2dZ6enlWenp5VJ06cSGvt2IsXL35EKY1Zv369wMbGpu7Bgwd3qqqqYnv06FH36NGj24rvgfxnL18u/52JjY1NGDlyZGlNTU0spTRm1qxZBTt27Lh/9erVpMDAQKE8Pvn7RCmNCQsLK9y/f39G43OS/T0p/Vtr8RMApbSeECIhhFhQSjs8k1NXJBAA9vYNJ1HLJ1YHBrLr6FrryBHg8eN/q7HxeEBREauV247kk+OUjJ4CAEQi4PXX0Ss2FqlPk4emtraWw+Px+AAwcODA8pUrVxZt2bLF5qWXXio1NzeXAMD/+3//r+TKlStmwcHBwsZlTFV5jaFDh5bZ2dnVy48VFRVlGhYWVqpYqvTPP/80GTRoULmDg4MYAMLCwoqjo6NNdXR0aEBAQDmPx6sDAHkZ2KioKPNLly51Cw8Pt5OdB8nIyNAPDAys3Lp1q312drb+9OnTS3x9fWuVlV+9efOmYXp6utGoUaO40vdZAhsbG1FRUZFOeXm5zoQJEyoAYN68eY9///13i8bn9Ouvv5q/+uqrTwrfKCtP+/PPP5t98cUXdjU1NZzS0lJdPp9fDUDo6elZHRIS4hocHFw6a9asUgAYNGhQxZo1a5ynTZtWPGPGjBI3NzeVpkHevXvXQNl5yNc3rv4m9+jRIz0bG5sGvfTFixfnr1+/Pl/+vLn3SL4+JCSkFAD69OlT7e7uXu3i4iICAGdn59p79+7p29nZVX/++ee258+f7wYAeXl5eomJiYZ2dnZPci1cvHjRLCEhwbhPnz5eAFBTU8Pp0aOHOCwsrPThw4cGc+bMcX755ZeFISEhTzJl2djYiHNyctqUmU+Vbn0FgHhCyK8AngRIKV3RlhdipASChiVX5VgbocUoBXbsALy9gZEj/13OfljtKikJBgkJMGluprtEAhIfD9PkZBh4e6PN1bfk19RV2VZexvTHH3+0+OijjxwvX75ctnXr1keK20RGRnbbtGmTAwDs2bPnAQCQRp/I5c+
fpQQopRQnTpzI6NOnT4Nz7t+/f82wYcMqT506ZTFx4kSPHTt2ZAUHB5c3jnvatGml7u7u1XFxcQ0yYxQVFbXLL3BVVRVZvXq1y40bN5Lc3d1Fb731lkNNTQ0HAK5cuZL+888/m/30008WW7dutU9NTU3ctGlT3uTJk4U//fSTxbBhw3jnz59PV+X9oZQSZech11y1PSMjI4lQKGytA9viseVpgTkcDgwMDJ4kd+FwOBCLxeTcuXNm0dHRZjExMSlmZmaSgIAAz8blZyml5JVXXnn89ddfN7mGn5CQkHTq1Cnzb775xubo0aNWx48ffwAANTU1RFmBm5ao8nn3JICPAFwFEKvwYJ5Cc406wNoIrfX339LyecuXs2EUNeLzUevjg0oOB0ozYnE4oL6+qPDyanuD3pyRI0dWXLhwoVt5eTmnrKyMc+HCBcuRI0eWKytjCgAmJib18lKks2fPLpVPtnrxxRerAODPP/80z8/P16moqCAXLlzoNnz48CY9/GHDhlXeuHHD7NGjR7pisRjHjx+3GjFiRMWIESMq//nnH7OUlBR9AMjPz9eRxVi2bds2W/l16mvXrhkBQFJSkr6Xl1fthx9+WDB+/PjSuLg4I2Vx+/n51RQXF+tevnzZBJD29GNiYgy7d+9eb2ZmVn/p0iVTAPj++++VloUdP3582aFDh7rLZ/3L45KrqqriAICdnZ1YKBRyzp49awkA9fX1yMzM1H/55ZfLv/7665yKigodoVCok5iYaBAQEFC9cePGPD8/v8qEhARDVX5WzZ1Ha/t5e3vXZGZmtpjr4GmPLVdaWqpjYWFRb2ZmJrl9+7bhnTt3msycDQoKKjt37pylvHxtfn6+Tlpamv6jR4906+vr8frrr5d+9tlnOfHx8U/K42ZmZhr26dOnuvGxWtJqT51SeqAtB2SaR6m0UQ8O1nQkTJtERAAWFsCrr2o6ki6NwwH278eDwEDwa5U023p6wPff40F7poAfOnRo1cyZMx/379/fCwBee+21wiFDhlT/+OOP5o3LmALAnDlzioKCgri2trZ1N27cSGt8PD8/v8rg4GC3vLw8/alTpz5+8cUXq1JTUxsMn7q4uIjWrl2bM3z4cC6llIwZM6b01VdfLQWA8PDwByEhIe4SiQTW1tai69evp2/evDl34cKFPXk8Hl8ikRBnZ+faK1euZBw6dMjq2LFj1rq6utTGxkb06aefPvrzzz9NGsdtaGhIjxw5krlixYqe5eXlOvX19WTJkiX5/v7+Nd99992DBQsW9CKEYMSIEWWNzwcApk6dWnbr1i3jvn37eunp6dExY8YIIyIinvQ2u3fvXj9r1qxCLy8vbxsbG3GfPn0qAUAsFpOZM2e6lpeX61BKyYIFCwq6d+9ev3r1aofr16+bE0Kop6dn9dSpU4UCgaDVGqktnUdL+40fP77ivffec5ZIJOA088vztMeWCw0NFe7Zs8emd+/e3r17966RvweKBgwYUPPhhx/mjB49miuRSKCnp0fDw8MFxsbGkvnz5/eSSCQEANavX58NSD9YPHjwwODFF19sU7rkVtPEEkKGAFgHwAXSDwEEAKWU9m5pv47WGdLEFhQAtrbSkVz5pVlGy+XlSYdWli2Tznp/znVEmtg33oDTwYOwqa39dyTRwACS115D4d69yG5pX00KDw+3jomJMYmMjBRoOhamoblz5zpPmjSptC23xmlaZGRkt9jYWOOvvvoqt/G6Z00T+x2ALwAMBfACAH/Z11YRQoIIIamEkAxCyHvNbDONEJJECEkkhPygsLyeEBIne5xR5fW0XePb2ZhOYPdu6QytpUs1HclzY9s25BoaosF1RENDSLZtQ5N/bgyjivXr1z+qrKzsVLdwi8Vi8tFHH+W3vmVDqkyUE1JKf27rgQkhOgC+BjAWQDaAm4SQM5TSJIVtPAD8H4AhlNISQojifZLVlNK+bX1dbZaVJf3KGvVOoq4O+OYbYMIEabJ+pkOYm0Py5ZcQLFu
GXtXV4BgZQbJ9O7LMzaHVyWJXrFjxGMDjVjdkOpyzs7N41qxZneoOrnnz5pU8zX6qfHK5QgjZQggJJIT0lz9U2C8AQAal9B6ltA7AEQCTGm3zBoCvKaUlAEApLWhT9J2MYjY5phM4eVI6/M6ulXS4OXNQwuWimhDA0xNVs2ejVNMxMUxnoEpPXV5SRPE6GgUwqpX9HAE8VHierXAsOS4AEEKuQZroZh2l9KJsnSEhJAaAGMBmSunpxi9ACFkIYCEA9OwE3V+BADA1Bbo9cxZrpkNERADu7kBQkKYjee7IJ82NfFHM+/573XadHMcwXVmzjTohZCWl9CsAH1FK/1Tj63sAGAHACcBVQogvpbQUgAulNIcQ0hvA74SQeEpppuLOlNI9APYA0olyaoqx3WRlSYfe2V1RncDt28C1a9LJcaxF0Yh+uI0SMhwEfwDoo+lwGKZTaOm/1VzZ1/CnPHYOAGeF506yZYqyAZyhlIoopfcBpEHayINSmiP7eg9AFIBOX1lcIGBD751GRIS0+trrr2s6kueTNL1cL1JRoYPXX+8FVnuVYVTSUqOeTAhJB+BJCLmr8IgnhNxV4dg3AXgQQlwJIfoApgNoPIv9NKS9dBBCukM6HH+PEGJJCDFQWD4EgEqZoLRZS4lnGC3y+DHwww/A7NnsWommHDhgibQ0I0hrrxojMpKVXlWz56n0KiFkQEJCwpOENOvXr+9BCBlw9epV4+aPIi3u0to2ymzatMlm+/bt1m2PvO2a/eWklM4AMAxABoCXFR4TZV9bRCkVA1gO4BKAZADHKKWJhJD1hBB5+pVLAB4TQpIAXAHwNqX0MQAvADGEkDuy5ZsVZ813RtXVQGEha9Q7hW+/BWpqpPemMx1PKORg1aqekKfZrK7m4M03XVBWxkqvdlEdXXrVw8OjOjIy8kkGvdOnT1u5u7urlGjmafznP/95vHv3btvWt3x2Lf5yUkrzKKV9KKVZjR+qHJxSeoFSyqWUulFKN8qWfUwpPSP7nlJK36KU8imlvpTSI7Ll12XP+8i+fvesJ6pp7B71TqK+Hti5U5rj3cdH09E8n9ascYAsd/gTNTUcrF7NSq+y0qvtUnr1pZdeKr1w4UI3AEhMTDQwMzMTW1paPlk/a9asnj4+Pl7u7u7eq1atUvp7d/LkSfO+ffvy+Hy+14QJE3rL0wcvXbrUUX7OCxcudAKkeemdnJxqr1y50uZeflu1rU4r89TY7WydxNmz0h8Wyx6nGbdvG+LgwR6orW04nbS2loODB3tg+fJCNCps0lYikQiXLl0yHzduXNkff/xh/MMPP1jHxsYmU2kJZ6/Ro0eXp6enG9jZ2YmioqIyAODx48c61tbW9bt27bKNjo5Oa6429927d03i4+MTTU1NJf369eNPmjRJaGtrKxYIBAbffffd/dGjRz948OCB3rp16xxjY2OTbWxsxMOGDeMePHiw2+jRoyuWL1/eKyoqKoXH49XJc6y///779iNHjiw7fvz4g6KiIh1/f3+v4ODgsh07dtgsXbo0f8mSJcU1NTVELBbjxIkTFo3jrq2tJStWrOh5/vz5DAcHB/HevXst16xZ43j8+PEH8+fP7/XVV18JJkyYULFo0SInZed07Ngx8wsXLnSLjY1NMTMzkzTO/Q4Ab7/9doG84M3kyZNdjxw5YjFz5kxheHi4XVZWVryRkRGVF5DZtm2bXXh4eNa4ceMqhUIhx9jYWFJQ8O/dzGfPns2YOHGih7z4zt69e7sD0rSpzZ0HANTV1ZGEhITkxrFduXLF1M/Pr0pxmbm5eb2Dg0PdzZs3DU+cONFt6tSpJQcPHnxSA/2LL77IsbW1rReLxRg8eLDnjRs3jAYOHPgkB/ujR490N23aZH/16tU0c3NzyQcffGD36aef2q5Zs6bgwoULlvfu3UvgcDgNiub079+/MioqymzkyJENYmlvXfLakDZiPfVOIiICcHZmCfo1QTY5Di3XXn3qSXPy0qu+vr58Jye
nupUrVxZFRUWZykuvWlhYSOSlV/v371/9xx9/mC9ZssTx4sWLptbW1iqNmctLr5qamlJ56VUAaK70qp6e3pPSq1FRUSbNlV798ssv7Xk8Hn/o0KGeiqVXt23bZv/BBx/Ypaen65uamlJlcSuWLOXxePwtW7bY5+bm6ikrvarsnFQtvern58fjcrn869evmyUkJBgBgLz06s6dO6309PQo8G/p1Q0bNvQoKirS0dNrNe07gIalVxXPQ76+LaVXAWDatGnFBw8etDp//rzlrFmzGiR6OXDggBWfz/fi8/n89PR0wzt37jQo7hIVFWWSmZlpGBAQwOPxePwjR45YCwQCfWtr63oDAwNJWFhYrwMHDnQzNTV98svao0cPsWK86sJ66h0kK0t6Z5RDuw0gMu0uKQn47Tfgs88AXfan0eGSkgyQkGDSbKMtkRDEx5siOdkA3t6s9CorvdpAW0uvhoWFCT/++GMnX1/fKisrqyf7pqSk6EdERNjKRlLqQ0NDe9U0uhxEKcXQoUPLzp49e7/xcePi4pLPnDljfuLECctdu3b1+Pvvv9MAaf30tpZRfRrN9tQJIWcJIWeae6g7sK5GIAAcHaWVphgt9fXXgIEBsGCBpiN5PvH5tfDxqQSHozznBIdD4etbAS8vVnoVrPTqs5ZeNTMzk6xbty77o48+avBhraSkRMfIyEhiZWVV//DhQ92oqCiLxvuOGDGiMiYmxlQ+g76srIxz9+5dA6FQyCkuLtYJCwsTfvPNNw9TUlKeXENPS0sz8PHxaVMZ1afRUndkq+zrFAB2AA7Jns8A0OYk8887djublhMKgQMHgBkzgO7dW9+eaX/SNHIPEBjIR/O1Vx+0ZzIgVnr1+S69unDhwib51QMDA6t9fHyq3NzcfOzt7esGDBjQ5IOZg4ODePfu3Q+mT5/eu66ujgDA2rVrcywsLCQTJ050r5XNCfn000+fZFW9efOm6eeff672okSqlF6NaVxqUdkyTdP20qtubsDAgdLbnxkt9NVXwJtvAjExwIABmo5Ga3VE6VW88YYTDh60QW3tv/+BDQwkeO21Quzdy0qvMm2m6dKr165dM9qyZYvd6dOnmwzXP41nLb1qIkvVCgAghLgCMGmPwJ4XEgnw8CGb+a61JBLp0HtgIGvQtcG2bbkwNGx47dHQUIJt21jpVeapaLr0akFBgd7nn3/eOKOqWqgyG2gVgChCyD0ABIALgEVqjaqLyc+XTtxlw+9a6pdfgPR0YN06TUfCAIC5uQRffinAsmW9UF3NgZGRBNu3Z8HcXKtzxbLSq9pL06VXQ0JClF7aUIdWP7nIqqZ5AFgJYAUAT0rpJXUH1pWw29m0XEQEYGcHTJ2q6Ui6MolEIlG9lNGcOSXgcqshrb1ahdmzWelVhgEg+ztq9gNuq406IcQYwNsAllNK7wDoSQiZ2H4hdn1Zsvx7rFHXQpmZwIULwKJFgL5+69szTyuhsLDQQuWGXT5pztS0vr0nxzFMZyWRSEhhYaEFgITmtlFl+H0/gFgAgbLnOQCOAzj3zBE+J1g2OS22cyegowMsXKjpSLo0sVi8IC8v79u8vDwfqJr0isMBoqKyAZjhzp1nKujBMF2EBECCWCxu9r5bVRp1N0ppGCFkBgBQSqtI4wwLTIsEAsDCAjA313QkTAOVlcC+fdJhd5YVSK0GDBhQAICl6WMYNVPlE3MdIcQIAAUAQogbgHZL/vA8yMpiQ+9a6dAhoLQUWL5c05EwDMO0C1V66usAXATgTAg5DGlt89fVGFOXIxCwoXetQ6l0gly/fsDgwZqOhmEYpl202qhTSn8hhMQCGATpLW0rKaWqJZFgAEgbddZuaJnoaCAhAfjuO4BdTWIYpotQZfb7bwAGUkrPU0rPUUqLCCF7OiC2LqGiAiguZsPvWiciArC2lqaFZRiG6SJUuabuCuBdQshahWValSJWm7GZ71ro4UPg9Glp4RYjI01HwzAM025UadRLAYwGYCur3Na
kYg3TPJZ4Rgt98430mvqSJZqOhGEYpl2p0qgTSqmYUroUwI8A/gTQQ5WDE0KCCCGphJAMQsh7zWwzjXt1Np4AABokSURBVBCSRAhJJIT8oLB8DiEkXfaYo8rraSPWqGuZmhpgzx4gOJgNnzAM0+WoMvv9G/k3lNLvCSHxAJa1thMhRAfA1wDGAsgGcJMQcoZSmqSwjQeA/wMwhFJaQgjpIVtuBWAtpMP8FECsbN8mZfK0XVYWoKsL2NtrOhIGAHD0KFBUxG5jYximS2q2p04IkadKOU4IsZI/ANwHsEaFYwcAyKCU3qOU1gE4AmBSo23eAPC1vLGmlBbIlo8H8CultFi27lcAQSqflRYRCAAnJ2nSMkbDKAV27AC8vIBRozQdDcMwTLtrqaf+A4CJkKaIpZDeziZHAfRWtpMCRwAPFZ5nAxjYaBsuABBCrgHQAbBOVkBG2b6OjV+AELIQwEIA6Kml49sCARt61xo3bgCxsdIyq+w2NoZhuqBmG3VK6UTZV1c1v74HgBEAnABcJYT4qrozpXQPgD0A4O/vT9UR4LPKygKGDdN0FAwA6W1s5ubA7NmajoRhGEYtmm3UCSH9W9qRUnqrlWPnAHBWeO4kW6YoG8ANSqkIwH1CSBqkjXwOpA294r5Rrbye1qmvB7Kz2XwsrZCXBxw7BixdCpiaajoahmEYtWhp+H1bC+sogNYuSt4E4EEIcYW0kZ4OYGajbU4DmAFgPyGkO6TD8fcAZALYRAixlG03DtIJdZ3Ko0fShp0Nv2uBvXsBkUjaqDMMw3RRLQ2/j3yWA1NKxYSQ5QAuQXq9fB+lNJEQsh5ADKX0jGzdOEJIEoB6AG9TSh8DACHkU0g/GADAekpp8bPEownsdjYtIRJJ700PCgK4XE1HwzAMozaq3NIGQogPAD4AQ/kySmlka/tRSi8AuNBo2ccK31MAb8kejffdB2CfKvFpq6ws6VfWqGvYyZNAbq70/nSGYZgurNVGXZYedgSkjfoFABMgTUDTaqP+vGM9dS0REQH07g1MmKDpSBiGYdRKlYxyUyFNE5tHKZ0LoA8AlipWBQIBYGXF5mVpVFwc8OefwLJlAEeVX3eGYZjOS5X/ctWUUgkAsSwhTQEazmpnmpGVxXrpGhcRARgbA/PmaToShmEYtVPlmnoMIaQbgL2QJqKpAPCXWqPqIgQC6agvoyGPHwOHDwNz5gDdumk6GoZhGLVrsVEnhBAAn1FKSwF8Qwi5CMCcUnq3Q6Lr5AQCYMQITUfxHNu3T1rAZVmrpQoYhmG6hBYbdUopJYRcAOAre/6gI4LqCoRC6YMNv2tIfT2wc6f0U5WvykkKGYZhOjVVrqnfIoS8oPZIuhj5zHeWTU5Dzp0DHjxg1dgYhnmuqHJNfSCAWYSQLACVkBZ2oZRSP7VG1smx29k0LCJCWh5vUuPCgAzDMF2XKo36eLVH0QWxRl2DkpOBy5eBjRulxewZhmGeE60Ov1NKsyC9hW2U7PsqVfZ73mVlAfr6gK2tpiN5Dn39NWBgALzxhqYjYRiG6VCtNs6yjHLv4t+CKnoADqkzqK5AIACcnVm+kw5XVgYcOABMnw7Y2Gg6GoZhmA6lSpMTAiAY0uvpoJTmAjBTZ1BdgUDAht414sABoKKCTZBjGOa5pEqjXicrvEIBgBBiot6QugaWTU4DJBLpBLlBgwB/f01HwzAM0+FUmUV0jBCyG0A3QsgbAOZBml2OaYZIJC0Kxm5n62C//gqkpQGH2NUhhmGeT6026pTSrYSQsQDKAHgC+JhS+qvaI+vEcnOlnUbWU+9gERHSmYmvvKLpSBiGYTRCpft9ZI04a8hVxOqoa8C9e8D588CHH0pvO2AYhnkOqTL7fQohJJ0QIiSElBFCygkhZR0RXGfFsslpwM6dgI4OsHixpiNhGIbRGFV66v8F8DKlNFndwXQV8kbdmRWo7RiVlcB33wGhoYCDg6ajYRiG0RhVZr/nP22DTggJIoSkEkIyCCHvKVn
/OiGkkBASJ3ssUFhXr7D8zNO8vqYIBNJbpI2MNB3Jc+KHH4DSUnYbG8Mwzz1V66kfBXAaQK18IaX0ZEs7EUJ0AHwNYCyAbAA3CSFnKKVJjTY9SilV9t+4mlLaV4X4tA67na0DUQrs2AH07QsMGaLpaBiGYTRKlUbdHNLUsOMUllEALTbqAAIAZFBK7wEAIeQIgEkAGjfqXY5AAPB4mo7iOXH1KhAfD3z7LUCIpqNhGIbRKFVuaZv7lMd2BPBQ4Xk2pBXfGgslhLwIIA3AKkqpfB9DQkgMADGAzZTS0413JIQsBLAQAHpqSdeYUmmjPm5c69sy7SAiArCyAmbO1HQkDMMwGqfK7HcuIeQ3QkiC7LkfIeTDdnr9swB6ycq4/grggMI6F0qpP4CZALYTQtwa70wp3UMp9aeU+ttoSZ7vkhJpllIt+YzRtT18CJw6BcyfzyYwMAzDQLWJcnshLeYiAgD6/9u7/2iryjqP4+8PKJCCgwaGgs3FQs1KTS/YL5nWSk1LwdLK0hUsLGIp1RonZ2rV1KR/TL+Ws5oLEmBkZqn5YymOOGpNpjkJXFMxTSZU7gW0QH6JCsiF7/yx92WdbvfHPvecffc5535ea511795n732/z4V1v+fZ+3m+T8Qq4MIM520gWd2t04R0334RsTkiOp/TXwucUvLehvTrc8ADwLsy/MzCeTrbAFq4MKnyc+mlRUdiZlYTsiT1gyJiRZd9HRnOWwlMkjRR0jCSDwJ/NYpd0hElm9OAP6b7D5U0PP1+DPA+6uRZvNdRHyC7dsGiRXDuudDUVHQ0ZmY1IctAuZfSW9+dC7pcALzY10kR0SFpLnAvMBRYEhFPSboSaI2IpcAXJU0j+ZCwBZiZnv42YKGkfSQfPL7dzaj5muRqcgPklltg0yb4wheKjsTMrGYoWYCtlwOko4FFwHuBrcDzwMURsTb36MrQ3Nwcra2tRYfBFVckY7dee82DsXM1ZQrs2AFPP+1f9ACS9Gg61sXMalCW0e/PAaenS64OiYgd+YdVvzrXUXeeydHy5bByZfLpyb9oM7P9+kzqki7vsg2wHXg0Ih7PKa661ZnULUfz5sGoUfCZzxQdiZlZTckyUK4ZmEMy73w88HngLGCxpH/OMba65GpyOfvLX+Dmm2HmzCSxm5nZflkGyk0ATo6IVwAkfRO4G5gKPEqy4IsBu3fDiy96OluuFi+GPXvgssuKjsTMrOZk6akfTknNd5L56m+KiJ1d9g96G9JZ+O6p52TPHliwICnXd+yxRUdjZlZzsvTUfwYsl3Rnun0u8PN04FxdTDMbKJ7OlrM77oAXXkiKzpiZ2d/IMvr9Kkn3kBSAAZgTEZ1zxy7KLbI65GpyOWtpgaOPhrPPLjoSM7OalKWnTprEi58EXuM6k/qECcXG0ZCeeAIeegi+/30YOrToaMzMalKWZ+qWUVsbjBsHw4cXHUkDmjcvWbRl1qyiIzEzq1lO6lXU3u5b77nYsgV+9jO4+GI49NCiozEzq1lO6lXkwjM5WbIEdu6EuXOLjsTMrKY5qVdJhJN6LvbuhfnzYepUOOGEoqMxM6tpTupV8tJLSWfSSb3Kli2DtWu9GpuZWQZO6lXi6Ww5aWlJphOcd17RkZiZ1Twn9SrpTOruqVfRM8/A/ffDnDlwQKbZl2Zmg5qTepW4mlwO5s+HYcPgc58rOhIzs7rgpF4l7e1w8MFw2GFFR9IgXn4ZrrsOPvlJOPzwoqMxM6sLTupV0jnyPVlu3ip2/fXwyiseIGdmVoZck7qksyStlrRG0le6eX+mpE2SHk9fny15b4akP6WvGXnGWQ1eR72K9u1LKshNmQKTJxcdjZlZ3cgtqUsaCswHzgaOBz4l6fhuDr05Ik5KX9em5x4GfBM4FZgCfFNSTZcSczW5KvrlL2H1anjPe+CRR5IiAGZm1qc8e+pTgDUR8VxEvA7cBEzPeO6HgPsjYktEbAXuB87KKc6K7dwJGze6p14Vy5bB9OnJc4w
lS+CMM5Jf7LJlRUdmZlbz8kzq44F1Jdvr031dnS9plaRbJR1VzrmSZktqldS6adOmasVdtvXrk69O6hVatgw+9jHYtSvpne/YkTxXX78eLrjAid3MrA9FD5S7C2iKiBNIeuM/KefkiFgUEc0R0Tx27NhcAszC09mqIAJmz4bdu7t/f+dO+PznfSvezKwXeSb1DcBRJdsT0n37RcTmiOj8K34tcErWc2uJq8lVwfLlsG1b78ds2wYrVgxMPGZmdSjPpL4SmCRpoqRhwIXA0tIDJB1RsjkN+GP6/b3AmZIOTQfInZnuq0nt7ckj4PHdPVywbF58MVm8pTdDhsALLwxMPGZmdSi32psR0SFpLkkyHgosiYinJF0JtEbEUuCLkqYBHcAWYGZ67hZJV5F8MAC4MiK25BVrpdra4Mgj4cADi46kjo0bB6+/3vsx+/Ylv2gzM+tWrgW1I2IZsKzLvm+UfP9V4Ks9nLsEWJJnfNXi6WxVsGdPkrR7M3p0MnfdzMy6VfRAuYbgddSrYN48GDkSRozo/v03vAEWLnTJPjOzXjipV2jfPli3zkm9IuvXw+23J6ux3XZbstTqyJFwyCHJ1wkT4NZb4cMfLjpSM7Oa5vUsK7RxYzILy0m9AgsXJp+OLr0UJk5Mbn2sWJEMijvyyOSWu3voZmZ9clKvkKezVWj3bli0CM45J0nokCTwU08tNi4zszrk2+8V6kzq7qn30y23JLc7vBqbmVnFnNQr5GpyFWppgWOPhQ9+sOhIzMzqnpN6hdrbk/Fco0cXHUkdWrEiec2dmxSWMTOzivgvaYU8na0C8+bBqFEwY0bRkZiZNQQn9Qq1tTmp98vGjXDzzUlCHzWq6GjMzBqCk3qFXE2unxYvTsrCXnZZ0ZGYmTUMJ/UKvPoqbN7snnrZ9uyBBQvgjDPguOOKjsbMrGF4nnoF1q1Lvjqpl+nOO2HDhiSxm5lZ1binXgFPZ+unlhZoanLZVzOzKnNSr4CryfXDqlXw4IPJs/ShQ4uOxsysoTipV6C9PclLRxxRdCR1ZN68ZMW1WbOKjsTMrOE4qVegrQ3Gj4cDPDIhmy1b4IYb4KKL4LDDio7GzKzhOKlXwNPZyvTjH8POnUkFOTMzqzon9Qq4mlwZ9u6F+fPhtNPgxBOLjsbMrCHlmtQlnSVptaQ1kr7Sy3HnSwpJzel2k6Sdkh5PXz/MM87+2Ls3mdLmpJ7RPffA8897NTYzsxzl9jRY0lBgPnAGsB5YKWlpRDzd5bhRwJeA5V0u8WxEnJRXfJX685+ho8O33zNraUkGIJx3XtGRmJk1rDx76lOANRHxXES8DtwETO/muKuA7wC7coyl6ryOehlWr4b77oM5c+DAA4uOxsysYeWZ1McD60q216f79pN0MnBURNzdzfkTJT0m6TeSTssxzn5xUi/D/PkwbBjMnl10JGZmDa2wyViShgBXAzO7eftF4M0RsVnSKcAdkt4eES93ucZsYDbAmwc4u7qaXEY7dsB118EnPgGHH150NGZmDS3PnvoG4KiS7Qnpvk6jgHcAD0haC7wbWCqpOSJ2R8RmgIh4FHgWOKbrD4iIRRHRHBHNY8eOzakZ3Wtvh0MP9aqhfbr++iSxe4CcmVnu8kzqK4FJkiZKGgZcCCztfDMitkfEmIhoiogm4BFgWkS0ShqbDrRD0tHAJOC5HGMtm6ezZRCRVJCbPBmmTCk6GjOzhpfb7feI6JA0F7gXGAosiYinJF0JtEbE0l5OnwpcKWkPsA+YExFb8oq1P9raPPK9T7/6FTzzTNJbNzOz3OX6TD0ilgHLuuz7Rg/HfqDk+9uA2/KMrVLt7TB1atFR1LiWFhg7NnmebmZmuXNFuX54+WXYts2333u1di3cdVcy4n348KKjMTMbFJzU+8HT2TK45hoYMiSZm25mZgPCSb0fvI56H157Da69Fj76UZgwoehozMwGDSf1fnBPvQ833ghbt3oam5nZAHNS74f29qTa6bhxRUdSgyK
SAXLvfGeyIpuZmQ2YwirK1bO2tuSu8hB/JPpbDz8MTzwBixaBVHQ0ZmaDitNSP7S3+3l6j1paYPRo+PSni47EzGzQcVLvB1eT68GGDXD77XDJJXDwwUVHY2Y26Dipl6mjI8ldTurdWLgQ9u6FSy8tOhIzs0HJSb1ML7yQ5C3ffu9i9+4kqX/kI3D00UVHY2Y2KDmpl8nT2Xpw662wcaOnsZmZFchJvUxeR70HLS1wzDFw+ulFR2JmNmg5qZfJPfVurFwJy5fD3Lme52dmViD/BS5TezuMGQMHHVR0JDVk3jwYORJmzCg6EjOzQc1JvUyeztbFpk1w001JQj/kkKKjMTMb1JzUy9TW5qT+VxYvhtdfT269m5lZoZzUyxCRJHVPZ0t1dMCCBcnguOOOKzoaM7NBz7Xfy7B9O7zyinvq+915J6xfD/PnFx2JmZnhnnpZPJ2ti5YWaGpKCs6YmVnhck3qks6StFrSGklf6eW48yWFpOaSfV9Nz1st6UN5xplV53Q2334HnnwSfvObpCTs0KFFR2NmZuR4+13SUGA+cAawHlgpaWlEPN3luFHAl4DlJfuOBy4E3g4cCfxS0jERsTeveLPwHPUS8+bBiBEwa1bRkZiZWSrPnvoUYE1EPBcRrwM3AdO7Oe4q4DvArpJ904GbImJ3RDwPrEmvV6j2dhg+HMaOLTqSgm3dCjfcABddBG98Y9HRmJlZKs+BcuOBdSXb64FTSw+QdDJwVETcLemKLuc+0uXc8V1/gKTZwOx0c7ekP1Qj8L4UcLd5DPDSgP/UvvzoR8mr/2qzXdXRqG07tugAzKxnhY1+lzQEuBqY2d9rRMQiYFF6vdaIaO7jlLrUqG1r1HZB47ZNUmvRMZhZz/JM6huAo0q2J6T7Oo0C3gE8IAlgHLBU0rQM55qZmVkXeT5TXwlMkjRR0jCSgW9LO9+MiO0RMSYimiKiieR2+7SIaE2Pu1DScEkTgUnAihxjNTMzq3u59dQjokPSXOBeYCiwJCKeknQl0BoRS3s59ylJvwCeBjqAyzKMfF9UrdhrUKO2rVHbBY3btkZtl1lDUEQUHYOZmZlVgSvKmZmZNQgndTMzswZRd0m9r9Kz6eC6m9P3l0tqGvgoy5ehXVMl/V5Sh6QLioixvzK07XJJT0taJelXkuqiEG+Gds2R9KSkxyX9Nq2UWBcqKfFsZsWpq6ReUnr2bOB44FPd/KG8BNgaEW8F/oOkWl1Ny9iudpI5/T8f2Ogqk7FtjwHNEXECcCvw3YGNsnwZ2/XziHhnRJxE0qarBzjMfsnYtm5LPJtZseoqqZOt9Ox04Cfp97cCH1Q6Eb6G9dmuiFgbEauAfUUEWIEsbft1RLyWbj5CUpeg1mVp18slmwcD9TIqtZISz2ZWoHpL6t2Vnu1aPnb/MRHRAWwHar1AeZZ21aty23YJcE+uEVVHpnZJukzSsyQ99S8OUGyV6rNtpSWeBzIwM+tdvSV1a2CSLgaage8VHUu1RMT8iHgL8C/A14uOpxpKSjz/U9GxmNlfq7eknqV87P5jJB0A/B2weUCi679GLoubqW2STge+RlJVcPcAxVaJcv/NbgLOyzWi6imnxPNa4N0kJZ49WM6sYPWW1HstPZtaCsxIv78A+J+o/Qo7WdpVr/psm6R3AQtJEvrGAmLsjyztmlSy+RHgTwMYXyUqKfFsZgWqq6SePiPvLD37R+AXnaVn04VgAH4EvFHSGuByoMfpOLUiS7skTZa0Hvg4sFDSU8VFnF3Gf7PvASOBW9LpXzX/gSZju+ZKekrS4yT/F2f0cLmakrFtZlaDXCbWzMysQdRVT93MzMx65qRuZmbWIJzUzczMGoSTupmZWYNwUjczM2sQTuo24CQ90FmoRNIySaMrvN4HJP1XD+/dmK7+9o+V/Awzs3pwQNEBWONJF9BRRPS5+ExEfDjHOMYBk9MV+7Kec0A6T9vMrO64pz5ISPrXdH3s36a91y+n+98i6b8lPSrpIUnHpfu
vk/Sfkv5X0nOla7hLukLSyrQH/K10X1N6/euBPwBHSVogqTUtwPKtHuJaK2lMuvb44+nreUm/Tt8/U9LvlKwlf4ukken+syQ9I+n3wMd6aPZ9wPj0mqeldwh+kG7/QdKU9Fr/Jumnkh4GflqN37eZWRGc1AcBSZOB84ETSdbILq3RvQj4QkScAnwZuKbkvSOA9wPnAN9Or3UmMIlkec6TgFMkTU2PnwRcExFvj4g24GsR0QycAPyDpBN6ijEifpiuOz6ZZFWwqyWNIVkE5fSIOBloBS6XNAJYDJwLnAKM6+Gy04BnI+KkiHgo3XdQ+nMuBZaUHHt8+nM+1VOMZma1zrffB4f3AXdGxC5gl6S7ANJe73tJyrN2Hju85Lw70lvoT0t6U7rvzPT1WLo9kiSZtwNtEfFIyfmfkDSb5P/ZESSJc1Ufsf6ApF7/XZLOSc95OI1vGPA74Djg+Yj4U9qOG4DZGX8XNwJExIOSDil5nr80InZmvIaZWU1yUh/chgDb0p5rd0pXS1PJ13+PiIWlB0pqAl4t2Z5I0vOfHBFbJV0HjOgtGEkzgb8nqTve+bPu79p7ltRTvFl0rYvcuf1q1wPNzOqNb78PDg8D50oakfbOzwGIiJeB5yV9HJIBbpJO7ONa9wKzSp5tj5d0eDfHHUKSKLenvfyze7uopM7b/xeXDLB7BHifpLemxxws6RjgGaBJ0lvS48q5Zf7J9FrvB7ZHxPYyzjUzq2nuqQ8CEbEyXflsFfAX4EmgM5ldBCyQ9HXgQJJ1v5/o5Vr3SXob8Lv0lvgrwMXA3i7HPSHpMZIEvI7kg0Vv5gKHAb9Or9saEZ9Ne+83Sup8LPD1iPi/9Lb+3ZJeAx4iWeM7i11pXAcCszKeY2ZWF7xK2yAhaWREvCLpIOBBYHZE/L7ouAaSpAeAL3vdbzNrVO6pDx6LJB1P8lz7J4MtoZuZDQbuqZuZmTUID5QzMzNrEE7qZmZmDcJJ3czMrEE4qZuZmTUIJ3UzM7MG8f+4tntQUvTdwQAAAABJRU5ErkJggg==\n",
-      "text/plain": [
-       "<Figure size 432x288 with 1 Axes>"
-      ]
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfUAAAEKCAYAAAALjMzdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOydd1gU1/7/32fpXTpSBAWWZSk2ghI1dsX8DBZUVG5sMbZ4NUZTboomxniTqxhDjFETG2q+9hi7NyaCSbwxgiJSlqIU6SCydNhlz++P3SULLLBI2QXP63nmWWbmzMxnhoXPnPZ+E0opGAwGg8Fg9Hw46g6AwWAwGAxG58CSOoPBYDAYvQSW1BkMBoPB6CWwpM5gMBgMRi+BJXUGg8FgMHoJLKkzGAwGg9FL6NKkTggJJIQkE0LSCCHvKdn/JSEkVrakEEJKFfYtJISkypaFXRkng8FgMBi9AdJV89QJIVoAUgBMBJAN4A6AeZTSxBbK/xPAYErpEkKIBYBoAH4AKIAYAEMppU+7JFgGg8FgMHoBXVlT9weQRil9RCmtA3AcwLRWys8D8H+ynycD+JlSWiJL5D8DCOzCWBkMBoPB6PFod+G5HQA8VljPBjBMWUFCiDOA/gB+beVYByXHLQOwDACMjIyG8ni8jketJvLygNxcYPBggMNGOjA0lJiYmGJKqbW642AwGMrpyqTeHuYCOE0prW/PQZTSfQD2AYCfnx+Njo7uiti6hddeAy5dAu7eVXckDEbLEEIy1R0Dg8Foma6sE+YAcFJYd5RtU8Zc/N303t5jewXp6UD//uqOgsFgMBg9ma5M6ncAuBNC+hNCdCFN3OebFiKE8ACYA/ifwuZrACYRQswJIeYAJsm29VpYUmcwGAxGR+mypE4pFQNYDWkyTgJwklKaQAjZTAgJUig6F8BxqjAMn1JaAuBTSF8M7gDYLNvWKxGLgcePWVJnMBgMRsfo0j51SullAJebbNvYZP3jFo49AOBAlwWnQTx+DNTXs6TO6L3ExMTYaGtrfw/AG0z0isF4ViQA4sVi8dKhQ4cWKiugKQPlnmvS06WfLKn3cqKjge+/Bz79FLB+vgaQa2trf29nZ+dpbW39lMPhdI04BoPRy5FIJKSoqIifn5//PYAgZWXYG7MGkJEh/WRJvZezbRtw/Digr6/uSNSBt7W1dRlL6AzGs8PhcKi1tbUQ0hYv5WW6MR5GC6SnS+emOzm1XZbRQ0lPB06fBpYvB0xM1B2NOuCwhM5gdBzZ31GLuZsldQ0gPR1wdAR0dNQdCaPL2LkT0NIC1qxRdyQMBqMXw5K6BsCms/VySkqkfenz5wMOzYQRGQwGo9NgSV0DYEm9l7NnD1BVBaxfr+5InnuOHDnShxAy9N69ew0DG5KTk3Xd3d29AODixYsmY8eOdevodYKDg10OHjxoDgAhISHOMTEx+gBgaGg4uCPnvXjxosnPP/9s1N7jHBwcfPLy8lQaGB0eHm65YMGCfu2PTjmjR492Ky4u1gKALVu22AwYMMArKCio/7Fjx8zef/99u866jhyJRILhw4dzS0pKOACgpaU1lMfj8eVLcnKybmdfU86zPrvc3FztUaNGuXdGDGz0u5qprpbqvrOk3kuprQXCw4HAQMDHR93RPPccP37cYsiQIRUREREWgwcPzu2Oa544caJd0roikQg6LfTF/frrrybGxsb1EydOrOyU4LqBqKioNPnP+/fvt75+/XqKq6urSLZJqOp5Wnsuipw8edLMy8ur2sLCQgIAenp6EoFAoNQdVFOwt7cX29raiv773/8aTZo0qUO/W5bU1Uym7M+dJfVeytGjQEEBsGGDuiPRGJYsgVN8PAw785ze3qg6cKCRCVQzhEIh586dO8bXr19PDgoKcv/yyy9VTupisRirVq1yvHHjhhkhhC5cuLD4gw8+KNywYUPfq1ev9qmtreX4+flVHDt
2LJPTxJHJ39/fY/v27Y9feumlKgB47bXXnKKiokytra1FZ86ceWRvby/29/f38Pb2rvrrr7+Mg4ODSzw8PGo+//zzviKRiGNubi4+ceLEo6qqKk5ERIQ1h8OhJ0+etNy5c2eWr69vzeLFi51zcnJ0AWDHjh1ZkyZNqszPz9cKDg4eUFBQoDt06NCKliy2T58+bbpx40aH+vp6YmFhIf7f//6Xorj/hx9+MGsah5OTk/jSpUvG69ev7wcAhBDcunVLUFZWphUcHDygoqJCq76+nnz99deZgYGBFQ4ODj7R0dFJ69evt8/OztabMmWKe2hoaLG5uXl9dHS0UURERFZubq62svt466237B89eqSXlZWl5+DgULtp06a8xYsX9xeJREQikeDMmTMPfXx8ahVjPnbsmMXy5cuL2/p9vvHGG45//PGHSV1dHXn99dcL33777eKLFy+afPLJJ/ampqbi5ORkw6CgoBIfH5/q3bt329bW1pIff/zxoZeXV21Lz0XxGi3dk7JnZ25uLpk+fXppRESEZUeTOmt+VzNsOlsvRiIBwsKAQYOAcePUHc1zzw8//NBnzJgxQl9f31pzc3Pxb7/9pvKLRVhYmHVWVpZuYmJiQkpKSuLSpUufAMDbb79dGB8fn5SamppQXV3NOX78uFlr56murub4+flVpqWlJYwYMaL8vffes5fvq6urI/Hx8UmffPJJwcSJEytiY2MFSUlJibNmzSrZvHmznYeHR92CBQuKVqxYUSAQCBIDAwMrli9f7vTWW28VxMfHJ/34448PV6xY4QIA7733nn1AQEBFWlpawowZM0rz8vKaNTnn5uZqr1692uXs2bMPk5OTE8+dO/ewaRllccieh114eHimQCBI/PPPPwXGxsaSAwcOWIwfP14oEAgSk5KSEoYNG1bV5Pln2djYiKKiolI2bdrUSDilpfsAgNTUVP2bN28mX7hwIf3rr7+2XrVqVYFAIEiMi4tL6t+/f13TmGNiYoxHjBjRkBhra2s58qb3iRMnugLAzp07rczMzOrj4+OT7t+/n3T48GFrgUCgCwACgcDgwIEDWampqfGnT5+2TElJ0X/w4EHSq6++WhwWFmbT2nNR5Z6UPTsAGDFiROVff/1lrORr0y5YTV3NyIVnXFzUGgajK7hyBUhKAo4dAwhRdzQaQ1s16q7i5MmTFmvWrCkEgODg4JIjR45YjBo1qqqt4wDg119/NV2xYkWRvPnX1ta2HgCuXLlismPHDruamhpOaWmpNp/Pr0YrTcocDgdLly4tAYAlS5Y8mTlzZkP//bx58xqksNPT03WnT5/uWFRUpFNXV8dxcnKqVXa+P/74wzQ1NdVAvl5RUaElFAo5f/75p8nZs2fTAGDu3LnC5cuXN3PAjIyMNPL39y/n8Xh1ivekSEtxDB8+vGLDhg1Oc+bMKZk3b95TV1dXyfDhwyuXL1/uIhKJOLNmzXr64osvVrf+VNu+DwAIDAwsNTY2pgAQEBBQuX379r7Z2dm6c+fOfdq0lg4AQqFQ29zcXCJfV9b8fv36dVOBQGB4/vx5cwAoLy/XSkxM1NfV1aU+Pj6Vzs7OIgDo169f7ZQpU4QAMHDgwOqoqCiT1p6LKvek7NkB0ib4wsLCDvf3s5q6mklPB3R1AXv7tssyehjbtknFB2bPVnckzz0FBQVaf/75p8kbb7zh7ODg4LNr1y67CxcumEskkrYPboGqqiqyfv1657Nnzz5MSUlJ/Mc//lFcU1PTrv+pROFlz8TEpCGY1atX91u1alVhSkpK4q5duzJra2uVnpdSirt37yYJBIJEgUCQWFhYGGdmZvbsN9WEluLYunVr/vfff59ZXV3NGTVqFO/evXv6U6ZMqbh582ayg4ND3ZIlS/rv2rXLUtXrtHYfRkZGDfezYsWKkp9++inNwMBAMnXqVPfz5883E33Q0tKi9fWtu3hTSklYWFiW/Ho5OTkPZs6cWQYAenp6DX0VHA4H+vr6VP5zfX0
9ae25qHJPyp4dIP0+6enpdfh3x5K6mklPB5ydpeIzjF7EnTtAVBSwbh0TINAAjhw5Yj5jxoyS3NzcBzk5OQ/y8/PjHB0d665du6ZSc+f48ePL9u7dayUSScd3FRQUaFVVVXEAwM7OTiwUCjkXLlwwb+s8EokE8lHxhw4dsvT39y9XVq68vFyrX79+Ink5+XYTE5P68vJyLfn6yJEjy/7973/byNdv3bplAADDhw8vlx938uRJ07KyMi00YcyYMZV//fWXibzZuaCgoFmZluJISEjQ8/f3r/7ss8/yfX19K+Pj4/VTUlJ0HR0dRevXry9esGBB0d27d1Xu3mjpPpqSmJio6+npWfvhhx8WTp48uTQ2NrZZuf79+9ckJSXptXa9iRMnCr/99lvr2tpaAgBxcXF6ZWVlKv8Xbum5qHJPyp4dAMTHx+tzuVyVWzdagqUSNcOms/VStm8HzMyApUvVHQkDwKlTpyxmzpz5VHHbtGnTnh49etRClePXrVtX5OjoWMfj8bw8PDz4+/fvt7CysqoPDQ0t8vT09Bo7dix34MCBbQ5wMjAwkPz1119G7u7uXjdv3jT597//naes3AcffJA7b948Vy8vL09LS8uGAVjBwcGlly5d6sPj8fhXr1413rdv3+O7d+8acblcvqurq9euXbusAeDzzz/P/eOPP4zd3Ny8zp49a963b99mfc/29vbi8PDwjBkzZrh5eHjwZ8yYMUDVOP7zn//YuLu7e3G5XL6Ojg6dNWuW8Nq1ayaenp5enp6e/DNnzli88847Bao8WwBo6T6acvToUQsul+vF4/H4SUlJBsuXL3/StMykSZOE//3vf1uVbVy3bl0xj8er8fHx8XR3d/d6/fXXnUUikcp9ZC09F1XuSdmzA4Cff/7ZJDAwUOXZAC1BWhoV2dPw8/Oj0dHR6g6j3VhaSltn9+xRdySMTiM9HXBzk454/+ILdUfTqRBCYiilfu097v79+xkDBw5sdUQyg9EZZGZm6sybN8/l1q1bqeqOpT34+fl5XLlyJc3a2rr1vgMA9+/ftxo4cKCLsn2spq5GysqkYmOspt7LYJKwDIbacHZ2Fi1ZsqRYLj7TE8jNzdVeu3ZtgSoJvS3Y6Hc1wqaz9UKYJCyDoXaWLl36tO1SmoO9vb341VdfLe2Mc/WYN5neCJvO1gthkrAMBkONsKSuRuRJndXUewlMEpbBYKiZLk3qhJBAQkgyISSNEPJeC2XmEEISCSEJhJAfFLbXE0JiZcv5roxTXaSnA0ZGgJWVuiNhdApMEpbBYKiZLutTJ4RoAfgGwEQA2QDuEELOU0oTFcq4A/gXgBGU0qeEEBuFU1RTSgd1VXyagHw6GxMb6wUwSVgGg6EBdGVN3R9AGqX0EaW0DsBxANOalHkdwDeU0qcAQCktxHMEm6Pei5BLwr79NntL02CY9Wrb9DbrVULI0GnTpjX8pxWJRDA3Nx/Y1u/5Wb8LNTU1xM/Pz0MuVNTddOXodwegkcZzNoBhTcpwAYAQ8gcALQAfU0qvyvbpE0KiAYgBfE4pPdeFsXY7lEqTOqvU9RKYJGyPgFmvdj/qtl41MDCQJCcnG1RUVBBjY2P6448/mtra2nZZxtXX16ejR48u+/777y1WrlxZ0vYRnYu6B8ppA3AHMAbAPADfEUL6yPY5y0Qu5gPYSQhxbXowIWQZISSaEBJdVFTUXTF3Ck+eAJWVrKbeK2CSsO1jyRIn+Pt7dOqyZIlTW5eVW68ePHgw48cff1RJSU6OWCzGsmXLHOVKYJ999pkNAGzYsKGvt7e3p7u7u9e8efOclWnJ+/v7e9y8ebNBMvW1115zcnNz8woICODm5uZqy8ssWbLEydvb23PLli22P/zwg5mvry/P09OT/+KLL3IfP36snZycrBsREWG9Z88eW7miXG5urvbkyZNdvb29Pb29vT3/+9//GgFAfn6+1ogRI9zd3Ny8QkJCnFuzXuXz+Z4eHh78gIAAbtP9yuIAgEu
XLhnLnc88PT35T58+5WRmZur4+fl58Hg8vru7u9fVq1eNgb9bCebPn99Pbr36ySef2Ci2CLR0H2+99Zb99OnT+w8ZMoQ3c+bM/tHR0fo+Pj6ePB6Pz+Vy+Q8ePGgmB3vs2DGLGTNmNJoeNmHCBOGpU6f6AMD//d//WQQHBzck2xs3bhgOGjSI5+npyR88eDDv/v37zc5ZVlbGmT17touPj4+np6cn/+jRo30AoKV4Zs2aVXr8+PF2fcc6i65M6jkAFP/QHGXbFMkGcJ5SKqKUpgNIgTTJg1KaI/t8BCASQLNmK0rpPkqpH6XUz9paqaqgxsJGvvcimCRsj4BZrzbmebFeBYBXX3215MSJE+ZVVVUkKSnJMCAgoGH/wIEDa+7cuSNISkpK3LRpU84777zj2PSc77//ft+xY8eWPXjwIOm3335L/vDDDx3Lyso4LcXzwgsvVMfFxbW7m6Qz6Mrm9zsA3Akh/SFN5nMhrXUrcg7SGvpBQogVpM3xjwgh5gCqKKW1su0jAPynC2Ptdtgc9V5Cejpw+rR0xLtJq3LTDDkHDjDrVTDrVVXuA+i49SoADBs2rDo7O1vvu+++s5gwYUKj309JSYlWSEhI/4yMDH1CCFWmAR8ZGWl67dq1PuHh4XYAUFtbS9LS0nRbikdbWxs6Ojr06dOnnKaxdDVdVlOnlIoBrAZwDUASgJOU0gRCyGZCSJCs2DUATwghiQBuAHibUvoEgCeAaELIfdn2zxVHzfcGWE29l8AkYXsEzHr12ehN1quBgYGlmzZtclqwYEGjfu53333XYfTo0eWpqakJFy5cSKurq1Nqo3r69Ok0eXx5eXkPhgwZUtNaPCKRiBgaGna7uUqX9qlTSi9TSrmUUldK6WeybRsppedlP1NK6VuUUj6l1IdSely2/ZZsfaDsc39XxqkO0tMBCwvA1FTdkTCeGSYJ22Ng1qvMenXlypXFGzZsyPX392/UglBWVqbl6OhYBwB79+5VqhoyduzYsrCwMFv5S+Aff/xh0Fo8+fn5Wn369BErerN3F+oeKPfcwqaz9QKYJGyPgVmvMutVV1dX0Ycffths2vS7776b//HHHzt6enryxWKlLqr4/PPPc8ViMeHxeHw3NzevDz/80KG1eK5cuWLatJm/u2DWq2qCywUGDgROnVJ3JIxnorYWcHYGBg+WzlF/TmDWqwxNRxOsVydNmuS6ffv2bF9fX6VjIToKs17VMCQSIDOT1dR7NEwSlsHQSNRtvVpTU0OCgoJKuyqhtwWzXlUDeXlAXR1L6j0WJgnLYGg06rRe1dfXp6tXr27WLdBdsKSuBth0th6OXBL22DEmCctgMDQK1vyuBth0th4Ok4RlMBgaCkvqaoDV1HswTBKWwWBoMCypq4H0dKBvX0Bfv+2yDA2DScJ2CxIJ8MsvMIqIQJ9ffoFRBzRiGnj48KHO+PHjXZ2dnb2dnJy8Fy9e7FRTU6O0/yQjI0MnMDCw2RSvpig6kLWXt956y37jxo22qpbvqMObIv/5z3+s5eIw9+7d05druCckJOgNHjyY19HzBwYGDkhMTNQFpNrvXC6XL9eKfxaXOVXpqc5qnQlL6mqAzVHvocglYZcvZ5KwXciJEzCzt4dvUBC4q1bB5ZVXwLW3h++JE2hVV701JBIJpk+f7hYUFFSamZkZn56eHl9ZWclZu3ZtM9UgkUgEFxcX0dWrVx+1dd6oqKg0Kyur5vJlGs4777xTJB/MderUqT5BQUFPk5KSEr28vGrv3bsnUPU8EokETdXboqOj9evr6wmfz2+YGx8VFZUiV2PTRIc5RWc1dcfSUVhSVwMZGSyp90iYJGyXc+IEzBYuxICCAuhUVYFTWQmt6mpwCgqgs3AhBjxrYr9w4YKJnp6eZO3atU8AqTb3nj17Hp84ccKqvLycEx4ebjlu3Di34cOHc1988UUPRY/18vJyzssvvzzA1dXVa+LEia6+vr48ueua3IEsOTlZd8CAAV5z5851dnN
z8xoxYoR7RUUFAYCwsDArb29vTw8PD/7kyZNdy8vLW/2/+/jxY+2JEye6enh48D08PJrVbIVCIScgIIDL5/M9uVxug2NYWVkZZ8yYMW4eHh58d3d3r++++84cAFatWuXg6urqxeVy+cuWLXME/m4lOHHihNm+fftsDx06ZD1s2DAu0LhF4KOPPrL19vb25HK5/HXr1tkDUv95FxcX7xkzZrhwuVyvhw8fNjKLOXTokOUrr7zSyCVNGS2du3///l7BwcEuLi4u3kFBQf3PnTtnMmTIEJ6zs7P3jRs3DIHe56zWmbDR792MSAQ8fsySeo+DScJ2ORIJsGYNnGtrlVc2amvBWbsWzrNnI47TzurIgwcPDAYOHNjIvMXCwkLSt2/fusTERD0ASEhIMIyLi0uwtbWtT05ObkhU27Zts+7Tp0/9w4cPE+7cuaMfEBDgpewaWVlZ+kePHn304osvZr788ssDIiIizFetWlUSGhr6dP369cUAsGbNGvvw8HCrDz74oJmymZwVK1b0GzVqVPnGjRsfisViCIXCRs37hoaGkkuXLqVZWFhI8vLytIcNG8abP39+6dmzZ03t7OxEkZGRaQDw5MkTrfz8fK3Lly+bP3r0KJ7D4aBpV0FISIjw9u3bRcbGxvWbN29upAB39uxZ07S0NP24uLgkSikmTJjgduXKFeMBAwbUZWVl6e3fvz99/PjxGU3jv337tnFTffXRo0dzORwOdHV1JXFxcYLWzv348WP9EydOPBo6dGiGr6+v57Fjxyyjo6MFP/zwQ5/PPvus79ixYx/KndV0dHRw7tw5k3feecfx2rVrjVzm5M5qp06dyiguLtby8/PzDAoKKpM7q61cubKkpqaGyFXk1Oms1pmwpN7NPH4s/efFBsn1MJgkbJdz4waMKirQav90eTm0IiNhNG4cOr0Jd9SoUWXKnMpu3bplvHbt2kIAeOGFF2q4XK5SZzcHB4dauTPZ4MGDqzIyMvQAICYmxmDjxo0O5eXlWpWVlVqjR49uVT701q1bJqdPn04HpC0KlpaWjWKSSCTkzTffdPzzzz+NORwOCgsLdbOzs7WHDBlS/cEHHzitXLnSYdq0acLAwMAKkUgEPT09SUhIiMvUqVNLQ0JCVJYuvXr1qunNmzdN+Xw+HwCqqqo4AoFAf8CAAXV9+/atGz9+vNLfQVFRkY6dnV2jzumoqKiUvn37NmiwtnZuBweHWrk+O5fLrR43blwZh8PBkCFDqrZs2WIP9D5ntc6ENb93M2w6Ww+kthYIDwcCAwEfH3VH02vJyYEOIWhVt5oQ0OxstHvagbe3d/X9+/cbGYyUlJRw8vLydPl8fi0grQG397yK6OrqNsSupaVFxWIxAYBly5b137VrV1ZKSkriu+++m9uS45qq7N271+LJkyfaDx48SBIIBImWlpai6upqjq+vb+3du3cTfXx8qj/66COHDRs29NXR0UFsbGzSrFmznl68eLHPmDFj3FW9DqUUb775Zp68LzwrKyt+3bp1xUDrz0pPT09SXV3d6j22dm7F58jhcKCvr08BQEtLC/X19QTofc5qnQlL6t0MS+o9ECYJ2y04OEAkkaBVNR9KQRwd0e4hykFBQeU1NTUc+YhvsViMVatWOc2ePbtY0fJUGQEBARXHjx83B4CYmBj9lJQUpQ5iLVFVVcXp16+fqLa2lqjSZztixIjybdu2WcvjfPLkSaPWC6FQqGVlZSXS09OjFy5cMMnNzdUFpCP2TUxMJKtWrSp566238mNjYw2FQiFHVqsV7tmz57FAIFDZOW3KlCllR44csZL7mqenp+vk5OS02brr7u6u1CWtM84tp7c5q3UmrPm9m0lPl461cnJSdyQMlWCSsN3G2LGoNDFBfXV1y5UNExPUjxnT/qZ3DoeDc+fOpS1btsx527ZtfSUSCcaNGycMDw/PaevYt99+u2jOnDkurq6uXq6urjVubm415ubmKo94f++993L9/f09LSwsxEOGDKm
oqKhotYvh22+/zVq0aJEzl8u14nA42LVrV+aECRMa7nnp0qUlU6ZMceNyuXxfX9+q/v371wDSZv5//etfjhwOB9ra2nT37t2ZpaWlWlOnTnWrra0lAPDpp58+VjXumTNnliUkJOi/8MILPEBaOz927Fi6trZ2q0lvypQppb/++qvJ9OnTldrKduTcct599938pUuX9v/iiy/sJ06cqHRQ3ueff567bNmyfjwejy+RSIiTk1PtjRs30o4ePWpx8uRJS21tbWptbS369NNP8wD1Oqt1JsylrZuZPx/43//+rrEzNJxLl4CpU6WSsPPnqzsatdPVLm3y0e/KBsvp6UFy+DAehYSgW//xisVi1NXVEUNDQ5qQkKA3adIk7sOHD+PlzcKMxlRUVJARI0Z4xMTECLS1e069saud1TqT1lzaes4T7yWw6Ww9DCYJ263IEvajtWvhXF4OLUJAKQUxMUH9V18hs7sTOiCd0jZq1CgPkUhEKKX48ssvM1lCbxljY2O6cePG3PT0dF13d/dmPu6aiLqd1ToTltS7mfR04OWX1R0FQyXkkrA7djBJ2G4kJATC2bMRFxkJo+xs6Dg6QjRmDCrbO42tszA3N5fEx8cnqefqPZPg4OAydcfQHtTtrNaZsKTejVRXA/n5bDpbjyEsjEnCqgkOB+iKaWsMRm+nS999CSGBhJBkQkgaIeS9FsrMIYQkEkISCCE/KGxfSAhJlS0LuzLO7iIjQ/rJmt97AOnpwKlTTBKWwWD0KLqspk4I0QLwDYCJALIB3CGEnKeUJiqUcQfwLwAjKKVPCSE2su0WADYB8ANAAcTIjlWb8X1nwKaz9SCYJCyDweiBdGVN3R9AGqX0EaW0DsBxANOalHkdwDfyZE0plUsnTgbwM6W0RLbvZwCBXRhrt8CSeg+BScIyGIweSlcmdQcAinMis2XbFOEC4BJC/iCE/EkICWzHsSCELCOERBNCoouKijox9K4hPR3Q0wPs7NQdCaNVmCSs+pF6rxohIqIPfvnFCJ3gvcqsV/+mu61Xhw4d6qG4n8fj8eWGOS2haKrTXl588UVuUVHRM/1eejrqVpTTBuAOYAyAeQC+I4T0UfVgSuk+SqkfpdTP2tq6i0LsPDIypIPk1DWKl5SXMIYAACAASURBVKECTBJW/Zw4YQZ7e18EBXGxapULXnmFC3t7X5w4waxXO4nutl6trKzUSktL0wGAu3fv6nfSbbTIvHnznmzfvl3zk0IX0JXpJQeAom6ao2ybItkAzlNKRZTSdAApkCZ5VY7tcTAf9R4Ak4RVLydOmGHhwgEoKNBBVRUHlZVaqK7moKBABwsXDnjWxM6sV9VrvTp9+vSSiIgICwCIiIiwCA4ObnBxS05O1h06dKgHn8/35PP5nk3vF5AKAC1fvtxRHsu2bdusACAzM1PHz8/PQ17zv3r1qjEAzJ07t/Ts2bOWrT3n3kpXJvU7ANwJIf0JIboA5gI436TMOUhr6SCEWEHaHP8IwDUAkwgh5oQQcwCTZNt6NCypazhMEla9SL1XndGS4UltLQdr1zo/S1O8qtarP/3008M7d+4kK5ZTtF7dunVrTmJiolJ7zqysLP01a9YUpqWlJZiZmdVHRESYA0BoaOjT+Pj4pOTk5EQPD4/q8PBwpVrlcuTWq8nJyYkJCQmJQ4YMqVHcL7deTUxMTIqKikp5//33HSUSCeTWq8nJyYmpqakJM2fOLJNbr6ampiakpKQkbt26NU/xXCEhIcIFCxYUrVixouD27dspivsU7VGTkpISY2NjDa9cuWIsu1e91atXF6WlpSVwudxGAjO3b982Hj58eKNnPW/evKcXLlwwB4Br1671mTlzZkPSt7e3F//2228piYmJSSdOnHi0bt26fk2fyc6dO63MzMzq4+Pjk+7fv590+PBha4FAoHvgwAGL8ePHCwUCQWJSUlLCsGHDqgDA2tq6vq6ujuTn5z93TfBdNvqdUiomhKyGNBlrAThAKU0ghGw
GEE0pPY+/k3cigHoAb1NKnwAAIeRTSF8MAGAzpbSk+VV6DkIh8PQpm6Ou0Vy5AiQlSSVhSau+Ioyu4MYNI7Shi47yci1ERhph3DhmvdqDrFdtbGzqzczMxPv27TN3c3OrNjY2bngzq6urI6+99ppzYmKiAYfDQWZmZjMzmOvXr5sKBALD8+fPmwNAeXm5VmJiov7w4cMrly9f7iISiTizZs16Kn/+AGBpaSnOysrStbOzq256vt5Ml/buUkovU0q5lFJXSulnsm0bZQkdVMpblFI+pdSHUnpc4dgDlFI32XKwK+PsDtjI9x4Ak4RVLzk5OiCkdflVQiiys5n1ag+0Xp01a9bTd955x3nevHmNKmifffaZrY2NjSgpKSnxwYMHiSKRSJmNKgkLC8uSx5KTk/Ng5syZZVOmTKm4efNmsoODQ92SJUv6ywf/AVL/9I7+TnsibMhWN8GSuoYjl4Rdt45JwqoLBwcRJJLWm0goJXB0ZNarPdB6NTQ09Okbb7yRP3PmzEYSskKhUKtv374iLS0t7N6927LpwDsAmDhxovDbb7+1lrvNxcXF6ZWVlXFSUlJ0HR0dRevXry9esGBB0d27dw0B6QC+oqIiHQ8Pjx6v5d5emExsN8GSuobDJGHVz9ixlTAxqYeSWl4DJib1GDOGWa/2QOtVc3NzyWeffZbftPybb75ZGBwc7Hr8+HHLcePGCQ0MDJq9ZK1bt644IyNDz8fHx5NSSiwsLESXL19+eO3aNZPw8HA7bW1tamhoWH/s2LF0APj9998NBw8eXKnzHL6gM+vVbmLNGuDQIWnfOuuu1TDS0wE3N+mI9y++UHc0Gk1XW682jH5X1kStpyfB4cOP0I5+4c6AWa+2D02wXl28eLHT9OnTS6dNm9aip3tPhlmvagDyke8soWsgTBJWc5Am7EdYu9YZ5eVaIISCUgITk3p89VVmdyd0gFmvthdNsF719vau7q0JvS1YUu8m5JVBhobBJGE1j5AQIWbPjkNkpBGys3Xg6CjCmDGV6lJtYtar7Ufd1qvr169vu1Wol8KSejdAqTSpT5ig7kgYzWCSsJqJ1HuVWa8yGO2EjX7vBoqKpHmDDZLTMJgkrMYjFqs7AgajZ8GSejfARr5rKEwSVqO5dw/6FhYYdP8+mk2PYjAYymFJvRtgSV0DYZKwGo1EAixeDJeKCmgtWgSXTjBpYzCeC1hS7wYyMqSfLKlrEHJJ2LffZlMSNJDDh2GekgIDSoHkZBhGREBl98aW0NLSGio3/pgyZcqAtoxVlLF582abZzmuJ9Beq9Pg4GCXgwcPmnfGtZta3b7yyiv9uVwu/5NPPrF588037c+dO2fSkfMfOXKkz4YNG/oCUjMbGxsbXx6Px+fxePxVq1Z16QhZuelPe49btmyZ4/nz59t932ygXDeQng5YWQHGxuqOhNEAk4TVWIRCcNatQ7/qammlo7oanDffhPPMmSgzNcUz19n19PQkAoEgEQCCgoL6h4WFWX/88ccF7TnH3r17bV9//fWStlToWkIikYBSCi2t585npFUUrW6zsrK079+/b5SVlRX/LOcSiURoKjqzY8cOu8uXL6fJ11esWFGwefPmdv3uu5sNGzYULl682DkoKKhdU/N65RunpsHc2TQMJgmr0WzYAPuamsb/m2pqwFm/HvaddY2RI0dWpKWl6QHAxx9/bOvu7u7l7u7utXnzZhtAuY3pli1bbAoLC3VGjx7NlduUKhIeHm45fvx4V39/fw9nZ2fv9evX9wWUW5Xu3bvXgsvl8t3d3b1WrlzZUFM8ffq0KZ/P9/Tw8OAHBARw5bHMnj3bxcfHx9PT07PBajU6Olrfx8fHk8fj8blcLv/Bgwd6Ldmv/vbbb4YvvPCCh5eXl+fIkSPdMzMzdeTb5RavO3bssGnpeX3wwQd2XC6X7+HhobRmu2HDhr7e3t6e7u7uXvPmzXOWyPpLtmzZYiO3fZ06deoAALh06ZKxvJbs6enJf/r0KUe
xlWDChAncwsJCXR6Px7969aqxYotAS/fh7+/vsWTJEidvb2/PLVu22CrGFhcXp6erqyvp27dvq8MuWzv3a6+95uTt7e05YMAAr6ioKMNJkya5Ojs7e69Zs6bhOzlhwgRXLy8vTzc3N6/t27crdeLbvXu3hfx3Nn/+fGexWAyxWIzg4GAXd3d3L3nrBABwudy60tJS7aysrPZVvimlLS6QuqvdaK2MpixDhw6lmoqbG6WzZ6s7CkYDISGUmplRWlam7kh6HJA6LLb77zM2NjaDUhrd1nL3Lo3X06MS6UTQxoueHpXExtIHqpxH2WJgYFBPKY2uq6uLHjdu3NPPP/888+bNm4nu7u5VQqHwbmlp6V1XV9fq33//PeHgwYNpISEhRfJji4uL71FKo+3t7Wtzc3NjlZ3/q6++SreysqrLy8u7V15eHuPm5lYdFRWVKBAI4ggh9Pr160mU0uj09PT7dnZ2tTk5ObF1dXXRw4YNK4uIiEjLycmJtbW1rUtKSoqjlEbn5+ffo5RGv/HGG3nffPPNI0ppdFFR0T1nZ+caoVB4d8GCBQW7d+9+RCmNrq6ujikvL49RFndNTU3MoEGDKnJycmIppdH79u17OGvWrGJKabS7u3vV5cuXBZTS6GXLluW7ublVN72vEydOpAwaNKiirKzsrmJcM2fOLD5w4MBDxW2U0uhp06Y9OXbsWCqlNNra2rquqqoqRh47pTR67NixpdeuXUuilEaXlpberaurixYIBHHyayv+rHid1u7jhRdeKA8NDS1U9nvZuXNn+tKlS/Pl6+vWrcu1trau8/DwqPLw8Kg6ffp0SlvnXrFiRR6lNHrz5s1Z1tbWdRkZGferqqpibGxs6vLy8u4pPgP5716+Xf6diYmJiR87dmxpTU1NDKU0OjQ0tPDrr79Ov3nzZmJAQIBQHp/8OVFKo0NCQooOHjyY1vSeZH9PSv/WWn0DoJTWE0IkhBAzSmm3Kzn1BurrgcxMYOZMdUfCACBtNjl1Sjri3aRD3XSMTkY+OE7Ugl2LSAQsWgSXmBgkP4sOTW1tLYfH4/EBYNiwYeVr164t3rZtm/XLL79campqKgGA//f//t/TGzdumAQFBQmb2piqco2RI0eW2dnZ1cvPFRkZaRwSElKqaFX6+++/Gw0fPrzc3t5eDAAhISElUVFRxlpaWtTf37+cx+PVAYDcBjYyMtL02rVrfcLDw+1k90HS0tJ0AwICKrdv3943Oztbd+7cuU99fHxqldmv3rlzRz81NdVg3LhxXOlzlsDa2lpUXFysVV5erjVlypQKAFiyZMmTX3/91azpPf3888+m//jHPxqMb5TZ0165csVkx44ddjU1NZzS0lJtPp9fDUDo4eFRPWPGjP5BQUGloaGhpQAwfPjwig0bNjjNmTOnZN68eU9dXV1V6sqIi4vTU3Yf8v1N3d/k5OXl6VhbWzeqpTdtfm/pGcn3z5gxoxQABg4cWO3m5lbt7OwsAgAnJ6faR48e6drZ2VV/8cUXtpcuXeoDAPn5+ToJCQn6dnZ2DVoLV69eNYmPjzccOHCgJwDU1NRwbGxsxCEhIaWPHz/WW7hwodMrr7winDFjRoNwj7W1tTgnJ0dXlecjR5VqfQWAB4SQnwE0BEgpZZqaKpCbK/1nxJrfNQQmCauxJCZCLz4eRi2NdJdIQB48gHFSEvS8vNBu9y3FPvW2kNuYnjlzxuyjjz5yuH79etn27dvzFMtERET02bp1qz0A7Nu3LwMASJNBl/L1jliAUkpx+vTptIEDBza65yFDhtSMGjWq8scffzSbOnWq+9dff50ZFBRU3jTuOXPmlLq5uVXHxsYKFI8vLi7ulI79qqoqsn79eufbt28nurm5id566y37mpoaDgDcuHEj9cqVKyY//fST2fbt2/smJycnbN26NX/69OnCn376yWzUqFG8S5cuparyfCilRNl9yGlpnIOBgYFEKBS2VYFt9dxyWWAOhwM9Pb0GiWAOhwOxWEw
uXrxoEhUVZRIdHS0wMTGR+Pv7ezS1n6WUktmzZz/55ptvmpkIxcfHJ/7444+me/bssT5x4oTFqVOnMgCgpqaGKDO4aQ1V3nfPAvgIwE0AMQoLQwXYdDYNgknCajR8Pmq9vVHJ4UCprjqHA+rjgwpPz/Yn9JYYO3ZsxeXLl/uUl5dzysrKOJcvXzYfO3ZsuTIbUwAwMjKql1uRLliwoFTu7/3SSy9VAcDvv/9uWlBQoFVRUUEuX77cZ/To0c1q+KNGjaq8ffu2SV5enrZYLMapU6csxowZUzFmzJjKv/76y0QgEOgCQEFBgZYsxrKwsDBbeT/1H3/8YQAAiYmJup6enrUffvhh4eTJk0tjY2MNlMXt6+tbU1JSon39+nUjQFrTj46O1reysqo3MTGpv3btmjEAHDp0SKkt7OTJk8uOHj1qJR/1L49LTlVVFQcA7OzsxEKhkHPhwgVzAKivr8fDhw91X3nllfJvvvkmp6KiQksoFGolJCTo+fv7V3/22Wf5vr6+lfHx8fqq/K5auo+2jvPy8qp5+PBhq1oHz3puOaWlpVpmZmb1JiYmknv37unfv3/fqGmZwMDAsosXL5rL7WsLCgq0UlJSdPPy8rTr6+uxaNGi0n//+985Dx48aLDHffjwof7AgQOrVY0DUKGmTik93J4TMhrDprNpEEwSVqPhcICDB5EREAB+rZK0raMDHDqEjM6UgB85cmTV/PnznwwZMsQTAF599dWiESNGVJ85c8a0qY0pACxcuLA4MDCQa2trW3f79u2Upufz9fWtDAoKcs3Pz9edNWvWk5deeqkqOTm5UfOps7OzaNOmTTmjR4/mUkrJhAkTSv/xj3+UAkB4eHjGjBkz3CQSCSwtLUW3bt1K/fzzz3OXLVvWj8fj8SUSCXFycqq9ceNG2tGjRy1Onjxpqa2tTa2trUWffvpp3u+//27UNG59fX16/Pjxh2vWrOlXXl6uVV9fT1auXFng5+dXs3///oylS5e6EEIwZswYpXrts2bNKrt7967hoEGDPHV0dOiECROEu3btaqhtWllZ1YeGhhZ5enp6WVtbiwcOHFgJAGKxmMyfP79/eXm5FqWULF26tNDKyqp+/fr19rdu3TIlhFAPD4/qWbNmCbOystocsdrafbR23OTJkyvee+89J4lEAk4LX55nPbec4OBg4b59+6wHDBjgNWDAgBr5M1Bk6NChNR9++GHO+PHjuRKJBDo6OjQ8PDzL0NBQ8tprr7lIJBICAJs3b84GpC8WGRkZei+99FK75JLbtF4lhIwA8DEAZ0hfAggASikd0Npx3Y2mWq9+8ol0qa4G9JgulvqorQWcnYHBg6Vz1BnPRJdbrwJ4/XU4HjkC69rav1sS9fQgefVVFH33HbLbe+3uIjw83DI6OtooIiIiS92xMBqzePFip2nTppU29XjXZCIiIvrExMQYfvXVV7lN97VmvarKO+9+ADsAjATwAgA/2WebEEICCSHJhJA0Qsh7SvYvIoQUEUJiZctShX31CtvPq3I9TSQ9HbC3Zwld7TBJ2B5DWBhy9fUbz0fX14ckLAzN/rkxGKqwefPmvMrKyh41hVssFpOPPvqo3XPpVamp36aUDmv3iQnRApACYCKAbAB3AMyjlCYqlFkEwI9SulrJ8RWUUpXlWjS1pj56tHQE/O+/qzuS5xiJBPD2lr5Z3b3LFOQ6QHfU1AHg4EGYv/EGXKqrwTEwgGT3bqQvWoTS9l6XweiNdLSmfoMQso0QEkAIGSJfVDjOH0AapfQRpbQOwHEA01QPu3fAhGc0ACYJ2+NYuBBPuVxUEwJ4eKBqwQKW0BkMVVBlSpu8lq74dk4BtOWC4QDgscJ6tsK5FAkmhLwEaa1+HaVUfow+ISQagBjA55TSc00PJIQsA7AMAPr169fWfXQ7dXVAdjZL6mqHScL2OOSD5sa+JOYdOqTdqYPjGIzeTIt/KoSQtbIfP6KUjm2ydJat1QUALpRSXwA/A1Acae8sa+a
bD2AnIcS16cGU0n2UUj9KqZ+1tXUnhdR5ZGVJtbBYUlcjTBK2xzIY9/CUWGAg7qs7FAajx9Da++9i2Wf4M547B4CTwrqjbFsDlNInlFL55JXvAQxV2Jcj+3wEIBLA4GeMQ22w6WwaQFgYYGYGLF3adlmG5iCVl3MhFRVaWLTIBcx7lcFQidaSehIhJBWAByEkTmF5QAiJU+HcdwC4E0L6E0J0AcwF0GgUOyGkr8JqEIAk2XZzQoie7GcrACMAqKQEpUkw4Rk1I5eEXb6cScL2NA4fNkdKigGk3quGiIhg1qtdzPNkvUoIGRofH98wJ2nz5s02hJChN2/eNGz5LFJzl7bKKGPr1q3WO3futGx/5O2nxS8npXQegFEA0gC8orBMlX22CqVUDGA1gGuQJuuTlNIEQshmQkiQrNgaQkgCIeQ+gDUAFsm2ewKIlm2/AWmfeo9M6tragKOjuiN5TmGSsD0ToZCDdev6QS6zWV3NwZtvOqOsrEPJVC4Tm5qamqCjo0PDwsLa3We3d+9e24qKimeOQyKRoL6+mXT6c48y69WUlJTETZs2Fe7cuTO3PfPLRUrMA3bs2GG3fv36Ivm6u7t7dURERIOC3rlz5yzc3NxUEpp5Fv75z38+2bt3r23bJTtOq19OSmk+pXQgpTSz6aLKySmllymlXEqpK6X0M9m2jZTS87Kf/0Up9ZJdYyylVCDbfotS6iPb7kMp3d/RG1UH6elAv37SvMLoZpgkbM9lwwZ7yLTDG6ip4WD9ema9yqxXO8V69eWXXy69fPlyHwBISEjQMzExEZubmzfsDw0N7eft7e3p5ubmtW7dOqXfu7Nnz5oOGjSIx+fzPadMmTJALh+8atUqB/k9L1u2zBGQ6tI7OjrW3rhxo921/PbSPp9WRrtITwdcXNQdxXMKk4Ttmdy7p48jR2xQW9t47mFtLQdHjthg9eoiNDE2aS8ikQjXrl0znTRpUtlvv/1m+MMPP1jGxMQkUamFs+f48ePLU1NT9ezs7ESRkZFpAPDkyRMtS0vL+m+//dY2KioqpSVv7ri4OKMHDx4kGBsbSwYPHsyfNm2a0NbWVpyVlaW3f//+9PHjx2dkZGTofPzxxw4xMTFJ1tbW4lGjRnGPHDnSZ/z48RWrV692iYyMFPB4vDq5xvr777/fd+zYsWWnTp3KKC4u1vLz8/MMCgoq+/rrr61XrVpVsHLlypKamhoiFotx+vRps6Zx19bWkjVr1vS7dOlSmr29vfi7774z37Bhg8OpU6cyXnvtNZevvvoqa8qUKRXLly9X2qZ48uRJ08uXL/eJiYkRmJiYSJpqvwPA22+/XSg3vJk+fXr/48ePm82fP18YHh5ul5mZ+cDAwIDKDWTCwsLswsPDMydNmlQpFAo5hoaGksLCwoZzXbhwIW3q1KnucvOd7777zgqQyqa2dB8AUFdXR+Lj45Oaxnbjxg1jX1/fKsVtpqam9fb29nV37tzRP336dJ9Zs2Y9PXLkSIMH+o4dO3JsbW3rxWIxXnzxRY/bt28bDBs2rEGDPS8vT3vr1q19b968mWJqair54IMP7D799FPbDRs2FF6+fNn80aNH8RwOp5FpzpAhQyojIyNNxo4d2yiWzqZX9g1pCmyOupqorQXCw4HAQMDHR93RMFRFNjgOrXuvPvOgObn1qo+PD9/R0bFu7dq1xZGRkcZy61UzMzOJ3Hp1yJAh1b/99pvpypUrHa5evWpsaWmpUpu53HrV2NiYyq1XAaAl61UdHZ0G69XIyEijlqxXv/zyy748Ho8/cuRID0Xr1bCwsL4ffPCBXWpqqq6xsTFVFreiZSmPx+Nv27atb25uro4y61Vl96Sq9aqvry+Py+Xyb926ZRIfH28AAHLr1d27d1vo6OhQ4G/r1S1bttgUFxdr6ag4K6Wl+5Dvb4/1KgDMmTOn5MiRIxaXLl0yDw0Nfaq47/DhwxZ8Pt+Tz+fzU1NT9e/fv9/I3CUyMtLo4cOH+v7+/jwej8c
/fvy4ZVZWlq6lpWW9np6eJCQkxOXw4cN9jI2NG76sNjY2YsV4uwpWU+8iKiuBwkKW1NUCk4TtmSQm6iE+3qjFpC2REDx4YIykJD14eTHrVWa92oj2Wq+GhIQIN27c6Ojj41NlYWHRcKxAINDdtWuXrawlpT44ONilpkl3EKUUI0eOLLtw4UJ60/PGxsYmnT9/3vT06dPm3377rc2ff/6ZAkj909tro/ostDZP/QIh5HxLS1cH1tPJlI06YEm9m5FIpNPYBg0CxnWWnAKjW+Dza+HtXQkOR7l2NYdD4eNTAU9PZr0KZr3aUetVExMTyccff5z90UcfNXpZe/r0qZaBgYHEwsKi/vHjx9qRkZFmTY8dM2ZMZXR0tLF8BH1ZWRknLi5OTygUckpKSrRCQkKEe/bseSwQCBr60FNSUvS8vb3bZaP6LLRWU98u+5wJwA7AUdn6PADtFpl/3mDT2dSEXBL22DEmCdvTkMrIZSAggI+WvVcz0Inycsx69fm2Xl22bNnTpuUDAgKqvb29q1xdXb379u1bN3To0GYvZvb29uK9e/dmzJ07d0BdXR0BgE2bNuWYmZlJpk6d6lYrGxPy6aefNqiq3rlzx/iLL77oclMiVQxdopsaOCjbpm40zdBl1y7gn/8E8vIAOzt1R/McMWYM8OgR8PAhU5DrArrF0OX11x1x5Ig1amv//g+spyfBq68W4bvvmPUqo92o23r1jz/+MNi2bZvduXPnmjXXPwsdNXQxIoQ0iAIQQvoDMOqMwHoz6emAvj5g2y0zExkAmCRsbyEsLBf6+o37HvX1JQgLY9arjGdC3darhYWFOl988UVO2yU7jioD5dYBiCSEPAJAADgDWN6lUfUC5NPZWAtwN8IkYXsHpqYSfPllFt54wwXV1RwYGEiwc2cmTE01Wit2zZo1TwAoHUHOUC9OTk7i0NBQobquP2PGDKVdG11Bm28ulNKrANwBrIVU9c2DUnqtqwPr6bDpbN0Mk4TVdCQSiUT1V9yFC5+Cy62G1Hu1CgsWMOtVBgOA7O+oxRfcNpM6IcQQwNsAVlNK7wPoRwiZ2nkh9k5YUu9mmCSsphNfVFRkpnJilw+aMzau7+zBcQxGT0UikZCioiIzAPEtlVGl+f0ggBgAAbL1HACnAFzscIS9lNJSQChkSb3bKCkB9u9nkrAajFgsXpqfn/99fn6+N1QVveJwgMjIbAAmuH+fNb8wGNIaerxYLG6xj1GVpO5KKQ0hhMwDAEppFWmqsMBoBJvO1s3s2SNV+2GSsBrL0KFDCyF1YmQwGF2IKm/MdYQQAwAUAAghrgA6TfyhN9KRpM4MnNoJk4RlMBiMBlRJ6h8DuArAiRByDMAvAN7pyqB6Os+a1AUCwMoKSE7u/Jh6JfX1TBKWwWAwFGiz+Z1S+l9CSAyA4ZBOaVtLKVVNROI5JT0dMDUF+vRR/RhKgSVLgLIy6efvv7PpcK0iEADDh0vfgpgkLIPBYABQbfT7LwCGUUovUUovUkqLCSH7uiG2Hot85Ht7kvLp08C9e1Lp8rt3gTNnui6+Ho/iG9DDh9JaOnsDYjAYDJWa3/sDeJcQsklhm0ZJxGoa7Z3OduYMMHcuUCNTMK6pAUJCgLNnuya+Hs+ZM0BcnDS5EwJoM7NBBoPBAFRL6qUAxgOwlTm3NXOsYfwNpUBGhupJ/fJlaQJv6jYpkQBz5kj3MxSoqABWrJCOdgekD/yNN/5eZzAYjOcYVZI6oZSKKaWrAJwB8DsAG1VOTggJJIQkE0LSCCHvKdm/iBBSRAiJlS1LFfYtJISkypaFqt6QuiksBKqrVUvqlAKLF7c84r2+XtrK3IbnzvPFpk1AVVXjbVVVwMaN6omHwWAwNAhVkvoe+Q+U0kMAFgH4b1sHEUK0AHwDYAoAPoB5hBC+kqInKKWDZMv3smMtAGwCMAyAP4BNhBBzFWJVO+0Z+f7nn0BxG0MOi4qA27c7HlevQCAAvv1W+takSHW1dDubNsBgMJ5
zWuyMJISYUkrLAJySJVk56QBUmT/kDyCNUvpIdr7jAKYBSFTh2MkAfqaUlsiO/RlAIID/a+mAJ0+efLdabAAAGX9JREFU4NChQ422eXl54YUXXoBIJMKxY8eaHTNo0CAMGjQIVVVVOHnyZLP9fn5+8Pb2hlAoxI8//thsf0BAADw8PFBcXIyLF6UCe4WFwKJFQGYm8OjRSxgwYADy8/Nx9erVZscLheMhkTjByekxxo//pdn+q1cDkZ9vh9u3H0EguNls/9SpU2FlZYXk5GT873//a7Z/xowZMDMzQ3x8PJTZ0s6ZMweGhoaIjY1FbGxss/2hoaHQ0dHBnTt3kJCQ0Gz/okWLAAC3bt1CSkpja2kdHR2EhoYCAKKiopCe3thx0NDQEHPmzAEAXL9+HdnZjR01TU1NMXPmTNlzuIr8/HzpSMKQEACA5ZMneOXCBQDAhVdewRNLS2D3bmDwYACAnZ0dAgMDAQBnz55FWVljPwVHR0dMmDABAHDy5ElUNan99+/fH6NHjwYAHDt2DCKRqNF+LpeLF198EQCafe8A9Xz3FHnppda/e+PHj4eTkxMeP36MX35p/t0LDAyEnZ0dHj16hJs3m3/3GAyGZtJaTf0H2WcMgGjZZ4zCels4AHissJ4t29aUYEJIHCHkNCHEqT3HEkKWEUKiCSHRTf/pqgv5YDd9/bbLcrloU9KawwHc3DoeV4+nuhoob8MKuby8eS2ewWAwniMI7aIOW0LILACBlNKlsvVXIZ0at1qhjCWACkppLSFkOYAQSuk4QsgGAPqU0i2ych8BqKaUbm/pen5+flRZbbS7ef114KefpDX2tqAUsLNrvaytLZCXx2ZsgVJgxAhpn4Wy7yyHI523zib4dymEkBhKKZv9wmBoKC3WEwkhQ1pbVDh3DgAnhXVH2bYGKKVPKKVyydnvAQxV9VhNpT3T2QgBDh6UmospQ0sLOHCA5SgA0oewZ0/Lowb19NjDYjAYzz2tTfANa2UfBdCWhNcdAO6EkP6QJuS5AOYrFiCE9KWU5slWgwAkyX6+BmCrwuC4SQD+1cb1NIKMDMCvHfWYl18GTp4EZs9uPK2Nw5Fuf/nlTg+x53LnjvRTVxeoq/t7u4EBsHIl4OGhnrgYDAZDQ2gxqVNKx3bkxJRSMSFkNaQJWgvAAUppAiFkM4BoSul5AGsIIUEAxABKIB1ZD0ppCSHkU0hfDABgs3zQnCZTXw9kZUkTdHuYORM4fhxYuFDaJayvDxw5It3OkCGRAGFhgK8vkJ0ttVuVY2gIbN6svtgYDAZDQ1CpT50Q4g3ptLSG4V+U0ogujKvdaEKfelYW4OwM7N0LLFvWvmPlXca3b7OuYaVcugRMnQocOyatqS9aJBWcMTICDh8GgoPVHeFzAetTZzA0G1W03zcB+Fq2jAXwHzBfZKV0xHKVEGmXsKkp6xpWyrZtgJOTtBkkOFhaY+dwgIEDWZMGg8FgyFBFfGYWpDKx+ZTSxQAGAmBSsUroSFIHAB5PKkbDuoabcOcOEBUFvPkmoKPD3oAYDAajBVRxwqimlEoIIWJCiCmAQjQemc6QkZ4uzS9OHXg6LY2Ef64JCwPMzKTzBeXI34DYA2MwGIwGVEnq0YSQPgC+g1R4pgJAc/kyBtLTAQcH6ewqRieRng6cOiW1VzUxabyPJXQGg8FoRKtJnRBCAPybUloKYA8h5CoAU0ppXLdE18NojzsbQ0V27pQm7zVr1B0Jg8FgaDyt9qlT6dD4ywrrGSyht0x7fdQZbVBSAuzfD8yfL20CYTAYDEarqDJQ7i4h5IUuj6SHU1sL5OSwpN6p7Nkjnba2fr26I2EwGIwegSp96sMAhBJCMgFUAiCQVuJ9uzSyHkZWlnSuOUvqnURtLRAeDgQGAj4+6o6GwWAwegSqJPXJXR5FL6Cj09kYTTh6FCgokA6QYzAYDIZKtNn8TinNhHQK2zjZz1WqHPe8IU/qLi5qDaN3IJeEHTQIGNeWxQCDwWAw5LR
ZU5cpyvkB8ABwEIAOgKMARnRtaD2L9HSpLgobz9UJXLkCJCVJJWGZsAyDwWCojCo17hmQysJWAgClNBeASatHPIdkZAD9+rGp052CoiQsg8FgMFRGlaReJ5vaRgGAEGLUtSH1TNh0tk6iqSQsg8FgMFRGlaR+khCyF0AfQsjrAK5Dqi7HUIAl9U5CmSQsg8FgMFSizT51Sul2QshEAGWQ9qtvpJT+3OWR9SAqKoCiIpbUO0xrkrAMBoPBaBNVprRBlsRZIm+BjAzpJ0vqHYRJwjIYDEaHUMVPfSYhJJUQIiSElBFCygkhZd0RXE+BTWfrBJgkLIPBYHQYVWrq/wHwCqU0qauD6akw4ZlOgEnCMhgMRodRZaBcwbMmdEJIICEkmRCSRgh5r5VywYQQSgjxk627EEKqCSGxsmXPs1y/u8jIAAwNARsbdUfSQ2GSsAwGg9EpqOqnfgLAOQC18o2U0rOtHUQI0QLwDYCJALIB3CGEnKeUJjYpZwJgLYDbTU7xkFI6SIX41E56urTpnemkPCPHjjFJWAaDwegEVEnqppBKw05S2EYBtJrUAfgDSKOUPgIAQshxANMAJDYp9ymALwC8rUrAmgibztYBJBJg+3YmCctgMBidgCpT2hY/47kdADxWWM+G1PGtAULIEABOlNJLhJCmSb0/IeQepFPpPqSU/tb0AoSQZQCWAUC/fv2eMcyOQak0qY8apZbL93yYJCyDwWB0GqqMfucSQn4hhMTL1n0JIR929MKEEA6AHQCUjYzKA9CPUjoYwFsA/n979x9kVXnfcfz9QQWVH2UMqAgIaLAWf4Toipma2IxVQ4xipmKLxpk4SYNMoTUqNjqxmUb/sFHGTpzBKI5OYqpSgzN1UyGmP8SoFWEVgoKxAhcBoxYVQRNAgW//OOea68rde+7u3l/nfl4zO7vn3Oec+31WnO8+5z7P93lA0rDujSJiQUR0RETHyJEj+xpSr2zbBjt2eKTeay4Ja2bWb7JMlLsbuB74ECAiVgMzMlz3GsnubkVj0nNFQ4ETgaWSNgKfAzoldUTE7oh4O32/54D1wHEZ3rPuPPO9D1wS1sysX2VJ6odGxPJu5/ZkuG4FMFHSBEkDSf4Q6Cy+GBHbI2JERIyPiPHAMmBaRHRJGplOtEPSMcBEYEOG96w7r1HvA5eENTPrV1mS+luSjuUPG7pMJ3k83qOI2APMAR4DXgIeiog1km6UNK3C5WcCqyWtAhYBsyLinQyx1p2ryfVSsSTsFVe4JKyZWT/JMvt9NrAAOF7Sa0ABuCzLzSNiMbC427nvlWn7xZKfHwYezvIejVYowPDhyZdVwSVhzcz6XZbZ7xuAs9MtVwdExHu1D6t1eDlbLxRLwp5zDixfDqNGwemne/a7mVkfVUzqkq7udgywHXguIlbVKK6WUSjApEmNjqLFXHVVUhL28cfhqaeSterDh8Ndd8F55zU6OjOzlpXlM/UOYBbJuvPRwBXAVOBuSX9fw9iaXkTymbpH6lV45BG4777k5507k/WA778PW7bA9OmweHHP15uZWVlZkvoY4JSIuCYirgFOBQ4nmcx2eQ1ja3pvvAG7djmpZxYBl19e/vWdO5OJcxF1C8nMLE+yJPXDKan5TrJe/YiI2NntfNvxcrYqPfMMbN/ec5t3300+Zzczs6plmf1+P/CspEfS4wtIKrwN5pN13NuKl7NV6dFHK4/CBwyA3/62PvGYmeVMltnvN0laApyRnpoVEV3pz1+rWWQtwCP1Ki1Zksxw7ymx79sHRx1Vv5jMzHIky0idNIl3VWzYZgoFOOKIZC91q2DFCli5EoYNSybHlTN8OEyZUr+4zMxyJMtn6laG16hXoVgS9p574JBD9t/mkEOSZW1er25m1itO6n3gpJ5RaUnY6dNh0SIYMwaGDElG7kOGJMeLFnmduplZH2R6/G6ftGcPbNoEl1zS6EhaQPeSsOedl/zyli9PJsUddVTyyN0jdDOzPnFS76UtW2DvXk+
Sq6hYEvbSS2H06D+cl5LSsGZm1m/8+L2XvJwtozvvTErCXnNNoyMxM8s9J/VeKi5nc1Lvwe7dcPvtMHUqnHRSo6MxM8s9J/VeKhSSOilHH93oSJrY/ffDm2/C3LmNjsTMrC04qfdSoZBM2D7ooEZH0qT27YN582DyZDjrrEZHY2bWFjxRrpe8nK2CJUvgpZeS0bpntZuZ1YVH6r3kpF7BrbfC2LFw8cWNjsTMrG3UNKlLmirpZUnrJF3XQ7uLJIWkjpJz16fXvSzpS7WMs1q7diXLq72crYwVK+CJJ+Db3/bnE2ZmdVSzx++SDgDmA+cAW4AVkjojYm23dkOBK4FnS85NAmYAJwBHAf8p6biI2FureKuxaVPy3SP1MoolYb/1rUZHYmbWVmo5Up8CrIuIDRHxAbAQuHA/7W4CfgDsKjl3IbAwInZHRAFYl96vKXg5Ww9KS8IOHdroaMzM2kotk/poYHPJ8Zb03EcknQKMjYhHq702vX6mpC5JXVu3bu2fqDNwUu9B95KwZmZWNw2bKCdpAHAb0OtSYxGxICI6IqJj5MiR/RdcBYUCDBzobb8/oVxJWDMzq4taLml7DRhbcjwmPVc0FDgRWKpkydORQKekaRmubahCAcaNS4rPWAmXhDUza6hapqUVwERJEyQNJJn41ll8MSK2R8SIiBgfEeOBZcC0iOhK282QNEjSBGAisLyGsVbFy9n2wyVhzcwarmZJPSL2AHOAx4CXgIciYo2kG9PReE/XrgEeAtYCvwBmN8vMd3BS3y+XhDUzazhFRKNj6BcdHR3R1dVV8/d57z0YNgxuvhmuK7vyvs3s2wcnngiDBsHzz7uCXI5Jei4iOiq3NLNGcJnYKnnL1f1wSVgzs6bgqV5V8nK2/XBJWDOzpuCkXiUn9W5cEtbMrGk4qVepUIDBg2HEiEZH0iRcEtbMrGk4qVepOPPdHx3jkrBmZk3GSb1KXs5WwiVhzcyaipN6FSKS2e/echWXhDUza0JO6lV4551knbpH6rgkrJlZE3JSr4JnvqdcEtbMrCk5qVfBST3lkrBmZk3JSb0KTuokJWHnzYPJk+GssxodjZmZlXCZ2CoUCnDYYUnt97blkrBmZk3LI/UqeDkbySjdJWHNzJqSk3oV2n45W1cXLF3qkrBmZk3KST2jffuSpN7WI/V581wS1sysiTmpZ/TGG8lKrrZN6i4Ja2bW9JzUM2r7me8uCWtm1vSc1DNq66TukrBmZi2hpkld0lRJL0taJ+m6/bw+S9ILklZJekrSpPT8eEk70/OrJN1ZyzizKCb1tpwo55KwZmYtoWbr1CUdAMwHzgG2ACskdUbE2pJmD0TEnWn7acBtwNT0tfURMblW8VWrUIBRo+DggxsdSZ25JKyZWcuo5Uh9CrAuIjZExAfAQuDC0gYRsaPkcDAQNYynT9p2OZtLwpqZtYxaJvXRwOaS4y3puY+RNFvSeuAWoHQW1gRJKyU9IekLNYwzk7YsPOOSsGZmLaXhE+UiYn5EHAt8B7ghPf06cHREfBa4GnhA0ieKs0qaKalLUtfWrVtrFuOePbB5cxsm9WJJ2GuvdUlYM7MWUMuk/howtuR4THqunIXAVwEiYndEvJ3+/BywHjiu+wURsSAiOiKiY+TIkf0WeHebN8PevW2Y1F0S1syspdQyqa8AJkqaIGkgMAPoLG0gaWLJ4VeAV9LzI9OJdkg6BpgIbKhhrD1qy+VsLglrZtZyajb7PSL2SJoDPAYcANwbEWsk3Qh0RUQnMEfS2cCHwDbg6+nlZwI3SvoQ2AfMioh3ahVrJW2Z1F0S1sys5dR069WIWAws7nbueyU/X1nmuoeBh2sZWzUKhaSY2tixldvmQrEk7Ny5LglrZtZCGj5RrhVs3Jgk9APbZfd5l4Q1M2tJTuoZFApttEbdJWHNzFqWk3oGbbVG3SVhzcxalpN6BTt3wuuvt0lSd0lYM7OW5qRewauvJt/bIqm7JKyZWUtzUq+gbZazuSS
smVnLa5f53L3WNkm9WBL2/vtdEtbMrEV5pF7Bxo0waBAceWSjI6kxl4Q1M2t5TuoVFAowbhwMyPNvyiVhzcxyIc+pql+0xXI2l4Q1M8sFJ/UKcp/UiyVhr7jCJWHNzFqck3oPduxICqzlOqm7JKyZWW44qfcg9zPfXRLWzCxXnNR7kPuk7pKwZma54qTeg40bk++5TOouCWtmljtO6j0oFGDIEDjssEZHUgMuCWtmljtO6j0oznzPXYE1l4Q1M8sll4ntQaEAxxzT6ChqwCVhzcxyySP1MiJyvEbdJWHNzHKppkld0lRJL0taJ+m6/bw+S9ILklZJekrSpJLXrk+ve1nSl2oZ5/689VYyMTx3Sd0lYc3McqtmSV3SAcB84MvAJOCS0qSdeiAiToqIycAtwG3ptZOAGcAJwFTgjvR+dZPb5WwuCWtmllu1HKlPAdZFxIaI+ABYCFxY2iAidpQcDgYi/flCYGFE7I6IArAuvV/d5HI528aNLglrZpZjtZwoNxrYXHK8BTi9eyNJs4GrgYFAcSr2aGBZt2s/UfJM0kxgZnq4W9KLfQ/7404+ub/v2CsjgLf67W633JJ8NV7/9qu55LVvf9zoAMysvIbPfo+I+cB8SZcCNwBfr+LaBcACAEldEdFRmygbK699y2u/IL99k9TV6BjMrLxaPn5/DRhbcjwmPVfOQuCrvbzWzMys7dUyqa8AJkqaIGkgycS3ztIGkiaWHH4FeCX9uROYIWmQpAnARGB5DWM1MzNreTV7/B4ReyTNAR4DDgDujYg1km4EuiKiE5gj6WzgQ2Ab6aP3tN1DwFpgDzA7IvZWeMsFtepLE8hr3/LaL8hv3/LaL7NcUERUbmVmZmZNzxXlzMzMcsJJ3czMLCdaLqlnKD07SNK/pq8/K2l8/aOsXoZ+nSnpeUl7JE1vRIy9laFvV0taK2m1pP+SNK4RcVarL2WQm12lvpW0u0hSSMrd8j2zVtRSST1j6dlvAtsi4tPAPwM/qG+U1cvYr03A5cAD9Y2ubzL2bSXQEREnA4tISgY3tb6UQW52GfuGpKHAlcCz9Y3QzMppqaROhtKz6fFP0p8XAX8uNf3+ollK6m6MiNXAvkYE2AdZ+vZ4RPw+PVxGUpeg2fWlDHKzy/L/GcBNJH8076pncGZWXqsl9f2Vnu1ePvajNhGxB9gOfKou0fVeln61qmr79k1gSU0j6h+Z+iVptqT1JCP1v6tTbH1VsW+STgHGRsSj9QzMzHrWakndckzSZUAHcGujY+kvETE/Io4FvkNSBrnlSRpA8lHCNY2Oxcw+rtWSepbysR+1kXQg8EfA23WJrvfyXBY3U9/SIkTfBaZFxO46xdYXfSmD3Owq9W0ocCKwVNJG4HNApyfLmTVeqyX1iqVn0+PipjDTgf+O5q+wk6VfrSpLueDPAneRJPT/a0CMvdGXMsjNrse+RcT2iBgREeMjYjzJPIhpEeHNXswarKWSevoZebH07EvAQ8XSs5Kmpc3uAT4laR3Jlq5ll+M0iyz9knSapC3AxcBdktY0LuLsMv43uxUYAvwsXf7V9H/QZOzXHElrJK0i+beYeQfCRsrYNzNrQi4Ta2ZmlhMtNVI3MzOz8pzUzczMcsJJ3czMLCec1M3MzHLCSd3MzCwnnNSt7iQtLRYqkbRY0vA+3u+Lkv69zGsPpru/XdWX9zAzawUHNjoAy590Ax1FRMXNZyLivBrGcSRwWrpjX9ZrDkzXaZuZtRyP1NuEpH9I98d+Kh29zk3PHyvpF5Kek/SkpOPT8z+WdLuk/5G0oXQPd0nXSlqRjoC/n54bn97/PuBFYKykH0nqSguwfL9MXBsljUj3Hl+VfhUkPZ6+fq6kZ5TsJf8zSUPS81Ml/UbS88BflOn2L4HR6T2/kD4h+GF6/KKkKem9/lHSTyU9Dfy0P37fZmaN4KTeBiSdBlwEfIZkj+zSGt0LgL+NiFOBucAdJa+NAj4PnA/8U3qvc4GJJNtzTgZOlXR
m2n4icEdEnBARrwLfjYgO4GTgzySdXC7GiLgz3Xf8NJJdwW6TNIJkE5SzI+IUoAu4WtLBwN3ABcCpwJFlbjsNWB8RkyPiyfTcoen7/A1wb0nbSen7XFIuRjOzZufH7+3hDOCRiNgF7JL0c4B01PunJOVZi20HlVz3b+kj9LWSjkjPnZt+rUyPh5Ak803AqxGxrOT6v5Q0k+Tf2SiSxLm6Qqw/JKnX/3NJ56fXPJ3GNxB4BjgeKETEK2k//gWYmfF38SBARPxK0rCSz/M7I2JnxnuYmTUlJ/X2NgB4Nx257k/pbmkq+X5zRNxV2lDSeOB3JccTSEb+p0XENkk/Bg7uKRhJlwPjSOqOF9/rP7qPniWVizeL7nWRi8e/697QzKzV+PF7e3gauEDSweno/HyAiNgBFCRdDMkEN0mfqXCvx4BvlHy2PVrS4ftpN4wkUW5PR/lf7ummkoqP/y8rmWC3DDhD0qfTNoMlHQf8Bhgv6di0XTWPzP8qvdfnge0Rsb2Ka83MmppH6m0gIlakO5+tBt4EXgCKyexrwI8k3QAcRLLv9697uNcvJf0J8Ez6SPx94DJgb7d2v5a0kiQBbyb5w6Inc4DDgMfT+3ZFxF+no/cHJRU/FrghIv43faz/qKTfA0+S7PGdxa40roOAb2S8xsysJXiXtjYhaUhEvC/pUOBXwMyIeL7RcdWTpKXAXO/7bWZ55ZF6+1ggaRLJ59o/abeEbmbWDjxSNzMzywlPlDMzM8sJJ3UzM7OccFI3MzPLCSd1MzOznHBSNzMzy4n/ByovRNlIJOlHAAAAAElFTkSuQmCC\n",
+      "image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n  \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"265.995469pt\" version=\"1.1\" viewBox=\"0 0 500.739162 265.995469\" width=\"500.739162pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n  <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;}\n  </style>\n </defs>\n <g id=\"figure_1\">\n  <g id=\"patch_1\">\n   <path d=\"M -0 265.995469 \nL 500.739162 265.995469 \nL 500.739162 0 \nL -0 0 \nz\n\" style=\"fill:none;\"/>\n  </g>\n  <g id=\"axes_1\">\n   <g id=\"patch_2\">\n    <path d=\"M 50.14375 228.439219 \nL 267.58375 228.439219 \nL 267.58375 10.999219 \nL 50.14375 10.999219 \nz\n\" style=\"fill:#ffffff;\"/>\n   </g>\n   <g id=\"PathCollection_1\">\n    <defs>\n     <path d=\"M 0 4.330127 \nC 1.148363 4.330127 2.249847 3.873878 3.061862 3.061862 \nC 3.873878 2.249847 4.330127 1.148363 4.330127 0 \nC 4.330127 -1.148363 3.873878 -2.249847 3.061862 -3.061862 \nC 2.249847 -3.873878 1.148363 -4.330127 0 -4.330127 \nC -1.148363 -4.330127 -2.249847 -3.873878 -3.061862 -3.061862 \nC -3.873878 -2.249847 -4.330127 -1.148363 -4.330127 0 \nC -4.330127 1.148363 -3.873878 2.249847 -3.061862 3.061862 \nC -2.249847 3.873878 -1.148363 4.330127 0 4.330127 \nz\n\" id=\"m145838425a\" style=\"stroke:#0000ff;\"/>\n    </defs>\n    <g clip-path=\"url(#pf1fb1c960f)\">\n     <use style=\"fill:#0000ff;stroke:#0000ff;\" x=\"99.558162\" xlink:href=\"#m145838425a\" y=\"118.323\"/>\n    </g>\n   </g>\n   <g id=\"PathCollection_2\">\n    <defs>\n     <path d=\"M 0 4.330127 \nC 1.148363 4.330127 2.249847 3.873878 3.061862 3.061862 \nC 3.873878 2.249847 4.330127 1.148363 4.330127 0 \nC 4.330127 -1.148363 3.873878 -2.249847 3.061862 -3.061862 \nC 2.249847 -3.873878 1.148363 -4.330127 0 -4.330127 \nC -1.148363 
-4.330127 -2.249847 -3.873878 -3.061862 -3.061862 \nC -3.873878 -2.249847 -4.330127 -1.148363 -4.330127 0 \nC -4.330127 1.148363 -3.873878 2.249847 -3.061862 3.061862 \nC -2.249847 3.873878 -1.148363 4.330127 0 4.330127 \nz\n\" id=\"m15d4b4a51c\" style=\"stroke:#ff0000;\"/>\n    </defs>\n    <g clip-path=\"url(#pf1fb1c960f)\">\n     <use style=\"fill:#ff0000;stroke:#ff0000;\" x=\"159.239314\" xlink:href=\"#m15d4b4a51c\" y=\"157.20024\"/>\n    </g>\n   </g>\n   <g id=\"PathCollection_3\">\n    <defs>\n     <path d=\"M -0 7.071068 \nL 4.242641 0 \nL 0 -7.071068 \nL -4.242641 -0 \nz\n\" id=\"m790b3b3eff\" style=\"stroke:#0000ff;\"/>\n    </defs>\n    <g clip-path=\"url(#pf1fb1c960f)\">\n     <use style=\"fill:#0000ff;stroke:#0000ff;\" x=\"99.558162\" xlink:href=\"#m790b3b3eff\" y=\"118.323\"/>\n    </g>\n   </g>\n   <g id=\"PathCollection_4\">\n    <defs>\n     <path d=\"M -0 7.071068 \nL 4.242641 0 \nL 0 -7.071068 \nL -4.242641 -0 \nz\n\" id=\"m84d4c46120\" style=\"stroke:#ff0000;\"/>\n    </defs>\n    <g clip-path=\"url(#pf1fb1c960f)\">\n     <use style=\"fill:#ff0000;stroke:#ff0000;\" x=\"174.533658\" xlink:href=\"#m84d4c46120\" y=\"119.839165\"/>\n    </g>\n   </g>\n   <g id=\"matplotlib.axis_1\">\n    <g id=\"xtick_1\">\n     <g id=\"line2d_1\">\n      <defs>\n       <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"md50692fb8a\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n      </defs>\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"50.14375\" xlink:href=\"#md50692fb8a\" y=\"228.439219\"/>\n      </g>\n     </g>\n     <g id=\"text_1\">\n      <!-- 0.0 -->\n      <defs>\n       <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 
74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n       <path d=\"M 10.6875 12.40625 \nL 21 12.40625 \nL 21 0 \nL 10.6875 0 \nz\n\" id=\"DejaVuSans-46\"/>\n      </defs>\n      <g transform=\"translate(42.192188 243.037656)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_2\">\n     <g id=\"line2d_2\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"104.50375\" xlink:href=\"#md50692fb8a\" y=\"228.439219\"/>\n      </g>\n     </g>\n     <g id=\"text_2\">\n      <!-- 0.1 -->\n      <defs>\n       <path d=\"M 12.40625 8.296875 \nL 28.515625 8.296875 \nL 28.515625 63.921875 \nL 10.984375 60.40625 \nL 10.984375 69.390625 \nL 28.421875 72.90625 \nL 38.28125 72.90625 \nL 38.28125 8.296875 \nL 54.390625 8.296875 \nL 54.390625 0 \nL 12.40625 0 \nz\n\" id=\"DejaVuSans-49\"/>\n      </defs>\n      <g transform=\"translate(96.552188 243.037656)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-49\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_3\">\n     <g id=\"line2d_3\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"158.86375\" xlink:href=\"#md50692fb8a\" y=\"228.439219\"/>\n      </g>\n     </g>\n     <g id=\"text_3\">\n      <!-- 0.2 -->\n      <defs>\n       <path d=\"M 19.1875 8.296875 \nL 53.609375 8.296875 \nL 53.609375 0 \nL 7.328125 0 \nL 7.328125 8.296875 \nQ 12.9375 14.109375 22.625 23.890625 \nQ 32.328125 
33.6875 34.8125 36.53125 \nQ 39.546875 41.84375 41.421875 45.53125 \nQ 43.3125 49.21875 43.3125 52.78125 \nQ 43.3125 58.59375 39.234375 62.25 \nQ 35.15625 65.921875 28.609375 65.921875 \nQ 23.96875 65.921875 18.8125 64.3125 \nQ 13.671875 62.703125 7.8125 59.421875 \nL 7.8125 69.390625 \nQ 13.765625 71.78125 18.9375 73 \nQ 24.125 74.21875 28.421875 74.21875 \nQ 39.75 74.21875 46.484375 68.546875 \nQ 53.21875 62.890625 53.21875 53.421875 \nQ 53.21875 48.921875 51.53125 44.890625 \nQ 49.859375 40.875 45.40625 35.40625 \nQ 44.1875 33.984375 37.640625 27.21875 \nQ 31.109375 20.453125 19.1875 8.296875 \nz\n\" id=\"DejaVuSans-50\"/>\n      </defs>\n      <g transform=\"translate(150.912188 243.037656)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-50\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_4\">\n     <g id=\"line2d_4\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"213.22375\" xlink:href=\"#md50692fb8a\" y=\"228.439219\"/>\n      </g>\n     </g>\n     <g id=\"text_4\">\n      <!-- 0.3 -->\n      <defs>\n       <path d=\"M 40.578125 39.3125 \nQ 47.65625 37.796875 51.625 33 \nQ 55.609375 28.21875 55.609375 21.1875 \nQ 55.609375 10.40625 48.1875 4.484375 \nQ 40.765625 -1.421875 27.09375 -1.421875 \nQ 22.515625 -1.421875 17.65625 -0.515625 \nQ 12.796875 0.390625 7.625 2.203125 \nL 7.625 11.71875 \nQ 11.71875 9.328125 16.59375 8.109375 \nQ 21.484375 6.890625 26.8125 6.890625 \nQ 36.078125 6.890625 40.9375 10.546875 \nQ 45.796875 14.203125 45.796875 21.1875 \nQ 45.796875 27.640625 41.28125 31.265625 \nQ 36.765625 34.90625 28.71875 34.90625 \nL 20.21875 34.90625 \nL 20.21875 43.015625 \nL 29.109375 43.015625 \nQ 36.375 43.015625 40.234375 45.921875 \nQ 44.09375 48.828125 44.09375 54.296875 \nQ 44.09375 59.90625 40.109375 62.90625 \nQ 36.140625 65.921875 28.71875 65.921875 \nQ 24.65625 65.921875 20.015625 
65.03125 \nQ 15.375 64.15625 9.8125 62.3125 \nL 9.8125 71.09375 \nQ 15.4375 72.65625 20.34375 73.4375 \nQ 25.25 74.21875 29.59375 74.21875 \nQ 40.828125 74.21875 47.359375 69.109375 \nQ 53.90625 64.015625 53.90625 55.328125 \nQ 53.90625 49.265625 50.4375 45.09375 \nQ 46.96875 40.921875 40.578125 39.3125 \nz\n\" id=\"DejaVuSans-51\"/>\n      </defs>\n      <g transform=\"translate(205.272188 243.037656)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-51\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_5\">\n     <g id=\"line2d_5\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"267.58375\" xlink:href=\"#md50692fb8a\" y=\"228.439219\"/>\n      </g>\n     </g>\n     <g id=\"text_5\">\n      <!-- 0.4 -->\n      <defs>\n       <path d=\"M 37.796875 64.3125 \nL 12.890625 25.390625 \nL 37.796875 25.390625 \nz\nM 35.203125 72.90625 \nL 47.609375 72.90625 \nL 47.609375 25.390625 \nL 58.015625 25.390625 \nL 58.015625 17.1875 \nL 47.609375 17.1875 \nL 47.609375 0 \nL 37.796875 0 \nL 37.796875 17.1875 \nL 4.890625 17.1875 \nL 4.890625 26.703125 \nz\n\" id=\"DejaVuSans-52\"/>\n      </defs>\n      <g transform=\"translate(259.632187 243.037656)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-52\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"text_6\">\n     <!-- generalized fpr -->\n     <defs>\n      <path d=\"M 45.40625 27.984375 \nQ 45.40625 37.75 41.375 43.109375 \nQ 37.359375 48.484375 30.078125 48.484375 \nQ 22.859375 48.484375 18.828125 43.109375 \nQ 14.796875 37.75 14.796875 27.984375 \nQ 14.796875 18.265625 18.828125 12.890625 \nQ 22.859375 7.515625 30.078125 7.515625 \nQ 37.359375 7.515625 41.375 12.890625 \nQ 45.40625 18.265625 45.40625 27.984375 \nz\nM 54.390625 6.78125 \nQ 
54.390625 -7.171875 48.1875 -13.984375 \nQ 42 -20.796875 29.203125 -20.796875 \nQ 24.46875 -20.796875 20.265625 -20.09375 \nQ 16.0625 -19.390625 12.109375 -17.921875 \nL 12.109375 -9.1875 \nQ 16.0625 -11.328125 19.921875 -12.34375 \nQ 23.78125 -13.375 27.78125 -13.375 \nQ 36.625 -13.375 41.015625 -8.765625 \nQ 45.40625 -4.15625 45.40625 5.171875 \nL 45.40625 9.625 \nQ 42.625 4.78125 38.28125 2.390625 \nQ 33.9375 0 27.875 0 \nQ 17.828125 0 11.671875 7.65625 \nQ 5.515625 15.328125 5.515625 27.984375 \nQ 5.515625 40.671875 11.671875 48.328125 \nQ 17.828125 56 27.875 56 \nQ 33.9375 56 38.28125 53.609375 \nQ 42.625 51.21875 45.40625 46.390625 \nL 45.40625 54.6875 \nL 54.390625 54.6875 \nz\n\" id=\"DejaVuSans-103\"/>\n      <path d=\"M 56.203125 29.59375 \nL 56.203125 25.203125 \nL 14.890625 25.203125 \nQ 15.484375 15.921875 20.484375 11.0625 \nQ 25.484375 6.203125 34.421875 6.203125 \nQ 39.59375 6.203125 44.453125 7.46875 \nQ 49.3125 8.734375 54.109375 11.28125 \nL 54.109375 2.78125 \nQ 49.265625 0.734375 44.1875 -0.34375 \nQ 39.109375 -1.421875 33.890625 -1.421875 \nQ 20.796875 -1.421875 13.15625 6.1875 \nQ 5.515625 13.8125 5.515625 26.8125 \nQ 5.515625 40.234375 12.765625 48.109375 \nQ 20.015625 56 32.328125 56 \nQ 43.359375 56 49.78125 48.890625 \nQ 56.203125 41.796875 56.203125 29.59375 \nz\nM 47.21875 32.234375 \nQ 47.125 39.59375 43.09375 43.984375 \nQ 39.0625 48.390625 32.421875 48.390625 \nQ 24.90625 48.390625 20.390625 44.140625 \nQ 15.875 39.890625 15.1875 32.171875 \nz\n\" id=\"DejaVuSans-101\"/>\n      <path d=\"M 54.890625 33.015625 \nL 54.890625 0 \nL 45.90625 0 \nL 45.90625 32.71875 \nQ 45.90625 40.484375 42.875 44.328125 \nQ 39.84375 48.1875 33.796875 48.1875 \nQ 26.515625 48.1875 22.3125 43.546875 \nQ 18.109375 38.921875 18.109375 30.90625 \nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.1875 \nQ 21.34375 51.125 25.703125 53.5625 \nQ 30.078125 56 35.796875 56 \nQ 45.21875 56 50.046875 50.171875 \nQ 54.890625 
44.34375 54.890625 33.015625 \nz\n\" id=\"DejaVuSans-110\"/>\n      <path d=\"M 41.109375 46.296875 \nQ 39.59375 47.171875 37.8125 47.578125 \nQ 36.03125 48 33.890625 48 \nQ 26.265625 48 22.1875 43.046875 \nQ 18.109375 38.09375 18.109375 28.8125 \nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.1875 \nQ 20.953125 51.171875 25.484375 53.578125 \nQ 30.03125 56 36.53125 56 \nQ 37.453125 56 38.578125 55.875 \nQ 39.703125 55.765625 41.0625 55.515625 \nz\n\" id=\"DejaVuSans-114\"/>\n      <path d=\"M 34.28125 27.484375 \nQ 23.390625 27.484375 19.1875 25 \nQ 14.984375 22.515625 14.984375 16.5 \nQ 14.984375 11.71875 18.140625 8.90625 \nQ 21.296875 6.109375 26.703125 6.109375 \nQ 34.1875 6.109375 38.703125 11.40625 \nQ 43.21875 16.703125 43.21875 25.484375 \nL 43.21875 27.484375 \nz\nM 52.203125 31.203125 \nL 52.203125 0 \nL 43.21875 0 \nL 43.21875 8.296875 \nQ 40.140625 3.328125 35.546875 0.953125 \nQ 30.953125 -1.421875 24.3125 -1.421875 \nQ 15.921875 -1.421875 10.953125 3.296875 \nQ 6 8.015625 6 15.921875 \nQ 6 25.140625 12.171875 29.828125 \nQ 18.359375 34.515625 30.609375 34.515625 \nL 43.21875 34.515625 \nL 43.21875 35.40625 \nQ 43.21875 41.609375 39.140625 45 \nQ 35.0625 48.390625 27.6875 48.390625 \nQ 23 48.390625 18.546875 47.265625 \nQ 14.109375 46.140625 10.015625 43.890625 \nL 10.015625 52.203125 \nQ 14.9375 54.109375 19.578125 55.046875 \nQ 24.21875 56 28.609375 56 \nQ 40.484375 56 46.34375 49.84375 \nQ 52.203125 43.703125 52.203125 31.203125 \nz\n\" id=\"DejaVuSans-97\"/>\n      <path d=\"M 9.421875 75.984375 \nL 18.40625 75.984375 \nL 18.40625 0 \nL 9.421875 0 \nz\n\" id=\"DejaVuSans-108\"/>\n      <path d=\"M 9.421875 54.6875 \nL 18.40625 54.6875 \nL 18.40625 0 \nL 9.421875 0 \nz\nM 9.421875 75.984375 \nL 18.40625 75.984375 \nL 18.40625 64.59375 \nL 9.421875 64.59375 \nz\n\" id=\"DejaVuSans-105\"/>\n      <path d=\"M 5.515625 54.6875 \nL 48.1875 54.6875 \nL 48.1875 46.484375 \nL 14.40625 7.171875 \nL 48.1875 7.171875 
\nL 48.1875 0 \nL 4.296875 0 \nL 4.296875 8.203125 \nL 38.09375 47.515625 \nL 5.515625 47.515625 \nz\n\" id=\"DejaVuSans-122\"/>\n      <path d=\"M 45.40625 46.390625 \nL 45.40625 75.984375 \nL 54.390625 75.984375 \nL 54.390625 0 \nL 45.40625 0 \nL 45.40625 8.203125 \nQ 42.578125 3.328125 38.25 0.953125 \nQ 33.9375 -1.421875 27.875 -1.421875 \nQ 17.96875 -1.421875 11.734375 6.484375 \nQ 5.515625 14.40625 5.515625 27.296875 \nQ 5.515625 40.1875 11.734375 48.09375 \nQ 17.96875 56 27.875 56 \nQ 33.9375 56 38.25 53.625 \nQ 42.578125 51.265625 45.40625 46.390625 \nz\nM 14.796875 27.296875 \nQ 14.796875 17.390625 18.875 11.75 \nQ 22.953125 6.109375 30.078125 6.109375 \nQ 37.203125 6.109375 41.296875 11.75 \nQ 45.40625 17.390625 45.40625 27.296875 \nQ 45.40625 37.203125 41.296875 42.84375 \nQ 37.203125 48.484375 30.078125 48.484375 \nQ 22.953125 48.484375 18.875 42.84375 \nQ 14.796875 37.203125 14.796875 27.296875 \nz\n\" id=\"DejaVuSans-100\"/>\n      <path id=\"DejaVuSans-32\"/>\n      <path d=\"M 37.109375 75.984375 \nL 37.109375 68.5 \nL 28.515625 68.5 \nQ 23.6875 68.5 21.796875 66.546875 \nQ 19.921875 64.59375 19.921875 59.515625 \nL 19.921875 54.6875 \nL 34.71875 54.6875 \nL 34.71875 47.703125 \nL 19.921875 47.703125 \nL 19.921875 0 \nL 10.890625 0 \nL 10.890625 47.703125 \nL 2.296875 47.703125 \nL 2.296875 54.6875 \nL 10.890625 54.6875 \nL 10.890625 58.5 \nQ 10.890625 67.625 15.140625 71.796875 \nQ 19.390625 75.984375 28.609375 75.984375 \nz\n\" id=\"DejaVuSans-102\"/>\n      <path d=\"M 18.109375 8.203125 \nL 18.109375 -20.796875 \nL 9.078125 -20.796875 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.390625 \nQ 20.953125 51.265625 25.265625 53.625 \nQ 29.59375 56 35.59375 56 \nQ 45.5625 56 51.78125 48.09375 \nQ 58.015625 40.1875 58.015625 27.296875 \nQ 58.015625 14.40625 51.78125 6.484375 \nQ 45.5625 -1.421875 35.59375 -1.421875 \nQ 29.59375 -1.421875 25.265625 0.953125 \nQ 20.953125 3.328125 18.109375 8.203125 \nz\nM 48.6875 27.296875 \nQ 48.6875 
37.203125 44.609375 42.84375 \nQ 40.53125 48.484375 33.40625 48.484375 \nQ 26.265625 48.484375 22.1875 42.84375 \nQ 18.109375 37.203125 18.109375 27.296875 \nQ 18.109375 17.390625 22.1875 11.75 \nQ 26.265625 6.109375 33.40625 6.109375 \nQ 40.53125 6.109375 44.609375 11.75 \nQ 48.6875 17.390625 48.6875 27.296875 \nz\n\" id=\"DejaVuSans-112\"/>\n     </defs>\n     <g transform=\"translate(121.016094 256.715781)scale(0.1 -0.1)\">\n      <use xlink:href=\"#DejaVuSans-103\"/>\n      <use x=\"63.476562\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"125\" xlink:href=\"#DejaVuSans-110\"/>\n      <use x=\"188.378906\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"249.902344\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"291.015625\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"352.294922\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"380.078125\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"407.861328\" xlink:href=\"#DejaVuSans-122\"/>\n      <use x=\"460.351562\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"521.875\" xlink:href=\"#DejaVuSans-100\"/>\n      <use x=\"585.351562\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"617.138672\" xlink:href=\"#DejaVuSans-102\"/>\n      <use x=\"652.34375\" xlink:href=\"#DejaVuSans-112\"/>\n      <use x=\"715.820312\" xlink:href=\"#DejaVuSans-114\"/>\n     </g>\n    </g>\n   </g>\n   <g id=\"matplotlib.axis_2\">\n    <g id=\"ytick_1\">\n     <g id=\"line2d_6\">\n      <defs>\n       <path d=\"M 0 0 \nL -3.5 0 \n\" id=\"m057ac773a0\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n      </defs>\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"50.14375\" xlink:href=\"#m057ac773a0\" y=\"228.439219\"/>\n      </g>\n     </g>\n     <g id=\"text_7\">\n      <!-- 0.30 -->\n      <g transform=\"translate(20.878125 232.238437)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" 
xlink:href=\"#DejaVuSans-51\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_2\">\n     <g id=\"line2d_7\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"50.14375\" xlink:href=\"#m057ac773a0\" y=\"201.259219\"/>\n      </g>\n     </g>\n     <g id=\"text_8\">\n      <!-- 0.35 -->\n      <defs>\n       <path d=\"M 10.796875 72.90625 \nL 49.515625 72.90625 \nL 49.515625 64.59375 \nL 19.828125 64.59375 \nL 19.828125 46.734375 \nQ 21.96875 47.46875 24.109375 47.828125 \nQ 26.265625 48.1875 28.421875 48.1875 \nQ 40.625 48.1875 47.75 41.5 \nQ 54.890625 34.8125 54.890625 23.390625 \nQ 54.890625 11.625 47.5625 5.09375 \nQ 40.234375 -1.421875 26.90625 -1.421875 \nQ 22.3125 -1.421875 17.546875 -0.640625 \nQ 12.796875 0.140625 7.71875 1.703125 \nL 7.71875 11.625 \nQ 12.109375 9.234375 16.796875 8.0625 \nQ 21.484375 6.890625 26.703125 6.890625 \nQ 35.15625 6.890625 40.078125 11.328125 \nQ 45.015625 15.765625 45.015625 23.390625 \nQ 45.015625 31 40.078125 35.4375 \nQ 35.15625 39.890625 26.703125 39.890625 \nQ 22.75 39.890625 18.8125 39.015625 \nQ 14.890625 38.140625 10.796875 36.28125 \nz\n\" id=\"DejaVuSans-53\"/>\n      </defs>\n      <g transform=\"translate(20.878125 205.058437)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-51\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-53\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_3\">\n     <g id=\"line2d_8\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"50.14375\" xlink:href=\"#m057ac773a0\" y=\"174.079219\"/>\n      </g>\n     </g>\n     <g id=\"text_9\">\n      <!-- 0.40 -->\n      <g transform=\"translate(20.878125 177.878437)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use 
x=\"95.410156\" xlink:href=\"#DejaVuSans-52\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_4\">\n     <g id=\"line2d_9\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"50.14375\" xlink:href=\"#m057ac773a0\" y=\"146.899219\"/>\n      </g>\n     </g>\n     <g id=\"text_10\">\n      <!-- 0.45 -->\n      <g transform=\"translate(20.878125 150.698437)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-52\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-53\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_5\">\n     <g id=\"line2d_10\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"50.14375\" xlink:href=\"#m057ac773a0\" y=\"119.719219\"/>\n      </g>\n     </g>\n     <g id=\"text_11\">\n      <!-- 0.50 -->\n      <g transform=\"translate(20.878125 123.518437)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-53\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_6\">\n     <g id=\"line2d_11\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"50.14375\" xlink:href=\"#m057ac773a0\" y=\"92.539219\"/>\n      </g>\n     </g>\n     <g id=\"text_12\">\n      <!-- 0.55 -->\n      <g transform=\"translate(20.878125 96.338437)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-53\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-53\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_7\">\n     <g id=\"line2d_12\">\n      <g>\n       <use 
style=\"stroke:#000000;stroke-width:0.8;\" x=\"50.14375\" xlink:href=\"#m057ac773a0\" y=\"65.359219\"/>\n      </g>\n     </g>\n     <g id=\"text_13\">\n      <!-- 0.60 -->\n      <defs>\n       <path d=\"M 33.015625 40.375 \nQ 26.375 40.375 22.484375 35.828125 \nQ 18.609375 31.296875 18.609375 23.390625 \nQ 18.609375 15.53125 22.484375 10.953125 \nQ 26.375 6.390625 33.015625 6.390625 \nQ 39.65625 6.390625 43.53125 10.953125 \nQ 47.40625 15.53125 47.40625 23.390625 \nQ 47.40625 31.296875 43.53125 35.828125 \nQ 39.65625 40.375 33.015625 40.375 \nz\nM 52.59375 71.296875 \nL 52.59375 62.3125 \nQ 48.875 64.0625 45.09375 64.984375 \nQ 41.3125 65.921875 37.59375 65.921875 \nQ 27.828125 65.921875 22.671875 59.328125 \nQ 17.53125 52.734375 16.796875 39.40625 \nQ 19.671875 43.65625 24.015625 45.921875 \nQ 28.375 48.1875 33.59375 48.1875 \nQ 44.578125 48.1875 50.953125 41.515625 \nQ 57.328125 34.859375 57.328125 23.390625 \nQ 57.328125 12.15625 50.6875 5.359375 \nQ 44.046875 -1.421875 33.015625 -1.421875 \nQ 20.359375 -1.421875 13.671875 8.265625 \nQ 6.984375 17.96875 6.984375 36.375 \nQ 6.984375 53.65625 15.1875 63.9375 \nQ 23.390625 74.21875 37.203125 74.21875 \nQ 40.921875 74.21875 44.703125 73.484375 \nQ 48.484375 72.75 52.59375 71.296875 \nz\n\" id=\"DejaVuSans-54\"/>\n      </defs>\n      <g transform=\"translate(20.878125 69.158437)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-54\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_8\">\n     <g id=\"line2d_13\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"50.14375\" xlink:href=\"#m057ac773a0\" y=\"38.179219\"/>\n      </g>\n     </g>\n     <g id=\"text_14\">\n      <!-- 0.65 -->\n      <g transform=\"translate(20.878125 41.978437)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n   
    <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-54\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-53\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_9\">\n     <g id=\"line2d_14\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"50.14375\" xlink:href=\"#m057ac773a0\" y=\"10.999219\"/>\n      </g>\n     </g>\n     <g id=\"text_15\">\n      <!-- 0.70 -->\n      <defs>\n       <path d=\"M 8.203125 72.90625 \nL 55.078125 72.90625 \nL 55.078125 68.703125 \nL 28.609375 0 \nL 18.3125 0 \nL 43.21875 64.59375 \nL 8.203125 64.59375 \nz\n\" id=\"DejaVuSans-55\"/>\n      </defs>\n      <g transform=\"translate(20.878125 14.798437)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-55\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"text_16\">\n     <!-- generalized fnr -->\n     <g transform=\"translate(14.798438 157.561406)rotate(-90)scale(0.1 -0.1)\">\n      <use xlink:href=\"#DejaVuSans-103\"/>\n      <use x=\"63.476562\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"125\" xlink:href=\"#DejaVuSans-110\"/>\n      <use x=\"188.378906\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"249.902344\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"291.015625\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"352.294922\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"380.078125\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"407.861328\" xlink:href=\"#DejaVuSans-122\"/>\n      <use x=\"460.351562\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"521.875\" xlink:href=\"#DejaVuSans-100\"/>\n      <use x=\"585.351562\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"617.138672\" xlink:href=\"#DejaVuSans-102\"/>\n      <use x=\"652.34375\" xlink:href=\"#DejaVuSans-110\"/>\n      <use 
x=\"715.722656\" xlink:href=\"#DejaVuSans-114\"/>\n     </g>\n    </g>\n   </g>\n   <g id=\"line2d_15\">\n    <path clip-path=\"url(#pf1fb1c960f)\" d=\"M 66.127718 266.995469 \nL 100.52783 -1 \n\" style=\"fill:none;stroke:#0000ff;stroke-linecap:square;stroke-width:1.5;\"/>\n   </g>\n   <g id=\"line2d_16\">\n    <path clip-path=\"url(#pf1fb1c960f)\" d=\"M 106.745455 266.995469 \nL 220.01875 17.794219 \n\" style=\"fill:none;stroke:#ff0000;stroke-linecap:square;stroke-width:1.5;\"/>\n   </g>\n   <g id=\"line2d_17\">\n    <path clip-path=\"url(#pf1fb1c960f)\" d=\"M 50.14375 119.610014 \nL 501.739162 119.610014 \n\" style=\"fill:none;stroke:#808080;stroke-dasharray:5.55,2.4;stroke-dashoffset:0;stroke-width:1.5;\"/>\n   </g>\n   <g id=\"patch_3\">\n    <path d=\"M 50.14375 228.439219 \nL 50.14375 10.999219 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_4\">\n    <path d=\"M 267.58375 228.439219 \nL 267.58375 10.999219 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_5\">\n    <path d=\"M 50.14375 228.439219 \nL 267.58375 228.439219 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_6\">\n    <path d=\"M 50.14375 10.999219 \nL 267.58375 10.999219 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"legend_1\">\n    <g id=\"patch_7\">\n     <path d=\"M 283.28135 107.067969 \nL 491.539162 107.067969 \nQ 493.539162 107.067969 493.539162 105.067969 \nL 493.539162 17.999219 \nQ 493.539162 15.999219 491.539162 15.999219 \nL 283.28135 15.999219 \nQ 281.28135 15.999219 281.28135 17.999219 \nL 281.28135 105.067969 \nQ 281.28135 107.067969 283.28135 107.067969 \nz\n\" style=\"fill:#ffffff;opacity:0.8;stroke:#cccccc;stroke-linejoin:miter;\"/>\n    </g>\n    <g 
id=\"line2d_18\">\n     <path d=\"M 285.28135 24.097656 \nL 305.28135 24.097656 \n\" style=\"fill:none;stroke:#0000ff;stroke-linecap:square;stroke-width:1.5;\"/>\n    </g>\n    <g id=\"line2d_19\"/>\n    <g id=\"text_17\">\n     <!-- All calibrated classifiers (Females) -->\n     <defs>\n      <path d=\"M 34.1875 63.1875 \nL 20.796875 26.90625 \nL 47.609375 26.90625 \nz\nM 28.609375 72.90625 \nL 39.796875 72.90625 \nL 67.578125 0 \nL 57.328125 0 \nL 50.6875 18.703125 \nL 17.828125 18.703125 \nL 11.1875 0 \nL 0.78125 0 \nz\n\" id=\"DejaVuSans-65\"/>\n      <path d=\"M 48.78125 52.59375 \nL 48.78125 44.1875 \nQ 44.96875 46.296875 41.140625 47.34375 \nQ 37.3125 48.390625 33.40625 48.390625 \nQ 24.65625 48.390625 19.8125 42.84375 \nQ 14.984375 37.3125 14.984375 27.296875 \nQ 14.984375 17.28125 19.8125 11.734375 \nQ 24.65625 6.203125 33.40625 6.203125 \nQ 37.3125 6.203125 41.140625 7.25 \nQ 44.96875 8.296875 48.78125 10.40625 \nL 48.78125 2.09375 \nQ 45.015625 0.34375 40.984375 -0.53125 \nQ 36.96875 -1.421875 32.421875 -1.421875 \nQ 20.0625 -1.421875 12.78125 6.34375 \nQ 5.515625 14.109375 5.515625 27.296875 \nQ 5.515625 40.671875 12.859375 48.328125 \nQ 20.21875 56 33.015625 56 \nQ 37.15625 56 41.109375 55.140625 \nQ 45.0625 54.296875 48.78125 52.59375 \nz\n\" id=\"DejaVuSans-99\"/>\n      <path d=\"M 48.6875 27.296875 \nQ 48.6875 37.203125 44.609375 42.84375 \nQ 40.53125 48.484375 33.40625 48.484375 \nQ 26.265625 48.484375 22.1875 42.84375 \nQ 18.109375 37.203125 18.109375 27.296875 \nQ 18.109375 17.390625 22.1875 11.75 \nQ 26.265625 6.109375 33.40625 6.109375 \nQ 40.53125 6.109375 44.609375 11.75 \nQ 48.6875 17.390625 48.6875 27.296875 \nz\nM 18.109375 46.390625 \nQ 20.953125 51.265625 25.265625 53.625 \nQ 29.59375 56 35.59375 56 \nQ 45.5625 56 51.78125 48.09375 \nQ 58.015625 40.1875 58.015625 27.296875 \nQ 58.015625 14.40625 51.78125 6.484375 \nQ 45.5625 -1.421875 35.59375 -1.421875 \nQ 29.59375 -1.421875 25.265625 0.953125 \nQ 20.953125 3.328125 18.109375 8.203125 
\nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 75.984375 \nL 18.109375 75.984375 \nz\n\" id=\"DejaVuSans-98\"/>\n      <path d=\"M 18.3125 70.21875 \nL 18.3125 54.6875 \nL 36.8125 54.6875 \nL 36.8125 47.703125 \nL 18.3125 47.703125 \nL 18.3125 18.015625 \nQ 18.3125 11.328125 20.140625 9.421875 \nQ 21.96875 7.515625 27.59375 7.515625 \nL 36.8125 7.515625 \nL 36.8125 0 \nL 27.59375 0 \nQ 17.1875 0 13.234375 3.875 \nQ 9.28125 7.765625 9.28125 18.015625 \nL 9.28125 47.703125 \nL 2.6875 47.703125 \nL 2.6875 54.6875 \nL 9.28125 54.6875 \nL 9.28125 70.21875 \nz\n\" id=\"DejaVuSans-116\"/>\n      <path d=\"M 44.28125 53.078125 \nL 44.28125 44.578125 \nQ 40.484375 46.53125 36.375 47.5 \nQ 32.28125 48.484375 27.875 48.484375 \nQ 21.1875 48.484375 17.84375 46.4375 \nQ 14.5 44.390625 14.5 40.28125 \nQ 14.5 37.15625 16.890625 35.375 \nQ 19.28125 33.59375 26.515625 31.984375 \nL 29.59375 31.296875 \nQ 39.15625 29.25 43.1875 25.515625 \nQ 47.21875 21.78125 47.21875 15.09375 \nQ 47.21875 7.46875 41.1875 3.015625 \nQ 35.15625 -1.421875 24.609375 -1.421875 \nQ 20.21875 -1.421875 15.453125 -0.5625 \nQ 10.6875 0.296875 5.421875 2 \nL 5.421875 11.28125 \nQ 10.40625 8.6875 15.234375 7.390625 \nQ 20.0625 6.109375 24.8125 6.109375 \nQ 31.15625 6.109375 34.5625 8.28125 \nQ 37.984375 10.453125 37.984375 14.40625 \nQ 37.984375 18.0625 35.515625 20.015625 \nQ 33.0625 21.96875 24.703125 23.78125 \nL 21.578125 24.515625 \nQ 13.234375 26.265625 9.515625 29.90625 \nQ 5.8125 33.546875 5.8125 39.890625 \nQ 5.8125 47.609375 11.28125 51.796875 \nQ 16.75 56 26.8125 56 \nQ 31.78125 56 36.171875 55.265625 \nQ 40.578125 54.546875 44.28125 53.078125 \nz\n\" id=\"DejaVuSans-115\"/>\n      <path d=\"M 31 75.875 \nQ 24.46875 64.65625 21.28125 53.65625 \nQ 18.109375 42.671875 18.109375 31.390625 \nQ 18.109375 20.125 21.3125 9.0625 \nQ 24.515625 -2 31 -13.1875 \nL 23.1875 -13.1875 \nQ 15.875 -1.703125 12.234375 9.375 \nQ 8.59375 20.453125 8.59375 31.390625 \nQ 8.59375 42.28125 12.203125 53.3125 \nQ 15.828125 
64.359375 23.1875 75.875 \nz\n\" id=\"DejaVuSans-40\"/>\n      <path d=\"M 9.8125 72.90625 \nL 51.703125 72.90625 \nL 51.703125 64.59375 \nL 19.671875 64.59375 \nL 19.671875 43.109375 \nL 48.578125 43.109375 \nL 48.578125 34.8125 \nL 19.671875 34.8125 \nL 19.671875 0 \nL 9.8125 0 \nz\n\" id=\"DejaVuSans-70\"/>\n      <path d=\"M 52 44.1875 \nQ 55.375 50.25 60.0625 53.125 \nQ 64.75 56 71.09375 56 \nQ 79.640625 56 84.28125 50.015625 \nQ 88.921875 44.046875 88.921875 33.015625 \nL 88.921875 0 \nL 79.890625 0 \nL 79.890625 32.71875 \nQ 79.890625 40.578125 77.09375 44.375 \nQ 74.3125 48.1875 68.609375 48.1875 \nQ 61.625 48.1875 57.5625 43.546875 \nQ 53.515625 38.921875 53.515625 30.90625 \nL 53.515625 0 \nL 44.484375 0 \nL 44.484375 32.71875 \nQ 44.484375 40.625 41.703125 44.40625 \nQ 38.921875 48.1875 33.109375 48.1875 \nQ 26.21875 48.1875 22.15625 43.53125 \nQ 18.109375 38.875 18.109375 30.90625 \nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.1875 \nQ 21.1875 51.21875 25.484375 53.609375 \nQ 29.78125 56 35.6875 56 \nQ 41.65625 56 45.828125 52.96875 \nQ 50 49.953125 52 44.1875 \nz\n\" id=\"DejaVuSans-109\"/>\n      <path d=\"M 8.015625 75.875 \nL 15.828125 75.875 \nQ 23.140625 64.359375 26.78125 53.3125 \nQ 30.421875 42.28125 30.421875 31.390625 \nQ 30.421875 20.453125 26.78125 9.375 \nQ 23.140625 -1.703125 15.828125 -13.1875 \nL 8.015625 -13.1875 \nQ 14.5 -2 17.703125 9.0625 \nQ 20.90625 20.125 20.90625 31.390625 \nQ 20.90625 42.671875 17.703125 53.65625 \nQ 14.5 64.65625 8.015625 75.875 \nz\n\" id=\"DejaVuSans-41\"/>\n     </defs>\n     <g transform=\"translate(313.28135 27.597656)scale(0.1 -0.1)\">\n      <use xlink:href=\"#DejaVuSans-65\"/>\n      <use x=\"68.408203\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"96.191406\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"123.974609\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"155.761719\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"210.742188\" 
xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"272.021484\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"299.804688\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"327.587891\" xlink:href=\"#DejaVuSans-98\"/>\n      <use x=\"391.064453\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"432.177734\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"493.457031\" xlink:href=\"#DejaVuSans-116\"/>\n      <use x=\"532.666016\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"594.189453\" xlink:href=\"#DejaVuSans-100\"/>\n      <use x=\"657.666016\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"689.453125\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"744.433594\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"772.216797\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"833.496094\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"885.595703\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"937.695312\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"965.478516\" xlink:href=\"#DejaVuSans-102\"/>\n      <use x=\"1000.683594\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"1028.466797\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1089.990234\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"1131.103516\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"1183.203125\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"1214.990234\" xlink:href=\"#DejaVuSans-40\"/>\n      <use x=\"1254.003906\" xlink:href=\"#DejaVuSans-70\"/>\n      <use x=\"1311.445312\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1372.96875\" xlink:href=\"#DejaVuSans-109\"/>\n      <use x=\"1470.380859\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"1531.660156\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"1559.443359\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1620.966797\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"1673.066406\" xlink:href=\"#DejaVuSans-41\"/>\n     </g>\n    </g>\n    <g id=\"line2d_20\">\n     <path d=\"M 285.28135 38.775781 \nL 305.28135 
38.775781 \n\" style=\"fill:none;stroke:#ff0000;stroke-linecap:square;stroke-width:1.5;\"/>\n    </g>\n    <g id=\"line2d_21\"/>\n    <g id=\"text_18\">\n     <!-- All calibrated classifiers (Males) -->\n     <defs>\n      <path d=\"M 9.8125 72.90625 \nL 24.515625 72.90625 \nL 43.109375 23.296875 \nL 61.8125 72.90625 \nL 76.515625 72.90625 \nL 76.515625 0 \nL 66.890625 0 \nL 66.890625 64.015625 \nL 48.09375 14.015625 \nL 38.1875 14.015625 \nL 19.390625 64.015625 \nL 19.390625 0 \nL 9.8125 0 \nz\n\" id=\"DejaVuSans-77\"/>\n     </defs>\n     <g transform=\"translate(313.28135 42.275781)scale(0.1 -0.1)\">\n      <use xlink:href=\"#DejaVuSans-65\"/>\n      <use x=\"68.408203\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"96.191406\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"123.974609\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"155.761719\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"210.742188\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"272.021484\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"299.804688\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"327.587891\" xlink:href=\"#DejaVuSans-98\"/>\n      <use x=\"391.064453\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"432.177734\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"493.457031\" xlink:href=\"#DejaVuSans-116\"/>\n      <use x=\"532.666016\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"594.189453\" xlink:href=\"#DejaVuSans-100\"/>\n      <use x=\"657.666016\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"689.453125\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"744.433594\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"772.216797\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"833.496094\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"885.595703\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"937.695312\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"965.478516\" xlink:href=\"#DejaVuSans-102\"/>\n      <use x=\"1000.683594\" 
xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"1028.466797\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1089.990234\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"1131.103516\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"1183.203125\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"1214.990234\" xlink:href=\"#DejaVuSans-40\"/>\n      <use x=\"1254.003906\" xlink:href=\"#DejaVuSans-77\"/>\n      <use x=\"1340.283203\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"1401.5625\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"1429.345703\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1490.869141\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"1542.96875\" xlink:href=\"#DejaVuSans-41\"/>\n     </g>\n    </g>\n    <g id=\"PathCollection_5\">\n     <g>\n      <use style=\"fill:#0000ff;stroke:#0000ff;\" x=\"295.28135\" xlink:href=\"#m145838425a\" y=\"54.328906\"/>\n     </g>\n    </g>\n    <g id=\"text_19\">\n     <!-- Original classifier (Females) -->\n     <defs>\n      <path d=\"M 39.40625 66.21875 \nQ 28.65625 66.21875 22.328125 58.203125 \nQ 16.015625 50.203125 16.015625 36.375 \nQ 16.015625 22.609375 22.328125 14.59375 \nQ 28.65625 6.59375 39.40625 6.59375 \nQ 50.140625 6.59375 56.421875 14.59375 \nQ 62.703125 22.609375 62.703125 36.375 \nQ 62.703125 50.203125 56.421875 58.203125 \nQ 50.140625 66.21875 39.40625 66.21875 \nz\nM 39.40625 74.21875 \nQ 54.734375 74.21875 63.90625 63.9375 \nQ 73.09375 53.65625 73.09375 36.375 \nQ 73.09375 19.140625 63.90625 8.859375 \nQ 54.734375 -1.421875 39.40625 -1.421875 \nQ 24.03125 -1.421875 14.8125 8.828125 \nQ 5.609375 19.09375 5.609375 36.375 \nQ 5.609375 53.65625 14.8125 63.9375 \nQ 24.03125 74.21875 39.40625 74.21875 \nz\n\" id=\"DejaVuSans-79\"/>\n     </defs>\n     <g transform=\"translate(313.28135 56.953906)scale(0.1 -0.1)\">\n      <use xlink:href=\"#DejaVuSans-79\"/>\n      <use x=\"78.710938\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"119.824219\" xlink:href=\"#DejaVuSans-105\"/>\n      
<use x=\"147.607422\" xlink:href=\"#DejaVuSans-103\"/>\n      <use x=\"211.083984\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"238.867188\" xlink:href=\"#DejaVuSans-110\"/>\n      <use x=\"302.246094\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"363.525391\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"391.308594\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"423.095703\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"478.076172\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"505.859375\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"567.138672\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"619.238281\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"671.337891\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"699.121094\" xlink:href=\"#DejaVuSans-102\"/>\n      <use x=\"734.326172\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"762.109375\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"823.632812\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"864.746094\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"896.533203\" xlink:href=\"#DejaVuSans-40\"/>\n      <use x=\"935.546875\" xlink:href=\"#DejaVuSans-70\"/>\n      <use x=\"992.988281\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1054.511719\" xlink:href=\"#DejaVuSans-109\"/>\n      <use x=\"1151.923828\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"1213.203125\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"1240.986328\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1302.509766\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"1354.609375\" xlink:href=\"#DejaVuSans-41\"/>\n     </g>\n    </g>\n    <g id=\"PathCollection_6\">\n     <g>\n      <use style=\"fill:#ff0000;stroke:#ff0000;\" x=\"295.28135\" xlink:href=\"#m15d4b4a51c\" y=\"69.007031\"/>\n     </g>\n    </g>\n    <g id=\"text_20\">\n     <!-- Original classifier (Males) -->\n     <g transform=\"translate(313.28135 71.632031)scale(0.1 -0.1)\">\n      <use xlink:href=\"#DejaVuSans-79\"/>\n      <use 
x=\"78.710938\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"119.824219\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"147.607422\" xlink:href=\"#DejaVuSans-103\"/>\n      <use x=\"211.083984\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"238.867188\" xlink:href=\"#DejaVuSans-110\"/>\n      <use x=\"302.246094\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"363.525391\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"391.308594\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"423.095703\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"478.076172\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"505.859375\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"567.138672\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"619.238281\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"671.337891\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"699.121094\" xlink:href=\"#DejaVuSans-102\"/>\n      <use x=\"734.326172\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"762.109375\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"823.632812\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"864.746094\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"896.533203\" xlink:href=\"#DejaVuSans-40\"/>\n      <use x=\"935.546875\" xlink:href=\"#DejaVuSans-77\"/>\n      <use x=\"1021.826172\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"1083.105469\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"1110.888672\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1172.412109\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"1224.511719\" xlink:href=\"#DejaVuSans-41\"/>\n     </g>\n    </g>\n    <g id=\"PathCollection_7\">\n     <g>\n      <use style=\"fill:#0000ff;stroke:#0000ff;\" x=\"295.28135\" xlink:href=\"#m790b3b3eff\" y=\"83.685156\"/>\n     </g>\n    </g>\n    <g id=\"text_21\">\n     <!-- Post-processed classifier (Females) -->\n     <defs>\n      <path d=\"M 19.671875 64.796875 \nL 19.671875 37.40625 \nL 32.078125 37.40625 \nQ 38.96875 37.40625 42.71875 
40.96875 \nQ 46.484375 44.53125 46.484375 51.125 \nQ 46.484375 57.671875 42.71875 61.234375 \nQ 38.96875 64.796875 32.078125 64.796875 \nz\nM 9.8125 72.90625 \nL 32.078125 72.90625 \nQ 44.34375 72.90625 50.609375 67.359375 \nQ 56.890625 61.8125 56.890625 51.125 \nQ 56.890625 40.328125 50.609375 34.8125 \nQ 44.34375 29.296875 32.078125 29.296875 \nL 19.671875 29.296875 \nL 19.671875 0 \nL 9.8125 0 \nz\n\" id=\"DejaVuSans-80\"/>\n      <path d=\"M 30.609375 48.390625 \nQ 23.390625 48.390625 19.1875 42.75 \nQ 14.984375 37.109375 14.984375 27.296875 \nQ 14.984375 17.484375 19.15625 11.84375 \nQ 23.34375 6.203125 30.609375 6.203125 \nQ 37.796875 6.203125 41.984375 11.859375 \nQ 46.1875 17.53125 46.1875 27.296875 \nQ 46.1875 37.015625 41.984375 42.703125 \nQ 37.796875 48.390625 30.609375 48.390625 \nz\nM 30.609375 56 \nQ 42.328125 56 49.015625 48.375 \nQ 55.71875 40.765625 55.71875 27.296875 \nQ 55.71875 13.875 49.015625 6.21875 \nQ 42.328125 -1.421875 30.609375 -1.421875 \nQ 18.84375 -1.421875 12.171875 6.21875 \nQ 5.515625 13.875 5.515625 27.296875 \nQ 5.515625 40.765625 12.171875 48.375 \nQ 18.84375 56 30.609375 56 \nz\n\" id=\"DejaVuSans-111\"/>\n      <path d=\"M 4.890625 31.390625 \nL 31.203125 31.390625 \nL 31.203125 23.390625 \nL 4.890625 23.390625 \nz\n\" id=\"DejaVuSans-45\"/>\n     </defs>\n     <g transform=\"translate(313.28135 86.310156)scale(0.1 -0.1)\">\n      <use xlink:href=\"#DejaVuSans-80\"/>\n      <use x=\"60.255859\" xlink:href=\"#DejaVuSans-111\"/>\n      <use x=\"121.4375\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"173.537109\" xlink:href=\"#DejaVuSans-116\"/>\n      <use x=\"212.746094\" xlink:href=\"#DejaVuSans-45\"/>\n      <use x=\"248.830078\" xlink:href=\"#DejaVuSans-112\"/>\n      <use x=\"312.306641\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"353.388672\" xlink:href=\"#DejaVuSans-111\"/>\n      <use x=\"414.570312\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"469.550781\" xlink:href=\"#DejaVuSans-101\"/>\n      <use 
x=\"531.074219\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"583.173828\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"635.273438\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"696.796875\" xlink:href=\"#DejaVuSans-100\"/>\n      <use x=\"760.273438\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"792.060547\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"847.041016\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"874.824219\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"936.103516\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"988.203125\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"1040.302734\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"1068.085938\" xlink:href=\"#DejaVuSans-102\"/>\n      <use x=\"1103.291016\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"1131.074219\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1192.597656\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"1233.710938\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"1265.498047\" xlink:href=\"#DejaVuSans-40\"/>\n      <use x=\"1304.511719\" xlink:href=\"#DejaVuSans-70\"/>\n      <use x=\"1361.953125\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1423.476562\" xlink:href=\"#DejaVuSans-109\"/>\n      <use x=\"1520.888672\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"1582.167969\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"1609.951172\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1671.474609\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"1723.574219\" xlink:href=\"#DejaVuSans-41\"/>\n     </g>\n    </g>\n    <g id=\"PathCollection_8\">\n     <g>\n      <use style=\"fill:#ff0000;stroke:#ff0000;\" x=\"295.28135\" xlink:href=\"#m84d4c46120\" y=\"98.363281\"/>\n     </g>\n    </g>\n    <g id=\"text_22\">\n     <!-- Post-processed classifier (Males) -->\n     <g transform=\"translate(313.28135 100.988281)scale(0.1 -0.1)\">\n      <use xlink:href=\"#DejaVuSans-80\"/>\n      <use x=\"60.255859\" xlink:href=\"#DejaVuSans-111\"/>\n      
<use x=\"121.4375\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"173.537109\" xlink:href=\"#DejaVuSans-116\"/>\n      <use x=\"212.746094\" xlink:href=\"#DejaVuSans-45\"/>\n      <use x=\"248.830078\" xlink:href=\"#DejaVuSans-112\"/>\n      <use x=\"312.306641\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"353.388672\" xlink:href=\"#DejaVuSans-111\"/>\n      <use x=\"414.570312\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"469.550781\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"531.074219\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"583.173828\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"635.273438\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"696.796875\" xlink:href=\"#DejaVuSans-100\"/>\n      <use x=\"760.273438\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"792.060547\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"847.041016\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"874.824219\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"936.103516\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"988.203125\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"1040.302734\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"1068.085938\" xlink:href=\"#DejaVuSans-102\"/>\n      <use x=\"1103.291016\" xlink:href=\"#DejaVuSans-105\"/>\n      <use x=\"1131.074219\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1192.597656\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"1233.710938\" xlink:href=\"#DejaVuSans-32\"/>\n      <use x=\"1265.498047\" xlink:href=\"#DejaVuSans-40\"/>\n      <use x=\"1304.511719\" xlink:href=\"#DejaVuSans-77\"/>\n      <use x=\"1390.791016\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"1452.070312\" xlink:href=\"#DejaVuSans-108\"/>\n      <use x=\"1479.853516\" xlink:href=\"#DejaVuSans-101\"/>\n      <use x=\"1541.376953\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"1593.476562\" xlink:href=\"#DejaVuSans-41\"/>\n     </g>\n    </g>\n   </g>\n  </g>\n </g>\n <defs>\n  <clipPath 
id=\"pf1fb1c960f\">\n   <rect height=\"217.44\" width=\"217.44\" x=\"50.14375\" y=\"10.999219\"/>\n  </clipPath>\n </defs>\n</svg>\n",
+      "text/plain": "<Figure size 432x288 with 1 Axes>"
      },
      "metadata": {
       "needs_background": "light"
@@ -714,7 +460,7 @@
     "y_pred = postproc.predict_proba(X_test)[:, 1]\n",
     "y_lr = postproc.estimator_.predict_proba(X_test)[:, 1]\n",
     "br = postproc.postprocessor_.base_rates_\n",
-    "i = X_test.sex == 1\n",
+    "i = X_test.index.get_level_values('sex') == 1\n",
     "\n",
     "plt.plot([0, br[0]], [0, 1-br[0]], '-b', label='All calibrated classifiers (Females)')\n",
     "plt.plot([0, br[1]], [0, 1-br[1]], '-r', label='All calibrated classifiers (Males)')\n",
@@ -736,8 +482,8 @@
     "plt.plot([0, 1], [generalized_fnr(y_test, y_pred)]*2, '--', c='0.5')\n",
     "\n",
     "plt.axis('square')\n",
-    "plt.xlim([0, 0.4])\n",
-    "plt.ylim([0.4, 0.8])\n",
+    "plt.xlim([0.0, 0.4])\n",
+    "plt.ylim([0.3, 0.7])\n",
     "plt.xlabel('generalized fpr');\n",
     "plt.ylabel('generalized fnr');\n",
     "plt.legend(bbox_to_anchor=(1.04,1), loc='upper left');"
@@ -747,15 +493,28 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We can see the generalized false negative rate is approximately equalized and the classifiers remain close to the calibration lines."
+    "We can see the generalized false negative rate is approximately equalized and the classifiers remain close to the calibration lines.\n",
+    "\n",
+    "We can quantify the discrepancy between protected groups using the `difference` operator:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "0.0027891187222710556"
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "difference(generalized_fnr, y_test, y_pred, prot_attr='sex')"
+   ]
   }
  ],
  "metadata": {
@@ -774,9 +533,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.6"
+   "version": "3.6.9-final"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
\ No newline at end of file

From 789e96b099b28e98109dd7a436dce8610e6a1229 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Wed, 19 Feb 2020 17:10:23 -0500
Subject: [PATCH 61/61] added comments to tests

---
 tests/sklearn/test_adversarial_debiasing.py     |  8 ++++++++
 tests/sklearn/test_calibrated_equalized_odds.py | 12 +++++++++++-
 tests/sklearn/test_datasets.py                  | 10 ++++++++++
 tests/sklearn/test_metrics.py                   | 14 ++++++++++++++
 tests/sklearn/test_reweighing.py                | 17 ++++++-----------
 5 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/tests/sklearn/test_adversarial_debiasing.py b/tests/sklearn/test_adversarial_debiasing.py
index c28fb17c..f7dd19d4 100644
--- a/tests/sklearn/test_adversarial_debiasing.py
+++ b/tests/sklearn/test_adversarial_debiasing.py
@@ -15,6 +15,7 @@
                           'hours-per-week'], features_to_drop=[])
 
 def test_adv_debias_old_reproduce():
+    """Test that the old AdversarialDebiasing is reproducible."""
     sess = tf.Session()
     old_adv_deb = OldAdversarialDebiasing(unprivileged_groups=[{'sex': 0}],
                                           privileged_groups=[{'sex': 1}],
@@ -34,6 +35,8 @@ def test_adv_debias_old_reproduce():
     assert np.allclose(old_preds.labels, old_preds2.labels)
 
 def test_adv_debias_old():
+    """Test that the predictions of the old and new AdversarialDebiasing match.
+    """
     tf.reset_default_graph()
     sess = tf.Session()
     old_adv_deb = OldAdversarialDebiasing(unprivileged_groups=[{'sex': 0}],
@@ -48,6 +51,7 @@ def test_adv_debias_old():
     assert np.allclose(old_preds.labels.flatten(), new_preds)
 
 def test_adv_debias_reproduce():
+    """Test that the new AdversarialDebiasing is reproducible."""
     adv_deb = AdversarialDebiasing('sex', num_epochs=5, random_state=123)
     new_preds = adv_deb.fit(X, y).predict(X)
     adv_deb.sess_.close()
@@ -60,12 +64,16 @@ def test_adv_debias_reproduce():
     assert new_acc == accuracy_score(y, new_preds)
 
 def test_adv_debias_intersection():
+    """Test that the new AdversarialDebiasing runs with >2 protected groups."""
     adv_deb = AdversarialDebiasing(scope_name='intersect', num_epochs=5)
     adv_deb.fit(X, y)
     adv_deb.sess_.close()
     assert adv_deb.adversary_logits_.shape[1] == 4
 
 def test_adv_debias_grid():
+    """Test that the new AdversarialDebiasing works in a grid search (and that
+    debiasing results in reduced accuracy).
+    """
     adv_deb = AdversarialDebiasing('sex', num_epochs=10, random_state=123)
 
     params = {'debias': [True, False]}
diff --git a/tests/sklearn/test_calibrated_equalized_odds.py b/tests/sklearn/test_calibrated_equalized_odds.py
index 3352b548..3bfffaf5 100644
--- a/tests/sklearn/test_calibrated_equalized_odds.py
+++ b/tests/sklearn/test_calibrated_equalized_odds.py
@@ -14,6 +14,9 @@
                           'hours-per-week'], features_to_drop=[])
 
 def test_calib_eq_odds_sex_weighted():
+    """Test that the old and new CalibratedEqualizedOdds produce the same mix
+    rates.
+    """
     logreg = LogisticRegression(solver='lbfgs', max_iter=500)
     y_pred = logreg.fit(X, y, sample_weight=sample_weight).predict_proba(X)
     adult_pred = adult.copy()
@@ -28,6 +31,12 @@ def test_calib_eq_odds_sex_weighted():
     assert np.isclose(orig_cal_eq_odds.unpriv_mix_rate, cal_eq_odds.mix_rates_[0])
 
 def test_postprocessingmeta_fnr():
+    """Test that the old and new CalibratedEqualizedOdds produce the same
+    probability predictions.
+
+    This tests the whole "pipeline": splitting the data the same way, training a
+    LogisticRegression classifier, and training the post-processor the same way.
+    """
     adult_train, adult_test = adult.split([0.9], shuffle=False)
     X_tr, X_te, y_tr, _, sw_tr, _ = train_test_split(X, y, sample_weight,
                 train_size=0.9, shuffle=False)
@@ -52,7 +61,8 @@ def test_postprocessingmeta_fnr():
     orig_cal_eq_odds.fit(adult_post, adult_pred)
 
     cal_eq_odds = PostProcessingMeta(estimator=logreg,
-            postprocessor=CalibratedEqualizedOdds('sex', cost_constraint='fnr', random_state=0),
+            postprocessor=CalibratedEqualizedOdds('sex', cost_constraint='fnr',
+                                                  random_state=0),
             shuffle=False)
     cal_eq_odds.fit(X_tr, y_tr, sample_weight=sw_tr)
 
diff --git a/tests/sklearn/test_datasets.py b/tests/sklearn/test_datasets.py
index 1d2ec6a0..2b0fb2c5 100644
--- a/tests/sklearn/test_datasets.py
+++ b/tests/sklearn/test_datasets.py
@@ -15,6 +15,7 @@
                 dropna=False)
 
 def test_standardize_dataset_basic():
+    """Tests standardize_dataset on a toy example."""
     dataset = basic()
     X, y = dataset
     X, y = dataset.X, dataset.y
@@ -28,11 +29,13 @@ def test_standardize_dataset_basic():
     assert X.shape == (3, 3)
 
 def test_sample_weight_basic():
+    """Tests returning sample_weight on a toy example."""
     with_weights = basic(sample_weight='X2')
     assert len(with_weights) == 3
     assert with_weights.X.shape == (3, 2)
 
 def test_usecols_dropcols_basic():
+    """Tests various combinations of usecols and dropcols on a toy example."""
     assert basic(usecols='X1').X.columns.tolist() == ['X1']
     assert basic(usecols=['X1', 'Z']).X.columns.tolist() == ['X1', 'Z']
 
@@ -44,17 +47,20 @@ def test_usecols_dropcols_basic():
                       pd.DataFrame)
 
 def test_dropna_basic():
+    """Tests dropna on a toy example."""
     basic_dropna = partial(standardize_dataset, df=df, prot_attr='Z',
                            target='y', dropna=True)
     assert basic_dropna().X.shape == (2, 3)
     assert basic(dropcols='X1').X.shape == (3, 2)
 
 def test_numeric_only_basic():
+    """Tests numeric_only on a toy example."""
     assert basic(prot_attr='X2', numeric_only=True).X.shape == (3, 2)
     assert (basic(prot_attr='X2', dropcols='Z', numeric_only=True).X.shape
             == (3, 2))
 
 def test_fetch_adult():
+    """Tests Adult Income dataset shapes with various options."""
     adult = fetch_adult()
     assert len(adult) == 3
     assert adult.X.shape == (45222, 13)
@@ -62,12 +68,14 @@ def test_fetch_adult():
     assert fetch_adult(numeric_only=True).X.shape == (48842, 7)
 
 def test_fetch_german():
+    """Tests German Credit dataset shapes with various options."""
     german = fetch_german()
     assert len(german) == 2
     assert german.X.shape == (1000, 21)
     assert fetch_german(numeric_only=True).X.shape == (1000, 9)
 
 def test_fetch_bank():
+    """Tests Bank Marketing dataset shapes with various options."""
     bank = fetch_bank()
     assert len(bank) == 2
     assert bank.X.shape == (45211, 15)
@@ -76,6 +84,7 @@ def test_fetch_bank():
 
 @pytest.mark.filterwarnings('error', category=ColumnAlreadyDroppedWarning)
 def test_fetch_compas():
+    """Tests COMPAS Recidivism dataset shapes with various options."""
     compas = fetch_compas()
     assert len(compas) == 2
     assert compas.X.shape == (6167, 10)
@@ -84,5 +93,6 @@ def test_fetch_compas():
         assert fetch_compas(numeric_only=True).X.shape == (6172, 6)
 
 def test_onehot_transformer():
+    """Tests that categorical features can be correctly one-hot encoded."""
     X, y = fetch_german()
     assert len(pd.get_dummies(X).columns) == 63
diff --git a/tests/sklearn/test_metrics.py b/tests/sklearn/test_metrics.py
index 326c7c8b..916d2ce5 100644
--- a/tests/sklearn/test_metrics.py
+++ b/tests/sklearn/test_metrics.py
@@ -29,61 +29,75 @@
                           privileged_groups=[{'sex': 1}])
 
 def test_dataset_equality():
+    """Tests that the old and new datasets match exactly."""
     assert (adult.features == X.values).all()
     assert (adult.labels.ravel() == y).all()
 
 def test_consistency():
+    """Tests that old and new consistency_score approximately match."""
     assert np.isclose(consistency_score(X, y), cm.consistency())
 
 def test_specificity():
+    """Tests that the old and new specificity_score match exactly."""
     spec = specificity_score(y, y_pred, sample_weight=sample_weight)
     assert spec == cm.specificity()
 
 def test_base_rate():
+    """Tests that the old and new base_rate match exactly."""
     base = base_rate(y, y_pred, sample_weight=sample_weight)
     assert base == cm.base_rate()
 
 def test_selection_rate():
+    """Tests that the old and new selection_rate match exactly."""
     select = selection_rate(y, y_pred, sample_weight=sample_weight)
     assert select == cm.selection_rate()
 
 def test_generalized_fpr():
+    """Tests that old and new generalized_fpr approximately match."""
     gfpr = generalized_fpr(y, y_proba, sample_weight=sample_weight)
     assert np.isclose(gfpr, cm.generalized_false_positive_rate())
 
 def test_generalized_fnr():
+    """Tests that old and new generalized_fnr approximately match."""
     gfnr = generalized_fnr(y, y_proba, sample_weight=sample_weight)
     assert np.isclose(gfnr, cm.generalized_false_negative_rate())
 
 def test_disparate_impact():
+    """Tests that the old and new disparate_impact match exactly."""
     di = disparate_impact_ratio(y, y_pred, prot_attr='sex',
                                 sample_weight=sample_weight)
     assert di == cm.disparate_impact()
 
 def test_statistical_parity():
+    """Tests that the old and new statistical_parity match exactly."""
     stat = statistical_parity_difference(y, y_pred, prot_attr='sex',
                                          sample_weight=sample_weight)
     assert stat == cm.statistical_parity_difference()
 
 def test_equal_opportunity():
+    """Tests that the old and new equal_opportunity match exactly."""
     eopp = equal_opportunity_difference(y, y_pred, prot_attr='sex',
                                         sample_weight=sample_weight)
     assert eopp == cm.equal_opportunity_difference()
 
 def test_average_odds_difference():
+    """Tests that old and new average_odds_difference approximately match."""
     aod = average_odds_difference(y, y_pred, prot_attr='sex',
                                   sample_weight=sample_weight)
     assert np.isclose(aod, cm.average_odds_difference())
 
 def test_average_odds_error():
+    """Tests that old and new average_odds_error approximately match."""
     aoe = average_odds_error(y, y_pred, prot_attr='sex',
                              sample_weight=sample_weight)
     assert np.isclose(aoe, cm.average_abs_odds_difference())
 
 def test_generalized_entropy_index():
+    """Tests that old and new generalized_entropy_index approximately match."""
     gei = generalized_entropy_error(y, y_pred)
     assert np.isclose(gei, cm.generalized_entropy_index())
 
 def test_between_group_generalized_entropy_index():
+    """Tests that the old and new between_group_GEI match exactly."""
     bggei = between_group_generalized_entropy_error(y, y_pred, prot_attr='sex')
     assert bggei == cm.between_group_generalized_entropy_index()
diff --git a/tests/sklearn/test_reweighing.py b/tests/sklearn/test_reweighing.py
index 97631043..f8046fe9 100644
--- a/tests/sklearn/test_reweighing.py
+++ b/tests/sklearn/test_reweighing.py
@@ -9,36 +9,32 @@
 from aif360.sklearn.preprocessing import Reweighing, ReweighingMeta
 
 
-# X, y = fetch_german(numeric_only=True, dropcols='duration')
-# X.age = (X.age >= 25).astype('int')
-# german = GermanDataset(categorical_features=[], features_to_keep=[
-#         'credit_amount', 'investment_as_income_percentage', 'residence_since',
-#         'age', 'number_of_credits', 'people_liable_for', 'sex'])
 X, y, sample_weight = fetch_adult(numeric_only=True)
 adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
         features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss',
                           'hours-per-week'], features_to_drop=[])
 
 def test_reweighing_sex():
+    """Test that the old and new Reweighing produce the same sample_weights."""
     orig_rew = OrigReweighing(unprivileged_groups=[{'sex': 0}],
                               privileged_groups=[{'sex': 1}])
     adult_fair = orig_rew.fit_transform(adult)
     rew = Reweighing('sex')
     _, new_sample_weight = rew.fit_transform(X, y, sample_weight=sample_weight)
 
-    # assert np.allclose([[orig_rew.w_up_unfav, orig_rew.w_up_fav],
-    #                     [orig_rew.w_p_unfav, orig_rew.w_p_fav]],
-    #                    rew.reweigh_factors_)
+    assert np.allclose([[orig_rew.w_up_unfav, orig_rew.w_up_fav],
+                        [orig_rew.w_p_unfav, orig_rew.w_p_fav]],
+                       rew.reweigh_factors_)
     assert np.allclose(adult_fair.instance_weights, new_sample_weight)
 
 def test_reweighing_intersection():
+    """Test that the new Reweighing runs with >2 protected groups."""
     rew = Reweighing()
     rew.fit_transform(X, y)
     assert rew.reweigh_factors_.shape == (4, 2)
 
 def test_gridsearch():
-    # logreg = LogisticRegression(solver='lbfgs', max_iter=500)
-    # rew = ReweighingMeta(estimator=logreg, reweigher=Reweighing('sex'))
+    """Test that ReweighingMeta works in a grid search."""
     rew = ReweighingMeta(estimator=LogisticRegression(solver='liblinear'))
 
     # UGLY workaround for sklearn issue: https://stackoverflow.com/a/49598597
@@ -51,4 +47,3 @@ def score_func(y_true, y_pred, sample_weight):
 
     clf = GridSearchCV(rew, params, scoring=scoring, cv=5, iid=False)
     clf.fit(X, y, **{'sample_weight': sample_weight})
-    # print(clf.best_score_)