[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Trusted-AI/AIF360/blob/master/examples/demo_json_explainers.ipynb)


In [1]:
!pip install aif360

Collecting aif360
  Downloading aif360-0.5.0-py3-none-any.whl (214 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m214.1/214.1 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: aif360
Successfully installed aif360-0.5.0


In [2]:
# Load all necessary packages
import sys
sys.path.append("../")
from collections import OrderedDict
import json
from pprint import pprint
from aif360.datasets import GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.explainers import MetricTextExplainer, MetricJSONExplainer
from IPython.display import JSON, display_json

pip install 'aif360[LawSchoolGPA]'
pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'


##### Load dataset

In [7]:
import os
import urllib
import aif360

# Parent directory of the installed ``aif360`` package; joining with "" keeps
# the trailing path separator.
LIB_PATH = os.path.join(os.path.dirname(os.path.dirname(aif360.__file__)), "")

def check_data_or_download(destn, files, data_source_directory):
    """Make sure every file in ``files`` exists in ``destn``, downloading the missing ones.

    Args:
        destn: Local directory expected to hold the raw data files. It is
            created when it does not exist yet.
        files: Names of the files that must be present in ``destn``.
        data_source_directory: Base URL (ending in "/") to which each file
            name is appended to build its download URL.

    Raises:
        urllib.error.URLError: If a missing file cannot be retrieved.
    """
    # A bare ``import urllib`` does not guarantee the ``request`` submodule is
    # loaded, so import it explicitly where it is used.
    import urllib.request

    # os.listdir() raises on a missing directory; create it up front.
    os.makedirs(destn, exist_ok=True)

    # List the directory once instead of once per requested file.
    present = set(os.listdir(destn))
    missing = [data_file for data_file in files if data_file not in present]

    if not missing:
        # Name the dataset after its folder so the message matches the
        # dataset that was actually checked (e.g. "adult" -> "Adult").
        dataset_name = os.path.basename(os.path.normpath(destn)).capitalize()
        print(f"{dataset_name} dataset is available for us")
        return

    print("Some files are missing. Downloading now.")
    # Only fetch what is absent; files already on disk are left untouched.
    for data_file in missing:
        urllib.request.urlretrieve(data_source_directory + data_file,
                                   os.path.join(destn, data_file))

# Raw files required per dataset: (folder under aif360/data/raw, source URL, file names)
raw_datasets = (
    ("adult",
     "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/",
     ["adult.data", "adult.test", "adult.names"]),
    ("german",
     "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/",
     ["german.data", "german.doc"]),
)

# Check (and if needed fetch) the Adult files first, then the German files
for folder, data_source_directory, files in raw_datasets:
    destn = os.path.join(LIB_PATH, "aif360", "data", "raw", folder)
    check_data_or_download(destn, files, data_source_directory)

Adult dataset is available for us
Some files are missing. Downloading now.


In [8]:
# Build the German credit dataset using GermanDataset's default arguments.
gd = GermanDataset()

##### Create metrics

In [9]:
# Group definitions keyed on the 'sex' attribute: value 1 is passed as the
# privileged group and value 0 as the unprivileged group.
priv = [{'sex': 1}]
unpriv = [{'sex': 0}]
# Metric object over `gd` that compares the two groups defined above.
bldm = BinaryLabelDatasetMetric(gd, unprivileged_groups=unpriv, privileged_groups=priv)

##### Create explainers

In [10]:
# Wrap the same metric object in two explainers: one whose methods return
# plain-text explanations and one whose methods return JSON strings.
text_expl = MetricTextExplainer(bldm)
json_expl = MetricJSONExplainer(bldm)

##### Text explanations

In [11]:
# Text explanation of the number of positive-outcome instances.
print(text_expl.num_positives())

Number of positive-outcome instances: 700.0


In [12]:
# Text explanation of the mean difference (unprivileged minus privileged).
print(text_expl.mean_difference())

Mean difference (mean label value on unprivileged instances - mean label value on privileged instances): -0.07480130902290782


In [13]:
# Text explanation of the disparate impact (unprivileged / privileged ratio).
print(text_expl.disparate_impact())

Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.8965673282047968


##### JSON explanations

In [14]:
def format_json(json_str):
    """Re-serialise a JSON string with 2-space indentation, keeping key order."""
    # Parse objects into OrderedDicts so keys come back out in their original order.
    parsed = json.loads(json_str, object_pairs_hook=OrderedDict)
    pretty = json.dumps(parsed, indent=2)
    return pretty

In [15]:
# JSON explanation of the number of positives, pretty-printed with format_json.
print(format_json(json_expl.num_positives()))

{
  "metric": "Number Of Positives",
  "message": "Number of positive-outcome instances: 700.0",
  "numPositives": 700.0,
  "description": "Computed as the number of positive instances for the given (privileged or unprivileged) group.",
  "ideal": "The ideal value of this metric lies in the total number of positive instances made available"
}


In [16]:
# JSON explanation of the mean difference, pretty-printed with format_json.
print(format_json(json_expl.mean_difference()))

{
  "metric": "Mean Difference",
  "message": "Mean difference (mean label value on unprivileged instances - mean label value on privileged instances): -0.07480130902290782",
  "numPositivesUnprivileged": 201.0,
  "numInstancesUnprivileged": 310.0,
  "numPositivesPrivileged": 499.0,
  "numInstancesPrivileged": 690.0,
  "description": "Computed as the difference of the rate of favorable outcomes received by the unprivileged group to the privileged group.",
  "ideal": "The ideal value of this metric is 0.0"
}


In [17]:
# JSON explanation of the disparate impact, pretty-printed with format_json.
print(format_json(json_expl.disparate_impact()))

{
  "metric": "Disparate Impact",
  "message": "Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.8965673282047968",
  "numPositivePredictionsUnprivileged": 201.0,
  "numUnprivileged": 310.0,
  "numPositivePredictionsPrivileged": 499.0,
  "numPrivileged": 690.0,
  "description": "Computed as the ratio of rate of favorable outcome for the unprivileged group to that of the privileged group.",
  "ideal": "The ideal value of this metric is 1.0 A value < 1 implies higher benefit for the privileged group and a value >1 implies a higher benefit for the unprivileged group."
}
