Copyright 2018 Google LLC.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

# Evaluation code


__Disclaimer__
*   This notebook contains experimental code, which may be changed without notice.
*   The ideas here are relevant to fairness, but they are not the whole story!



# Notebook summary

This notebook evaluates a list of models on two dimensions:
- "Performance": How well the model classifies the data (intended bias). Currently, we use the AUC.
- "Bias": How much unintended bias the model contains. Currently, we use the pinned AUC.

This script takes the following steps:

- Write input functions to generate 2 datasets:
    - A "performance dataset", which is used for the first set of metrics. This dataset is expected to have a format similar to the training data (a piece of text and a label).
    - A "bias dataset", which is used for the second set of metrics. This dataset contains a piece of text and a label, plus subgroup information on which the unintended bias is evaluated.
- Runs predictions with the export_utils.
- Evaluate metrics.

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import getpass
from IPython.display import display
import json
import nltk
import numpy as np
import pandas as pd
import pkg_resources
import os
import random
import re
import seaborn as sns

import tensorflow as tf
from tensorflow.python.lib.io import file_io

In [4]:
from utils_export.dataset import Dataset, Model
from utils_export import utils_cloudml
from utils_export import utils_tfrecords

In [5]:
# Set the GCS read-cache size to 0 MB. Per the original note this makes access
# to GCS files faster; see https://github.com/tensorflow/tensorflow/issues/15530
os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0'

# Settings

### Global variables

In [6]:
# User inputs
# Project name, handed to `Model` as `project_name` further down.
PROJECT_NAME = 'wikidetox'

# Information about deployed model.
# Each entry looks like '<model name>:<version>' (TODO confirm the exact
# convention). The same strings are used as the per-model score column names
# when the evaluation cells index the prediction DataFrames.
MODEL_NAMES_BASELINE = [
    'tf_gru_attention_civil:v_20181015_180308',
]
MODEL_NAMES_GENDER = [
    'tf_gru_attention_civil:v_20181015_180510',
    'tf_gru_attention_civil:v_20181015_180619',
    'tf_gru_attention_civil:v_20181015_180733',
    'tf_gru_attention_civil:v_20181015_180839',
    'tf_gru_attention_civil:v_20181015_180947',
    'tf_gru_attention_civil:v_20181015_181046',
]
MODEL_NAMES_ORIENTATION = [
    'tf_gru_attention_civil:v_20181030_095505',
    'tf_gru_attention_civil:v_20181030_095323',
    'tf_gru_attention_civil:v_20181030_095148',
]

# Model description
TEXT_FEATURE_NAME = 'comment_text' # Name of the input text feature / DataFrame column
SENTENCE_KEY = 'comment_key' # Name of the input key, passed to `Model` as `example_key`
LABEL_NAME_PREDICTION_MODEL = 'toxicity/logistic' # Name of the model output read as the prediction

# Part 1: Creating input_fn

In [7]:
def tokenizer(text, lowercase=True):
  """Converts text to a list of words.

  Args:
    text: piece of text to tokenize. Either a UTF-8 encoded byte string
      (e.g. the value of a `tf.string` feature) or already-decoded text.
    lowercase: whether to include lowercasing in preprocessing (boolean).

  Returns:
    A list of strings (words).
  """
  # Only byte strings need decoding. Decoding unconditionally fails on text
  # that is already decoded (a Python 3 `str` has no `.decode`), which is what
  # pandas yields when reading a CSV.
  if isinstance(text, bytes):
    text = text.decode('utf-8')
  words = nltk.word_tokenize(text)
  if lowercase:
    words = [w.lower() for w in words]
  return words

### Toxicity 2017 performance dataset

In [8]:
# User inputs
# TFRecord file read by `input_fn_performance_toxicity`.
TOXICITY_PERFORMANCE_DATASET = 'gs://kaggle-model-experiments/resources/toxicity_q42017_test.tfrecord'
TOXICITY_DATA_LABEL = 'frac_neg' # Name of the label in the performance dataset

# DECODING
# Feature spec used to parse each record: the raw text as a scalar string and
# the label as a scalar float.
decoding_input_features = {
  TEXT_FEATURE_NAME: tf.FixedLenFeature([], dtype=tf.string),
  TOXICITY_DATA_LABEL: tf.FixedLenFeature([], dtype=tf.float32)
}

def input_fn_performance_toxicity(max_n_examples=None, random_filter_keep_rate=1.0):
    """Loads the toxicity performance dataset as a DataFrame.

    Args:
      max_n_examples: forwarded to `decode_tf_records_to_pandas`
        (presumably the maximum number of examples to read; None for all).
      random_filter_keep_rate: forwarded to `decode_tf_records_to_pandas`
        (presumably the fraction of records randomly kept).

    Returns:
      A new DataFrame with the tokenized text in column `TEXT_FEATURE_NAME`
      and a boolean `label` column (True when the raw label is >= 0.5).
    """
    # Keyword arguments, as in the other input functions, so the call does not
    # depend on the positional order of the decoder's optional parameters.
    res = utils_tfrecords.decode_tf_records_to_pandas(
        decoding_input_features,
        TOXICITY_PERFORMANCE_DATASET,
        max_n_examples=max_n_examples,
        random_filter_keep_rate=random_filter_keep_rate)
    res[TEXT_FEATURE_NAME] = list(map(tokenizer, res[TEXT_FEATURE_NAME]))
    res = res.rename(columns={TOXICITY_DATA_LABEL: 'label'})
    # Explicit threshold instead of bool(round(x)): Python 3 rounds half to
    # even, so round(0.5) == 0 would silently flip labels at exactly 0.5.
    res['label'] = [bool(score >= 0.5) for score in res['label']]
    return res.copy(deep=True)

### Civil comments performance dataset

In [9]:
# TFRecord file shared by the civil-comments performance and bias datasets.
CIVIL_COMMENTS_PATH = 'gs://kaggle-model-experiments/resources/civil_comments_data/train.tfrecord'
# An identity score at or above this value counts as "mentions the identity".
THRESHOLD_BIAS_CIVIL = 0.5

# Identity columns available in the civil comments data.
identity_terms_civil = [
    'male',
    'female',
    'transgender',
    'other_gender',
    'heterosexual',
    'homosexual_gay_or_lesbian',
    'bisexual',
    'other_sexual_orientation',
    'christian',
    'jewish',
    'muslim',
    'hindu',
    'buddhist',
    'atheist',
    'other_religion',
    'black',
    'white',
    'asian',
    'latino',
    'other_race_or_ethnicity',
    'physical_disability',
    'intellectual_or_learning_disability',
    'psychiatric_or_mental_illness',
    'other_disability',
]

# Parsing spec: text, id, toxicity scores and annotator counts are mandatory.
civil_comments_spec = {
    'comment_text': tf.FixedLenFeature([], dtype=tf.string),
    'id': tf.FixedLenFeature([], dtype=tf.string),
    'toxicity': tf.FixedLenFeature([], dtype=tf.float32),
    'severe_toxicity': tf.FixedLenFeature([], dtype=tf.float32),
    'obscene': tf.FixedLenFeature([], dtype=tf.float32),
    'sexual_explicit': tf.FixedLenFeature([], dtype=tf.float32),
    'identity_attack': tf.FixedLenFeature([], dtype=tf.float32),
    'insult': tf.FixedLenFeature([], dtype=tf.float32),
    'threat': tf.FixedLenFeature([], dtype=tf.float32),
    'toxicity_annotator_count': tf.FixedLenFeature([], dtype=tf.int64),
    'identity_annotator_count': tf.FixedLenFeature([], dtype=tf.int64),
}
# Every identity column is an optional float that defaults to -1 when absent;
# `filter_fn_civil` relies on that negative value to spot examples without
# identity labels.
civil_comments_spec.update({
    _identity: tf.FixedLenFeature([], dtype=tf.float32, default_value=-1.)
    for _identity in identity_terms_civil
})

In [10]:
def input_fn_performance_civil(max_n_examples=None, random_filter_keep_rate=1.0):
    """Loads the civil comments performance dataset as a DataFrame.

    Args:
      max_n_examples: forwarded to `decode_tf_records_to_pandas`
        (presumably the maximum number of examples to read; None for all).
      random_filter_keep_rate: forwarded to `decode_tf_records_to_pandas`
        (presumably the fraction of records randomly kept).

    Returns:
      A new DataFrame where the raw `comment_text` column is tokenized and
      named `TEXT_FEATURE_NAME`, and `toxicity` is replaced by a boolean
      `label` column (True when toxicity >= 0.5). Identity columns are left
      as raw scores.
    """
    civil_df = utils_tfrecords.decode_tf_records_to_pandas(
        civil_comments_spec,
        CIVIL_COMMENTS_PATH,
        max_n_examples=max_n_examples,
        random_filter_keep_rate=random_filter_keep_rate,
    )
    # Rename first, then tokenize the renamed column. Tokenizing
    # `TEXT_FEATURE_NAME` before the rename only worked while that constant
    # happened to equal 'comment_text'.
    civil_df = civil_df.rename(columns={
        'comment_text': TEXT_FEATURE_NAME,
        'toxicity': 'label'})
    civil_df[TEXT_FEATURE_NAME] = list(map(tokenizer, civil_df[TEXT_FEATURE_NAME]))
    # Explicit threshold instead of bool(round(x)): Python 3 rounds half to
    # even, so round(0.5) == 0 would silently flip labels at exactly 0.5.
    civil_df['label'] = [bool(score >= 0.5) for score in civil_df['label']]
    return civil_df.copy(deep=True)

### Synthetic comment bias dataset

In [None]:
!pip install -U -q git+https://github.com/conversationai/unintended-ml-bias-analysis

In [11]:
from unintended_ml_bias import model_bias_analysis

In [12]:
# Load the synthetic evaluation set shipped with the unintended_ml_bias package.
entire_test_bias_df = pd.read_csv(
    pkg_resources.resource_stream("unintended_ml_bias", "eval_datasets/bias_madlibs_77k.csv"))
entire_test_bias_df['raw_text'] = entire_test_bias_df['Text']
# The CSV marks toxic examples with the string 'BAD'.
entire_test_bias_df['label'] = list(map(lambda x: x=='BAD', entire_test_bias_df['Label']))
entire_test_bias_df = entire_test_bias_df[['raw_text', 'label']].copy()
# `resource_stream` yields byte lines; decode them so the identity terms (and
# the subgroup column names derived from them) are text on Python 3 as well.
identity_terms_synthetic = [
    line.decode('utf-8').strip()
    for line in pkg_resources.resource_stream(
        "unintended_ml_bias", "bias_madlibs_data/adjectives_people.txt")]
# Adds the subgroup columns to entire_test_bias_df, computed from 'raw_text'.
model_bias_analysis.add_subgroup_columns_from_text(entire_test_bias_df, 'raw_text', identity_terms_synthetic)
# Add preprocessing
entire_test_bias_df[TEXT_FEATURE_NAME] = list(map(tokenizer, entire_test_bias_df['raw_text']))

In [13]:
def input_fn_bias_synthetic(max_n_examples=None):
    """Returns (a sample of) the synthetic bias dataset as a new DataFrame.

    Args:
      max_n_examples: maximum number of examples to return. A falsy value
        (None or 0) returns the whole dataset in its original order. Values
        larger than the dataset are capped at the dataset size.

    Returns:
      A deep copy of `entire_test_bias_df`, or of a reproducible random sample
      of it (fixed `random_state`), so callers can mutate the result freely.
    """
    if max_n_examples:
        # `sample` raises when n exceeds the number of rows; the argument is a
        # maximum, so cap it instead of failing.
        n_examples = min(max_n_examples, len(entire_test_bias_df))
        res = entire_test_bias_df.sample(n=n_examples, random_state=2018)
    else:
        res = entire_test_bias_df
    return res.copy(deep=True)

### Civil comment bias dataset

This dataset is constructed as follows:
- we keep only examples that have identity labels (rule: male >= 0)
- we apply the 'threshold_bias_civil' threshold to each identity field
- we select x% of the "background", i.e. examples that are 0 for every identity.
Indeed, as the background is dominant, we want to reduce the size of the test set.

In [14]:
def filter_fn_civil(example, background_filter_keep_rate=1.0):
    """Decides whether a civil comments example belongs in the bias dataset.

    Args:
      example: mapping from feature name to value; must contain every name in
        `identity_terms_civil`.
      background_filter_keep_rate: probability of keeping an example in which
        no identity reaches `THRESHOLD_BIAS_CIVIL`.

    Returns:
      False when the example has no identity labels (negative 'male' value),
      True when at least one identity reaches the threshold, and otherwise the
      outcome of a random draw against `background_filter_keep_rate`.
    """
    # A negative 'male' value marks an example without identity labels.
    if example['male'] < 0.:
        return False
    # Evaluate every identity (no short-circuit), as the original loop did.
    identity_hits = [example[name] >= THRESHOLD_BIAS_CIVIL for name in identity_terms_civil]
    if any(identity_hits):
        return True
    # Background example: keep it only with the requested probability.
    return random.random() < background_filter_keep_rate

def input_fn_bias_civil(max_n_examples=None):
    """Loads the civil comments bias dataset as a DataFrame.

    Records are filtered with `filter_fn_civil`. It is passed without a keep
    rate, so its default `background_filter_keep_rate` applies (assuming the
    decoder calls it with the example only - TODO confirm).

    Args:
      max_n_examples: forwarded to `decode_tf_records_to_pandas`
        (presumably the maximum number of examples to read; None for all).

    Returns:
      A new DataFrame where the raw `comment_text` column is tokenized and
      named `TEXT_FEATURE_NAME`, every identity column is a boolean
      (score >= THRESHOLD_BIAS_CIVIL), and `toxicity` is replaced by a boolean
      `label` column (True when toxicity >= 0.5).
    """
    civil_df = utils_tfrecords.decode_tf_records_to_pandas(
        civil_comments_spec,
        CIVIL_COMMENTS_PATH,
        max_n_examples=max_n_examples,
        filter_fn=filter_fn_civil,
    )
    # Rename first, then tokenize the renamed column. Tokenizing
    # `TEXT_FEATURE_NAME` before the rename only worked while that constant
    # happened to equal 'comment_text'.
    civil_df = civil_df.rename(columns={
        'comment_text': TEXT_FEATURE_NAME,
        'toxicity': 'label'})
    civil_df[TEXT_FEATURE_NAME] = list(map(tokenizer, civil_df[TEXT_FEATURE_NAME]))
    for _term in identity_terms_civil:
        civil_df[_term] = [bool(score >= THRESHOLD_BIAS_CIVIL) for score in civil_df[_term]]
    # Explicit threshold instead of bool(round(x)): Python 3 rounds half to
    # even, so round(0.5) == 0 would silently flip labels at exactly 0.5.
    civil_df['label'] = [bool(score >= 0.5) for score in civil_df['label']]
    return civil_df.copy(deep=True)

# Part 2: Running prediction

### Defining the model

In [62]:
# Score every model that the evaluation cells below index by name (baseline,
# gender and orientation). Scoring only one list here makes a fresh
# "Restart & Run All" depend on predictions left over from earlier runs.
MODEL_NAMES = MODEL_NAMES_BASELINE + MODEL_NAMES_GENDER + MODEL_NAMES_ORIENTATION

In [63]:
# User inputs.
# Maps the model's input feature name to its encoding (a list of strings,
# matching the tokenized text produced by the input functions).
model_input_spec = {
    TEXT_FEATURE_NAME: utils_tfrecords.EncodingFeatureSpec.LIST_STRING} #library will use this automatically

# Description of the deployed models to query: which features to send, which
# output to read as the prediction, and which model versions to call.
model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

### Performance dataset

In [19]:
# User inputs
# Size argument passed to `load_data` for the performance dataset.
SIZE_PERFORMANCE_DATA_SET = 10000

# Pattern for path of tf_records
# Per-user GCS directory (keyed on the local user name) handed to `Dataset`.
PERFORMANCE_DATASET_DIR = os.path.join(
    'gs://kaggle-model-experiments/',
    getpass.getuser(),
    'tfrecords',
    'test_performance_civil_comments')

In [20]:
# Performance dataset: built from `input_fn_performance_civil`, with
# PERFORMANCE_DATASET_DIR as its TFRecord directory.
dataset_performance = Dataset(input_fn_performance_civil, PERFORMANCE_DATASET_DIR)
random.seed(2018) # Need to set seed before loading data to be able to reload same data in the future
# `random_filter_keep_rate` is presumably forwarded to the input function - TODO confirm.
dataset_performance.load_data(SIZE_PERFORMANCE_DATA_SET, random_filter_keep_rate=0.5)

INFO:tensorflow:input_fn is compatible with the `Dataset` class.




In [64]:
# Add the predictions of the models described by `model` to the performance dataset.
# NOTE(review): the meaning of the positional `False` is not visible here - confirm
# against Dataset.add_model_prediction_to_data.
dataset_performance.add_model_prediction_to_data(model, False)

INFO:tensorflow:Model is compatible with the `Dataset` instance.


### Bias dataset

In [22]:
# User inputs
# Size argument passed to `load_data` for the civil comments bias dataset.
SIZE_BIAS_DATA_SET = 20000

# Pattern for path of tf_records
# Per-user GCS directory (keyed on the local user name) handed to `Dataset`.
BIAS_DATASET_DIR = os.path.join(
    'gs://kaggle-model-experiments/',
    getpass.getuser(),
    'tfrecords',
    'test_bias_civil_comments')

In [23]:
# Civil comments bias dataset: built from `input_fn_bias_civil`, with
# BIAS_DATASET_DIR as its TFRecord directory.
dataset_bias = Dataset(input_fn_bias_civil, BIAS_DATASET_DIR)
# `filter_fn_civil` draws from `random.random()`, hence the fixed seed.
random.seed(2018) # Need to set seed before loading data to be able to reload same data in the future
dataset_bias.load_data(SIZE_BIAS_DATA_SET)

INFO:tensorflow:input_fn is compatible with the `Dataset` class.


In [65]:
# Add the predictions of the models described by `model` to the bias dataset.
# NOTE(review): the meaning of the positional `False` is not visible here - confirm
# against Dataset.add_model_prediction_to_data.
dataset_bias.add_model_prediction_to_data(model, False)

INFO:tensorflow:Model is compatible with the `Dataset` instance.


### Synthetic bias dataset

In [25]:
# User inputs
# Size argument passed to `load_data` for the synthetic bias dataset.
SIZE_ARTIFICIAL_BIAS_DATA_SET = 5000

# Pattern for path of tf_records
# Per-user GCS directory (keyed on the local user name) handed to `Dataset`.
ARTIFICIAL_BIAS_DATASET_DIR = os.path.join(
    'gs://kaggle-model-experiments/',
    getpass.getuser(),
    'tfrecords',
    'test_synthetic_dataset')

In [26]:
# Synthetic bias dataset: built from `input_fn_bias_synthetic`, with
# ARTIFICIAL_BIAS_DATASET_DIR as its TFRecord directory.
dataset_bias_artificial = Dataset(input_fn_bias_synthetic, ARTIFICIAL_BIAS_DATASET_DIR)
# NOTE(review): `input_fn_bias_synthetic` samples with its own fixed pandas
# `random_state`, so this seed only matters if `Dataset` itself uses `random` - confirm.
random.seed(2018) # Need to set seed before loading data to be able to reload same data in the future
dataset_bias_artificial.load_data(SIZE_ARTIFICIAL_BIAS_DATA_SET)

INFO:tensorflow:input_fn is compatible with the `Dataset` class.


In [66]:
# Add the predictions of the models described by `model` to the synthetic bias dataset.
# NOTE(review): the meaning of the positional `False` is not visible here - confirm
# against Dataset.add_model_prediction_to_data.
dataset_bias_artificial.add_model_prediction_to_data(model, False)

INFO:tensorflow:Model is compatible with the `Dataset` instance.


### Post processing

In [67]:
# DataFrame view of the performance dataset; the evaluation cells read the
# 'label' column and one score column per model from it.
test_performance_df = dataset_performance.show_data()

In [68]:
# DataFrame views of the two bias datasets; the evaluation cells read 'label',
# the subgroup columns and one score column per model from them.
test_bias_df = dataset_bias.show_data()
test_bias_artificial_df = dataset_bias_artificial.show_data()

### Analyzing final results

In [69]:
# Preview the first rows of the performance data (identity columns are raw scores here).
test_performance_df.head()

Unnamed: 0,asian,atheist,bisexual,black,buddhist,christian,comment_text,female,heterosexual,hindu,...,tf_gru_attention_civil:v_20181030_095505,tf_gru_attention_civil:v_20181030_095323,tf_gru_attention_civil:v_20181030_095148,tf_gru_attention_civil:v_20181015_180308,tf_gru_attention_civil:v_20181015_180510,tf_gru_attention_civil:v_20181015_180619,tf_gru_attention_civil:v_20181015_180733,tf_gru_attention_civil:v_20181015_180839,tf_gru_attention_civil:v_20181015_180947,tf_gru_attention_civil:v_20181015_181046
0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,"[i, 'd, also, add, that, trump, was, briefed, ...",-1.0,-1.0,-1.0,...,0.000238,6e-05,0.022486,0.001436,0.000309,0.000484,0.000483,0.000493,0.000141,0.000313
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,"[they, are, n't, giving, trump, commands, ,, t...",-1.0,-1.0,-1.0,...,0.000956,0.001464,0.046324,0.006435,0.003659,0.003754,0.004382,0.006342,0.00143,0.003764
2,0.0,0.0,0.0,0.0,0.0,0.0,"[he, 's, a, pm, with, no, strong, convictions,...",0.0,0.0,0.0,...,0.000459,0.000877,0.021664,0.004228,0.003634,0.001508,0.001162,0.00167,0.000382,0.000524
3,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,"[putin, wants, his, childhood, world, back, :,...",-1.0,-1.0,-1.0,...,0.000669,0.001852,0.011379,0.002241,0.000967,0.003308,0.000729,0.001684,0.000439,0.000987
4,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,"[sandoval, ever, heard, of, chicago, ?, sanctu...",-1.0,-1.0,-1.0,...,0.005068,0.002848,0.076914,0.026168,0.016021,0.038252,0.007727,0.007525,0.004858,0.004018


In [70]:
# Preview the first rows of the civil comments bias data (identity columns are booleans here).
test_bias_df.head()

Unnamed: 0,asian,atheist,bisexual,black,buddhist,christian,comment_text,female,heterosexual,hindu,...,tf_gru_attention_civil:v_20181030_095505,tf_gru_attention_civil:v_20181030_095323,tf_gru_attention_civil:v_20181030_095148,tf_gru_attention_civil:v_20181015_180308,tf_gru_attention_civil:v_20181015_180510,tf_gru_attention_civil:v_20181015_180619,tf_gru_attention_civil:v_20181015_180733,tf_gru_attention_civil:v_20181015_180839,tf_gru_attention_civil:v_20181015_180947,tf_gru_attention_civil:v_20181015_181046
0,False,False,False,False,False,False,"[i, 've, ridden, by, train, ,, seattle, to, va...",False,False,False,...,0.003843,0.00166,0.005707,0.003696,0.003624,0.004707,0.00155,0.00179,0.001804,0.001148
1,False,False,False,False,False,False,"[so, now, who, will, pay, for, ``, the, wall, ...",False,False,False,...,0.000313,0.001022,0.021172,0.003087,0.001423,0.00127,0.000887,0.00165,0.000696,0.001465
2,False,False,False,False,False,False,"[he, 's, a, pm, with, no, strong, convictions,...",False,False,False,...,0.000425,0.000864,0.02328,0.004163,0.00356,0.001469,0.001138,0.001693,0.000365,0.000521
3,False,False,False,False,False,False,"[it, 's, too, bad, ., i, am, 90, %, sure, it, ...",False,False,False,...,0.002814,0.010471,0.023869,0.014443,0.008567,0.025985,0.007175,0.005349,0.002725,0.003985
4,False,False,False,False,False,False,"[i, hope, the, symposium, discusses, the, book...",False,False,False,...,0.000303,5.7e-05,0.005443,0.00269,0.000592,0.00068,0.000635,0.000605,0.000447,0.000692


In [71]:
# Preview the first rows of the synthetic bias data.
test_bias_artificial_df.head()

Unnamed: 0,raw_text,label,lesbian,gay,bisexual,transgender,trans,queer,lgbt,lgbtq,...,tf_gru_attention_civil:v_20181030_095505,tf_gru_attention_civil:v_20181030_095323,tf_gru_attention_civil:v_20181030_095148,tf_gru_attention_civil:v_20181015_180308,tf_gru_attention_civil:v_20181015_180510,tf_gru_attention_civil:v_20181015_180619,tf_gru_attention_civil:v_20181015_180733,tf_gru_attention_civil:v_20181015_180839,tf_gru_attention_civil:v_20181015_180947,tf_gru_attention_civil:v_20181015_181046
20609,Sohaib is a fucked indian,True,False,False,False,False,False,False,False,False,...,0.792759,0.934021,0.638573,0.795687,0.616311,0.615874,0.543099,0.830115,0.898066,0.786378
71871,Emad is a fun taoist,False,False,False,False,False,False,False,False,False,...,0.030179,0.059532,0.025372,0.029787,0.032082,0.033865,0.020772,0.026923,0.060165,0.062084
1186,Being muslim is happy.,False,False,False,False,False,False,False,False,False,...,0.233632,0.587497,0.273748,0.172121,0.221711,0.341163,0.210516,0.282975,0.202189,0.194992
34611,Binh is a filthy protestant,True,False,False,False,False,False,False,False,False,...,0.890266,0.928942,0.463409,0.747039,0.869356,0.894588,0.829886,0.870292,0.835413,0.832469
62782,Nithum is a neat asian,False,False,False,False,False,False,False,False,False,...,0.173243,0.304449,0.054318,0.065649,0.134756,0.32536,0.118506,0.191671,0.220517,0.073629


# Part 3: Run evaluation metrics

In [81]:
# One single-model "family" (a one-element list) per model, over all three model lists.
MODEL_FAMILIES = [[_model] for _model in MODEL_NAMES_BASELINE + MODEL_NAMES_GENDER + MODEL_NAMES_ORIENTATION]

In [84]:
# Single-model families for the baseline plus the gender models.
MODEL_FAMILIES_GENDER = [[_model] for _model in MODEL_NAMES_BASELINE + MODEL_NAMES_GENDER]

In [83]:
# Single-model families for the baseline plus the orientation models.
MODEL_FAMILIES_ORIENTATION = [[_model] for _model in MODEL_NAMES_BASELINE + MODEL_NAMES_ORIENTATION]

## Performance metrics

### Data Format

At this point, our performance data is in DataFrame df, with columns:

- label: True if the comment is Toxic, False otherwise.
- < model name >: One column per model, cells contain the score from that model.
You can run the analysis below on any data in this format. Subgroup labels can be generated via words in the text as done above, or come from human labels if you have them.

### Run AUC

In [74]:
import sklearn.metrics as metrics

In [75]:
# ROC AUC of every model family on the performance dataset.
for model_family in MODEL_FAMILIES:
  # One AUC per model of the family; the family score is their mean.
  auc_list = []
  for _model in model_family:
    fpr, tpr, _thresholds = metrics.roc_curve(
        test_performance_df['label'],
        test_performance_df[_model])
    auc_list.append(metrics.auc(fpr, tpr))
  # Label the line with the whole family rather than the variable leaked from
  # the inner loop, which only names the last model (and is stale or undefined
  # for an empty family). The output is unchanged for single-model families.
  print('Auc for model {}: {}'.format(', '.join(model_family), np.mean(auc_list)))

Auc for model tf_gru_attention_civil:v_20181015_180308: 0.931039996245
Auc for model tf_gru_attention_civil:v_20181015_180510: 0.929252917867
Auc for model tf_gru_attention_civil:v_20181015_180619: 0.926347330438
Auc for model tf_gru_attention_civil:v_20181015_180733: 0.927901058153
Auc for model tf_gru_attention_civil:v_20181015_180839: 0.930345774378
Auc for model tf_gru_attention_civil:v_20181015_180947: 0.9256402823
Auc for model tf_gru_attention_civil:v_20181015_181046: 0.927690299215
Auc for model tf_gru_attention_civil:v_20181030_095505: 0.912637942526
Auc for model tf_gru_attention_civil:v_20181030_095323: 0.916477661375
Auc for model tf_gru_attention_civil:v_20181030_095148: 0.898560746745


In [76]:
# ROC AUC of every model family on the civil comments bias dataset.
for model_family in MODEL_FAMILIES:
  # One AUC per model of the family; the family score is their mean.
  auc_list = []
  for _model in model_family:
    fpr, tpr, _thresholds = metrics.roc_curve(
        test_bias_df['label'],
        test_bias_df[_model])
    auc_list.append(metrics.auc(fpr, tpr))
  # Label the line with the whole family rather than the variable leaked from
  # the inner loop, which only names the last model (and is stale or undefined
  # for an empty family). The output is unchanged for single-model families.
  print('Auc for model {}: {}'.format(', '.join(model_family), np.mean(auc_list)))

Auc for model tf_gru_attention_civil:v_20181015_180308: 0.911979721123
Auc for model tf_gru_attention_civil:v_20181015_180510: 0.913068632248
Auc for model tf_gru_attention_civil:v_20181015_180619: 0.913824848522
Auc for model tf_gru_attention_civil:v_20181015_180733: 0.916272245083
Auc for model tf_gru_attention_civil:v_20181015_180839: 0.916235155385
Auc for model tf_gru_attention_civil:v_20181015_180947: 0.914353160774
Auc for model tf_gru_attention_civil:v_20181015_181046: 0.915499303109
Auc for model tf_gru_attention_civil:v_20181030_095505: 0.900349810556
Auc for model tf_gru_attention_civil:v_20181030_095323: 0.907198722963
Auc for model tf_gru_attention_civil:v_20181030_095148: 0.88107310102


In [77]:
# ROC AUC of every model family on the synthetic bias dataset.
for model_family in MODEL_FAMILIES:
  # One AUC per model of the family; the family score is their mean.
  auc_list = []
  for _model in model_family:
    fpr, tpr, _thresholds = metrics.roc_curve(
        test_bias_artificial_df['label'],
        test_bias_artificial_df[_model])
    auc_list.append(metrics.auc(fpr, tpr))
  # Label the line with the whole family rather than the variable leaked from
  # the inner loop, which only names the last model (and is stale or undefined
  # for an empty family). The output is unchanged for single-model families.
  print('Auc for model {}: {}'.format(', '.join(model_family), np.mean(auc_list)))

Auc for model tf_gru_attention_civil:v_20181015_180308: 0.972084796834
Auc for model tf_gru_attention_civil:v_20181015_180510: 0.979803534673
Auc for model tf_gru_attention_civil:v_20181015_180619: 0.956434516251
Auc for model tf_gru_attention_civil:v_20181015_180733: 0.978808951758
Auc for model tf_gru_attention_civil:v_20181015_180839: 0.970417878428
Auc for model tf_gru_attention_civil:v_20181015_180947: 0.969007285928
Auc for model tf_gru_attention_civil:v_20181015_181046: 0.956852125873
Auc for model tf_gru_attention_civil:v_20181030_095505: 0.981734139155
Auc for model tf_gru_attention_civil:v_20181030_095323: 0.96560112745
Auc for model tf_gru_attention_civil:v_20181030_095148: 0.979341924038


## Unintended Bias Metrics

### Data Format
At this point, our bias data is in DataFrame df, with columns:

*   label: True if the comment is Toxic, False otherwise.
*   < model name >: One column per model, cells contain the score from that model.
*   < subgroup >: One column per identity, True if the comment mentions this identity.

You can run the analysis below on any data in this format. Subgroup labels can be 
generated via words in the text as done above, or come from human labels if you have them.


In [52]:
# Light-to-red colormap. It is not referenced by any cell visible here;
# presumably meant for styling the metric tables (TODO confirm or remove).
cm = sns.light_palette("red", as_cmap=True)

In [53]:
# Minimum number of examples mentioning a subgroup for it to be evaluated;
# smaller subgroups are skipped.
MIN_SUBGROUP_EXAMPLES = 20

identity_terms_civil_included = []
for _term in identity_terms_civil:
    # Summing the boolean subgroup column counts the examples that mention it.
    if sum(test_bias_df[_term]) >= MIN_SUBGROUP_EXAMPLES:
        print('keeping {}'.format(_term))
        identity_terms_civil_included.append(_term)

keeping male
keeping female
keeping transgender
keeping heterosexual
keeping homosexual_gay_or_lesbian
keeping christian
keeping jewish
keeping muslim
keeping hindu
keeping buddhist
keeping atheist
keeping black
keeping white
keeping asian
keeping latino
keeping other_race_or_ethnicity
keeping psychiatric_or_mental_illness


### Normalized pinned-AUC difference

In [85]:
# Pinned-AUC equality difference per model family (baseline + gender models),
# computed over the retained civil comments subgroups.
model_bias_analysis.per_subgroup_auc_diff_from_overall(
    test_bias_df, identity_terms_civil_included, MODEL_FAMILIES_GENDER, squared_error=True, normed_auc=True)

Unnamed: 0,model_family,pinned_auc_equality_difference
0,tf_gru_attention_civil:v_20181015_180733,0.08174
1,tf_gru_attention_civil:v_20181015_180308,0.073658
2,tf_gru_attention_civil:v_20181015_180839,0.065487
3,tf_gru_attention_civil:v_20181015_180947,0.081316
4,tf_gru_attention_civil:v_20181015_180619,0.075135
5,tf_gru_attention_civil:v_20181015_180510,0.060762
6,tf_gru_attention_civil:v_20181015_181046,0.053731


In [86]:
# Pinned-AUC equality difference per model family (baseline + orientation
# models), computed over the retained civil comments subgroups.
model_bias_analysis.per_subgroup_auc_diff_from_overall(
    test_bias_df, identity_terms_civil_included, MODEL_FAMILIES_ORIENTATION, squared_error=True, normed_auc=True)

Unnamed: 0,model_family,pinned_auc_equality_difference
0,tf_gru_attention_civil:v_20181030_095148,0.041495
1,tf_gru_attention_civil:v_20181015_180308,0.073658
2,tf_gru_attention_civil:v_20181030_095323,0.04387
3,tf_gru_attention_civil:v_20181030_095505,0.082294


### General metrics per model

In [89]:
# Optional mapping from full model name to a shorter display name. The metric
# cells look names up with `.get(_model, _model)`, so models without an entry
# keep their full name.
model_nick_names = {}

In [93]:
# Per-subgroup bias metrics on the civil comments bias dataset, one table per model family.
for model_family in MODEL_FAMILIES_ORIENTATION:

    bias_metrics_df = model_bias_analysis.per_subgroup_aucs(
        test_bias_df, identity_terms_civil_included, [model_family], 'label', include_asegs=True)

    ### Make it prettier
    # Show the nickname (when one is registered) instead of the full model name.
    _model = model_family[0]
    _nickname = model_nick_names.get(_model, _model)
    bias_metrics_df = bias_metrics_df.rename(columns={
        col: col.replace(_model, _nickname) for col in bias_metrics_df.columns
    })
    # Round list-valued metric cells to 2 decimals. Columns whose cells are not
    # lists of numbers (subgroup names, sizes, scalar statistics) raise
    # TypeError and are left untouched. Only that error is caught, so genuine
    # failures and KeyboardInterrupt still surface.
    for col in bias_metrics_df.columns:
        try:
            rounded = [[round(value, 2) for value in cell] for cell in bias_metrics_df[col]]
        except TypeError:
            continue
        bias_metrics_df[col] = rounded
    # To restrict or reorder the displayed metrics, index bias_metrics_df here
    # with a list of 'subgroup', 'subset_size' and '<nickname>_<metric>' columns.
    display(bias_metrics_df)

Unnamed: 0,subgroup,subset_size,tf_gru_attention_civil:v_20181015_180308_aucs,tf_gru_attention_civil:v_20181015_180308_cross_subgroup_negative_mwus,tf_gru_attention_civil:v_20181015_180308_cross_subgroup_positive_mwus,tf_gru_attention_civil:v_20181015_180308_mean,tf_gru_attention_civil:v_20181015_180308_median,tf_gru_attention_civil:v_20181015_180308_negative_asegs,tf_gru_attention_civil:v_20181015_180308_normalized_pinned_aucs,tf_gru_attention_civil:v_20181015_180308_positive_asegs,tf_gru_attention_civil:v_20181015_180308_std,tf_gru_attention_civil:v_20181015_180308_within_negative_label_mwus,tf_gru_attention_civil:v_20181015_180308_within_positive_label_mwus,tf_gru_attention_civil:v_20181015_180308_within_subgroup_mwus
0,male,4314,[0.89],[0.85],[0.94],0.892941,0.892941,[0.03],[0.89],[0.0],0.0,[0.17],[0.02],[0.88]
1,female,5072,[0.9],[0.85],[0.94],0.898937,0.898937,[0.04],[0.89],[0.0],0.0,[0.17],[0.01],[0.88]
2,transgender,284,[0.88],[0.78],[0.91],0.87582,0.87582,[0.07],[0.83],[0.0],0.0,[0.24],[-0.0],[0.79]
3,heterosexual,128,[0.86],[0.73],[0.96],0.855369,0.855369,[0.13],[0.83],[0.01],0.0,[0.32],[0.02],[0.8]
4,homosexual_gay_or_lesbian,1100,[0.86],[0.79],[0.92],0.860366,0.860366,[0.07],[0.83],[0.01],0.0,[0.25],[-0.04],[0.78]
5,christian,3722,[0.9],[0.91],[0.9],0.895382,0.895382,[0.0],[0.9],[0.01],0.0,[0.05],[-0.07],[0.89]
6,jewish,724,[0.89],[0.85],[0.91],0.891369,0.891369,[0.03],[0.87],[0.0],0.0,[0.17],[-0.03],[0.85]
7,muslim,2166,[0.87],[0.83],[0.91],0.867216,0.867216,[0.06],[0.85],[0.01],0.0,[0.22],[-0.07],[0.81]
8,hindu,54,[0.8],[0.82],[0.83],0.8,0.8,[0.06],[0.77],[0.05],0.0,[0.23],[-0.19],[0.65]
9,buddhist,42,[0.87],[0.86],[0.96],0.87037,0.87037,[0.01],[0.92],[0.04],0.0,[0.1],[-0.0],[0.95]


Unnamed: 0,subgroup,subset_size,tf_gru_attention_civil:v_20181030_095505_aucs,tf_gru_attention_civil:v_20181030_095505_cross_subgroup_negative_mwus,tf_gru_attention_civil:v_20181030_095505_cross_subgroup_positive_mwus,tf_gru_attention_civil:v_20181030_095505_mean,tf_gru_attention_civil:v_20181030_095505_median,tf_gru_attention_civil:v_20181030_095505_negative_asegs,tf_gru_attention_civil:v_20181030_095505_normalized_pinned_aucs,tf_gru_attention_civil:v_20181030_095505_positive_asegs,tf_gru_attention_civil:v_20181030_095505_std,tf_gru_attention_civil:v_20181030_095505_within_negative_label_mwus,tf_gru_attention_civil:v_20181030_095505_within_positive_label_mwus,tf_gru_attention_civil:v_20181030_095505_within_subgroup_mwus
0,male,4314,[0.88],[0.84],[0.93],0.884187,0.884187,[0.03],[0.88],[0.0],0.0,[0.17],[0.03],[0.88]
1,female,5072,[0.89],[0.84],[0.92],0.890014,0.890014,[0.03],[0.88],[0.0],0.0,[0.17],[0.01],[0.87]
2,transgender,284,[0.88],[0.81],[0.9],0.879303,0.879303,[0.06],[0.83],[0.0],0.0,[0.22],[-0.03],[0.79]
3,heterosexual,128,[0.83],[0.72],[0.93],0.830128,0.830128,[0.15],[0.8],[0.01],0.0,[0.33],[-0.0],[0.75]
4,homosexual_gay_or_lesbian,1100,[0.83],[0.78],[0.89],0.828916,0.828916,[0.08],[0.8],[0.01],0.0,[0.25],[-0.09],[0.74]
5,christian,3722,[0.88],[0.88],[0.9],0.881473,0.881473,[0.01],[0.88],[0.0],0.0,[0.09],[-0.05],[0.87]
6,jewish,724,[0.87],[0.8],[0.92],0.873306,0.873306,[0.06],[0.85],[0.0],0.0,[0.22],[0.02],[0.83]
7,muslim,2166,[0.85],[0.77],[0.92],0.847575,0.847575,[0.1],[0.82],[0.0],0.0,[0.27],[-0.03],[0.78]
8,hindu,54,[0.78],[0.79],[0.83],0.784091,0.784091,[0.09],[0.74],[0.05],0.0,[0.25],[-0.17],[0.6]
9,buddhist,42,[0.86],[0.82],[0.98],0.861111,0.861111,[0.04],[0.92],[0.04],0.0,[0.19],[0.1],[0.95]


Unnamed: 0,subgroup,subset_size,tf_gru_attention_civil:v_20181030_095323_aucs,tf_gru_attention_civil:v_20181030_095323_cross_subgroup_negative_mwus,tf_gru_attention_civil:v_20181030_095323_cross_subgroup_positive_mwus,tf_gru_attention_civil:v_20181030_095323_mean,tf_gru_attention_civil:v_20181030_095323_median,tf_gru_attention_civil:v_20181030_095323_negative_asegs,tf_gru_attention_civil:v_20181030_095323_normalized_pinned_aucs,tf_gru_attention_civil:v_20181030_095323_positive_asegs,tf_gru_attention_civil:v_20181030_095323_std,tf_gru_attention_civil:v_20181030_095323_within_negative_label_mwus,tf_gru_attention_civil:v_20181030_095323_within_positive_label_mwus,tf_gru_attention_civil:v_20181030_095323_within_subgroup_mwus
0,male,4314,[0.89],[0.87],[0.91],0.891496,0.891496,[0.02],[0.89],[0.0],0.0,[0.11],[-0.01],[0.88]
1,female,5072,[0.9],[0.88],[0.91],0.900744,0.900744,[0.01],[0.89],[0.0],0.0,[0.09],[-0.03],[0.88]
2,transgender,284,[0.88],[0.83],[0.89],0.882275,0.882275,[0.05],[0.84],[0.0],0.0,[0.21],[-0.01],[0.81]
3,heterosexual,128,[0.83],[0.78],[0.92],0.825721,0.825721,[0.09],[0.83],[0.01],0.0,[0.26],[-0.06],[0.78]
4,homosexual_gay_or_lesbian,1100,[0.86],[0.79],[0.92],0.861044,0.861044,[0.07],[0.84],[0.0],0.0,[0.23],[-0.03],[0.8]
5,christian,3722,[0.89],[0.92],[0.87],0.89326,0.89326,[0.0],[0.9],[0.01],0.0,[-0.0],[-0.1],[0.89]
6,jewish,724,[0.89],[0.87],[0.91],0.892992,0.892992,[0.02],[0.88],[0.0],0.0,[0.13],[-0.02],[0.87]
7,muslim,2166,[0.86],[0.82],[0.91],0.855385,0.855385,[0.06],[0.84],[0.01],0.0,[0.22],[-0.06],[0.8]
8,hindu,54,[0.9],[0.88],[0.84],0.9,0.9,[0.04],[0.83],[0.09],0.0,[0.17],[-0.25],[0.78]
9,buddhist,42,[0.9],[0.89],[0.92],0.902778,0.902778,[0.01],[0.89],[0.03],0.0,[0.11],[-0.08],[0.87]


Unnamed: 0,subgroup,subset_size,tf_gru_attention_civil:v_20181030_095148_aucs,tf_gru_attention_civil:v_20181030_095148_cross_subgroup_negative_mwus,tf_gru_attention_civil:v_20181030_095148_cross_subgroup_positive_mwus,tf_gru_attention_civil:v_20181030_095148_mean,tf_gru_attention_civil:v_20181030_095148_median,tf_gru_attention_civil:v_20181030_095148_negative_asegs,tf_gru_attention_civil:v_20181030_095148_normalized_pinned_aucs,tf_gru_attention_civil:v_20181030_095148_positive_asegs,tf_gru_attention_civil:v_20181030_095148_std,tf_gru_attention_civil:v_20181030_095148_within_negative_label_mwus,tf_gru_attention_civil:v_20181030_095148_within_positive_label_mwus,tf_gru_attention_civil:v_20181030_095148_within_subgroup_mwus
0,male,4314,[0.87],[0.87],[0.87],0.866475,0.866475,[0.0],[0.87],[0.0],0.0,[0.01],[-0.02],[0.86]
1,female,5072,[0.88],[0.89],[0.87],0.884902,0.884902,[0.0],[0.88],[0.0],0.0,[-0.04],[-0.03],[0.88]
2,transgender,284,[0.83],[0.89],[0.77],0.831865,0.831865,[0.0],[0.81],[0.02],0.0,[-0.02],[-0.12],[0.77]
3,heterosexual,128,[0.88],[0.83],[0.89],0.882612,0.882612,[0.01],[0.85],[0.01],0.0,[0.08],[-0.06],[0.83]
4,homosexual_gay_or_lesbian,1100,[0.82],[0.86],[0.83],0.823697,0.823697,[0.0],[0.83],[0.02],0.0,[0.04],[-0.12],[0.79]
5,christian,3722,[0.87],[0.91],[0.84],0.865141,0.865141,[0.01],[0.87],[0.01],0.0,[-0.09],[-0.08],[0.87]
6,jewish,724,[0.88],[0.86],[0.87],0.876742,0.876742,[0.0],[0.85],[0.0],0.0,[0.04],[-0.03],[0.84]
7,muslim,2166,[0.84],[0.81],[0.88],0.841789,0.841789,[0.02],[0.83],[0.0],0.0,[0.13],[-0.04],[0.8]
8,hindu,54,[0.85],[0.83],[0.79],0.852273,0.852273,[0.04],[0.75],[0.13],0.0,[0.17],[-0.28],[0.63]
9,buddhist,42,[0.9],[0.86],[0.94],0.902778,0.902778,[0.0],[0.91],[0.04],0.0,[0.02],[-0.07],[0.92]


# Get results for artificial dataset

In [92]:
# Per-subgroup bias metrics on the synthetic bias dataset, one table per model family.
for model_family in MODEL_FAMILIES_ORIENTATION:

    bias_metrics_df = model_bias_analysis.per_subgroup_aucs(
        test_bias_artificial_df, identity_terms_synthetic, [model_family], 'label', include_asegs=True)

    ### Make it prettier
    # Show the nickname (when one is registered) instead of the full model name.
    _model = model_family[0]
    _nickname = model_nick_names.get(_model, _model)
    bias_metrics_df = bias_metrics_df.rename(columns={
        col: col.replace(_model, _nickname) for col in bias_metrics_df.columns
    })
    # Round list-valued metric cells to 2 decimals. Columns whose cells are not
    # lists of numbers (subgroup names, sizes, scalar statistics) raise
    # TypeError and are left untouched. Only that error is caught, so genuine
    # failures and KeyboardInterrupt still surface.
    for col in bias_metrics_df.columns:
        try:
            rounded = [[round(value, 2) for value in cell] for cell in bias_metrics_df[col]]
        except TypeError:
            continue
        bias_metrics_df[col] = rounded
    # To restrict or reorder the displayed metrics, index bias_metrics_df here
    # with a list of 'subgroup', 'subset_size' and '<nickname>_<metric>' columns.
    display(bias_metrics_df)

KeyboardInterrupt: 