<a href="https://colab.research.google.com/github/imbealopez/Toxicity-Detection/blob/master/ToxicityDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# essential imports
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import warnings  # Ignore warnings

warnings.filterwarnings("ignore")

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import re  # regular expressions
import math  # math functions
import scipy.stats as stats
import random  # random numbers and generator
import copy  # copy objects
import pickle  # copy objects into binary files
import timeit  # timer
import os  # system functions
import sys
import datetime
import pkg_resources

# import seaborn as sns
import matplotlib.pyplot as plt  # plotting tool

# scikit-learn
# evaluation metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import classification_report

# model selection
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
from sklearn.model_selection import RepeatedKFold

# preprocessing
from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# preprocess text
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

# pytorch
import torch

# tensorflow
# import tensorflow as tf
# print(tf.__version__)

# Load data and set defaults

In [2]:
# Run this cell and select the kaggle.json file downloaded
# from the Kaggle account settings page. 
from google.colab import files
files.upload()
!ls -lha kaggle.json
!pip install -q kaggle
%cd /content/

# The Kaggle API client expects this file to be in ~/.kaggle,
# so move it there.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# This permissions change avoids a warning on Kaggle tool startup.
!chmod 600 ~/.kaggle/kaggle.json
# List available datasets.
#!kaggle datasets list
# download dataset from api
!kaggle competitions download -c jigsaw-unintended-bias-in-toxicity-classification

#unzip train and test sets into data directory
# -o overwrites files left by a previous run instead of prompting for each one,
# so this cell can be re-run unattended (e.g. Restart & Run All).
!unzip -o train.csv.zip -d ./data
!unzip -o test_private_expanded.csv.zip -d ./data
!unzip -o test_public_expanded.csv.zip -d ./data

Saving kaggle.json to kaggle.json
-rw-r--r-- 1 root root 66 Nov 22 18:10 kaggle.json
/content
Downloading identity_individual_annotations.csv.zip to /content
 39% 5.00M/12.7M [00:00<00:01, 5.47MB/s]
100% 12.7M/12.7M [00:01<00:00, 13.1MB/s]
Downloading sample_submission.csv.zip to /content
  0% 0.00/224k [00:00<?, ?B/s]
100% 224k/224k [00:00<00:00, 69.9MB/s]
Downloading test.csv.zip to /content
 42% 5.00M/12.0M [00:00<00:00, 18.4MB/s]
100% 12.0M/12.0M [00:00<00:00, 34.5MB/s]
Downloading test_private_expanded.csv.zip to /content
 58% 9.00M/15.6M [00:00<00:00, 12.9MB/s]
100% 15.6M/15.6M [00:00<00:00, 17.8MB/s]
Downloading test_public_expanded.csv.zip to /content
 57% 9.00M/15.7M [00:00<00:00, 16.2MB/s]
100% 15.7M/15.7M [00:00<00:00, 23.0MB/s]
Downloading toxicity_individual_annotations.csv.zip to /content
 74% 49.0M/66.6M [00:01<00:01, 17.7MB/s]
100% 66.6M/66.6M [00:02<00:00, 34.6MB/s]
Downloading train.csv.zip to /content
 98% 267M/273M [00:06<00:00, 55.8MB/s]
100% 273M/273M [00:06<00:00

In [8]:
# set defaults
#%matplotlib inline
# Interactive plotting on, and wider pandas display limits so the many-column
# comment frames are readable when shown.
plt.ion()
for option_name, option_value in (
    ("display.max_columns", 500),
    ("display.max_rows", 100),
    ("display.width", 1000),
):
    pd.set_option(option_name, option_value)


# default seeding for reproducibility
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU and all CUDA devices), and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Only takes effect for interpreters started after this point (e.g. worker
    # subprocesses); the hash seed of the running kernel is already fixed.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all seeds every CUDA device rather than only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run.
    torch.backends.cudnn.benchmark = False


seed_everything(42)

# Number of CPUs reported by the OS (os.cpu_count() may return None).
CORE_COUNT = os.cpu_count()
print('number of cores:', CORE_COUNT)

# Name of the column holding the toxicity score
TOXICITY_COLUMN = "target"
# Name of the column holding the raw comment text
TEXT_COLUMN = "comment_text"
# Identity (subgroup) columns used for the per-subgroup bias metrics.
# This is a subset of the identity columns present in train.csv.

identity_columns = [
    "male",
    "female",
    "homosexual_gay_or_lesbian",
    "christian",
    "jewish",
    "muslim",
    "black",
    "white",
    "psychiatric_or_mental_illness",
]

number of cores: 2


In [4]:
%%time
# Load train and test comments
# These files are produced by the unzip step of the Kaggle download cell
# (archives extracted into ./data under /content), so run that cell first.
train_comments = pd.read_csv('/content/data/train.csv')
test_private_comments = pd.read_csv('/content/data/test_private_expanded.csv')
test_public_comments = pd.read_csv('/content/data/test_public_expanded.csv')




CPU times: user 10.4 s, sys: 1.34 s, total: 11.7 s
Wall time: 11.7 s


In [0]:
print("loaded %d records" % len(train_comments))
#%%
# display first comment
print(train_comments.iloc[0]["comment_text"])
# display head
train_comments.head()

loaded 1804874 records
This is so cool. It's like, 'would you want your mother to read this??' Really great idea, well done!


Unnamed: 0,id,target,comment_text,severe_toxicity,obscene,identity_attack,insult,threat,asian,atheist,bisexual,black,buddhist,christian,female,heterosexual,hindu,homosexual_gay_or_lesbian,intellectual_or_learning_disability,jewish,latino,male,muslim,other_disability,other_gender,other_race_or_ethnicity,other_religion,other_sexual_orientation,physical_disability,psychiatric_or_mental_illness,transgender,white,created_date,publication_id,parent_id,article_id,rating,funny,wow,sad,likes,disagree,sexual_explicit,identity_annotator_count,toxicity_annotator_count
0,59848,0.0,"This is so cool. It's like, 'would you want yo...",0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,2015-09-29 10:50:41.987077+00,2,,2006,rejected,0,0,0,0,0,0.0,0,4
1,59849,0.0,Thank you!! This would make my life a lot less...,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,2015-09-29 10:50:42.870083+00,2,,2006,rejected,0,0,0,0,0,0.0,0,4
2,59852,0.0,This is such an urgent design problem; kudos t...,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,2015-09-29 10:50:45.222647+00,2,,2006,rejected,0,0,0,0,0,0.0,0,4
3,59855,0.0,Is this something I'll be able to install on m...,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,2015-09-29 10:50:47.601894+00,2,,2006,rejected,0,0,0,0,0,0.0,0,4
4,59856,0.893617,haha you guys are a bunch of losers.,0.021277,0.0,0.021277,0.87234,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015-09-29 10:50:48.488476+00,2,,2006,rejected,0,0,0,1,0,0.0,4,47


In [0]:
# display toxic comments above target 0.5
train_comments[train_comments["target"] >= 0.5].head()
# shuffle
# train_comments = train_comments.sample(frac=1).reset_index(drop=True)

Unnamed: 0,id,target,comment_text,severe_toxicity,obscene,identity_attack,insult,threat,asian,atheist,bisexual,black,buddhist,christian,female,heterosexual,hindu,homosexual_gay_or_lesbian,intellectual_or_learning_disability,jewish,latino,male,muslim,other_disability,other_gender,other_race_or_ethnicity,other_religion,other_sexual_orientation,physical_disability,psychiatric_or_mental_illness,transgender,white,created_date,publication_id,parent_id,article_id,rating,funny,wow,sad,likes,disagree,sexual_explicit,identity_annotator_count,toxicity_annotator_count
4,59856,0.893617,haha you guys are a bunch of losers.,0.021277,0.0,0.021277,0.87234,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015-09-29 10:50:48.488476+00,2,,2006,rejected,0,0,0,1,0,0.0,4,47
5,59859,0.666667,ur a sh*tty comment.,0.047619,0.638095,0.0,0.333333,0.0,,,,,,,,,,,,,,,,,,,,,,,,,2015-09-29 10:50:50.865549+00,2,,2006,rejected,0,0,0,0,0,0.009524,0,105
13,239583,0.6,It's ridiculous that these guys are being call...,0.0,0.1,0.0,0.6,0.1,,,,,,,,,,,,,,,,,,,,,,,,,2016-01-13 19:02:22.655293+00,6,,26670,approved,0,0,0,3,0,0.0,0,10
14,239584,0.5,This story gets more ridiculous by the hour! A...,0.0,0.0,0.0,0.3,0.0,,,,,,,,,,,,,,,,,,,,,,,,,2016-01-13 19:04:31.238894+00,6,,26670,approved,0,0,0,9,0,0.0,0,10
19,239592,0.5,"Angry trolls, misogynists and Racists"", oh my....",0.0,0.0,0.1,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2016-01-13 19:48:45.619202+00,6,,26795,approved,0,0,0,0,0,0.0,4,10


# Preprocessing

## helper functions

In [5]:
# helper functions

#imports
import re  # regular expressions
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')
from nltk import sent_tokenize, word_tokenize
from nltk.stem.snowball import SnowballStemmer
from nltk.corpus import stopwords
from nltk import WordNetLemmatizer  # lemmatizer
from nltk.stem import PorterStemmer  # stemmer
from nltk.tokenize import word_tokenize  # tokenizer

# preprocess text
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

# check blank string function
def isBlank(myString):
    """Return True when ``str(myString)`` is empty or only whitespace.

    The argument is converted with ``str()`` first, so non-string values are
    judged by their string form (``None`` becomes ``"None"`` and is not blank).
    """
    as_text = str(myString)
    return len(as_text.strip()) == 0

# tokenizers (a lot of feature vectors, raw)
def tokenizeBasic(txt):
    """Split ``txt`` on runs of whitespace and return the list of pieces.

    No normalisation is applied: case and punctuation are kept as-is.
    """
    tokens = txt.split()
    return tokens

# lemmatizer (doesn't remove punctuation)
class LemmaTokenizer(object):
    """Callable that tokenizes a document and lemmatizes every token.

    Tokens come from ``word_tokenize`` and each one is passed through a
    ``WordNetLemmatizer``; no tokens are dropped, so punctuation tokens
    emitted by the tokenizer are kept.
    """

    def __init__(self):
        self.wnl = WordNetLemmatizer()

    def __call__(self, doc):
        """Return the list of lemmatized tokens of ``doc``."""
        lemmas = []
        for token in word_tokenize(doc):
            lemmas.append(self.wnl.lemmatize(token))
        return lemmas


# stemmer (no difference from lemmatizer?)
# Module-level helpers shared by stemmed_words: a Porter stemmer and the
# default CountVectorizer analyzer.
stemmer = PorterStemmer()
analyzer = CountVectorizer().build_analyzer()

def stemmed_words(doc):
    """Return a generator of Porter stems for the analyzer tokens of ``doc``.

    The document is analysed immediately; stemming happens lazily as the
    returned generator is consumed.
    """
    tokens = analyzer(doc)
    return (stemmer.stem(token) for token in tokens)

#TODO

def cleanUp(text):
    """Lower-case, lemmatize and strip digits from every token of ``text``.

    Punctuation is kept (see ``cleanUpPP`` for the variant that removes it).

    Args:
        text: The comment to clean. ``bytes``/``bytearray`` input is decoded
            with undecodable bytes dropped; any other value is converted
            with ``str()``.

    Returns:
        A string made of the surviving tokens, each followed by one space
        (so a non-empty result ends with a trailing space). Tokens that
        become empty after digit removal are dropped.
    """
    # str(obj, errors=...) is a *decoding* call: it works only on bytes-like
    # objects and raises TypeError for an ordinary str, so branch on the type.
    if isinstance(text, (bytes, bytearray)):
        text = str(text, errors="ignore")
    else:
        text = str(text)

    # Initialise lemmatizer
    lemm = WordNetLemmatizer()

    # Stop-word filtering is currently disabled; populate this list
    # (e.g. with stopwords.words('english')) to enable it.
    my_stopwords = []

    kept = []
    for word in word_tokenize(text):
        # lemmatize the word (normalised to lower case)
        w = lemm.lemmatize(word.lower())
        # remove numbers
        w = re.sub(r'\d+', '', w)

        # filter out stopwords and tokens that became empty
        if w and w not in my_stopwords:
            kept.append(w)

    # Single join instead of repeated string concatenation; every token
    # still carries its own trailing space.
    return "".join(w + " " for w in kept)

def cleanUpPP(text):
    """Lower-case, lemmatize and strip punctuation and digits from ``text``.

    Same pipeline as ``cleanUp`` with one extra step: after lemmatization
    every character that is not a word character, whitespace, ``<``, ``/``
    or ``>`` is removed from the token.

    Args:
        text: The comment to clean. ``bytes``/``bytearray`` input is decoded
            with undecodable bytes dropped; any other value is converted
            with ``str()``.

    Returns:
        A string made of the surviving tokens, each followed by one space
        (so a non-empty result ends with a trailing space). Tokens that
        become empty after cleaning are dropped.
    """
    # str(obj, errors=...) is a *decoding* call: it works only on bytes-like
    # objects and raises TypeError for an ordinary str, so branch on the type.
    if isinstance(text, (bytes, bytearray)):
        text = str(text, errors="ignore")
    else:
        text = str(text)

    # Initialise lemmatizer
    lemm = WordNetLemmatizer()

    # Stop-word filtering is currently disabled. The NLTK list used to be
    # loaded here and then immediately replaced by an empty list; that dead
    # load is gone. Populate this list to enable filtering.
    my_stopwords = []

    kept = []
    for word in word_tokenize(text):
        # lemmatize the word (normalised to lower case)
        w = lemm.lemmatize(word.lower())
        # remove punctuation (but keep < / > so tag-like tokens survive)
        w = re.sub(r'[^\w\s</>]', '', w)
        # remove numbers
        w = re.sub(r'\d+', '', w)

        # filter out stopwords and tokens that became empty
        if w and w not in my_stopwords:
            kept.append(w)

    # Single join instead of repeated string concatenation; every token
    # still carries its own trailing space.
    return "".join(w + " " for w in kept)


def preprocess(text):
    """Normalise the documents found in ``text[:][0]``.

    For each document: remove HTML tags, remove punctuation, remove digits,
    then lower-case. Documents that end up empty are left out, so the result
    can be shorter than the input.

    ``text[:][0]`` is column ``0`` for a pandas DataFrame; for a NumPy array
    the original author notes ``text[:]`` should be iterated instead.
    """
    def _normalise(raw):
        cleaned = re.sub('<.*?>', '', raw)          # remove HTML tags
        cleaned = re.sub(r'[^\w\s]', '', cleaned)   # remove punctuation
        cleaned = re.sub(r'\d+', '', cleaned)       # remove numbers
        return cleaned.lower()

    normalised = (_normalise(x) for x in text[:][0])
    return [doc for doc in normalised if doc != '']

def tokenization_w(words):
    """Tokenize every document in ``words[:][0]`` with ``word_tokenize``.

    Args:
        words: Container whose ``[:][0]`` element is an iterable of strings
            (column ``0`` of a DataFrame; for a NumPy array the original
            author notes ``words[:]`` should be iterated instead).

    Returns:
        A list with one token list per document. Documents that yield no
        tokens (for example empty strings) are skipped.
    """
    tokenized = (word_tokenize(w) for w in words[:][0])
    # word_tokenize returns a list, so the former `w_token != ''` comparison
    # was always true and never filtered anything; test for emptiness instead.
    return [tokens for tokens in tokenized if tokens]

# English Snowball stemmer shared by stemming().
snowball = SnowballStemmer(language = 'english')
def stemming(words):
    """Stem every item of ``words[:][0]`` with the Snowball stemmer.

    Returns a one-element list wrapping the list of stems.
    """
    first_entry = words[:][0]
    return [[snowball.stem(token) for token in first_entry]]
    
# WordNet lemmatizer shared by lemmatization().
lemmatizer = WordNetLemmatizer()
def lemmatization(words):
    """Lemmatize every item of ``words[:][0]`` with the WordNet lemmatizer.

    Returns a one-element list wrapping the list of lemmas.
    """
    first_entry = words[:][0]
    return [[lemmatizer.lemmatize(token) for token in first_entry]]

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


## tests

In [0]:
# %%
# TODO test preprocessing

# Make sure all comment_text values are strings
train_comments["comment_text"] = train_comments["comment_text"].astype(str)



# Convert target and identity columns to booleans
def convert_to_bool(df, col_name):
    """Replace ``df[col_name]`` in place with ``value >= 0.5`` booleans.

    Values for which the comparison is false (including NaN) become False.
    The frame is modified in place and nothing is returned.
    """
    at_or_above_threshold = df[col_name] >= 0.5
    df[col_name] = np.where(at_or_above_threshold, True, False)


def convert_dataframe_to_bool(df):
    """Return a copy of ``df`` with the target and identity columns as booleans.

    The ``"target"`` column and every column named in ``identity_columns``
    are thresholded by ``convert_to_bool``; ``df`` itself is left untouched.
    """
    bool_df = df.copy()
    columns_to_threshold = ["target", *identity_columns]
    for column in columns_to_threshold:
        convert_to_bool(bool_df, column)
    return bool_df

#%%
train_comments_cleaned = convert_dataframe_to_bool(train_comments)
# train_comments.loc[:, "comment_text"] = train_comments.comment_text.apply(cleanUp)


In [0]:
# display 10 toxic comments 
train_comments_cleaned[train_comments_cleaned["target"] == True]['comment_text'].head(10).values

array(['haha you guys are a bunch of losers.', 'ur a sh*tty comment.',
       'It\'s ridiculous that these guys are being called "protesters". Being armed is a threat of violence, which makes them terrorists.',
       "This story gets more ridiculous by the hour! And, I love that people are sending these guys dildos in the mail now. But… if they really think there's a happy ending in this for any of them, I think they're even more deluded than all of the jokes about them assume.",
       'Angry trolls, misogynists and Racists", oh my. It doesn\'t take all of my 150 IQ to see the slant here.  it\'s the "Diversity diode" at work yet again. "We can say anything that we want because we are Diversity. You on the other hand must only  say what we allow you to say. From now on, winning arguments against any member of diversity will be considered offensive language.  facts, cogent, linear posts and Math are now verboten.',
       "Yet call out all Muslims for the acts of a few will get you pil

In [0]:
# display 10 selected identity comments
train_comments_cleaned[train_comments_cleaned["homosexual_gay_or_lesbian"] == True]['comment_text'].head(10).values

array(["Is there any such thing as a 'gay trans woman '?  Technically?  Theoretically, for example, a female, once you become your born sex, anatomically, you will be/continue to be attracted to women and not necessarily gay women.",
       'Well, this wasn\'t meant to be a dissertation. It\'s an entertainment piece, so I was mostly trying to be funny—whether I was successful in that regard is up to the reader. And of all these guys, I have a certain fondness for Hitchens, as I make clear, but I had to make fun of him for that whole "women aren\'t funny" thing. And while I can\'t say I have any "favorite" atheist artists or writers—I don\'t really seek them out, to be honest—I certainly don\'t think all atheists are "awful" (some of my best friends are atheists; some of them may even be gay and/or black!). I mostly just get annoyed when someone thinks identifying as an atheist inherently makes them smarter than a rational theist. I\'m cool with whatever philosophical choice you\'ve dec

preprocess toxic words

In [0]:
# For each comment, collect the whitespace-separated tokens containing '*'.
train_comments_cleaned['stared words'] = train_comments_cleaned['comment_text'].apply(
    lambda comment: {token for token in comment.split() if '*' in token}
)


In [0]:
# Merge the per-comment sets (rows with an empty set are filtered out first)
# into one list of distinct starred tokens for inspection.
list(set().union(*train_comments_cleaned[train_comments_cleaned["stared words"] != set()]['stared words'].values
))

['puss*ies....they',
 '"p***y,"',
 'thought*',
 'r*dnecks',
 '**Look',
 '*paid',
 'know!!!!*****',
 '**the**',
 'p***y"',
 'sh**-slinging',
 '(Sh***ty',
 'said.*',
 '***********',
 'form*',
 'F**K',
 '*Russian*',
 'freely*',
 '*U.S.',
 '*aberrant*,',
 '*listen*',
 '%$#*$^ing',
 '*shrug*',
 'exhausted*',
 '*royalty*',
 'B**tards',
 '(*&^',
 '*currently*',
 '*net*',
 '*Shocker*.',
 'way.*',
 '*spent*',
 '*witness*,',
 '*"if',
 'tricks...bull***t!',
 'wink-wink.**',
 'p*ssy-grabber,',
 '*gas-line,',
 '*glorify*',
 'C*',
 '*few',
 '*got*',
 'income*',
 'A**hole',
 '*lower*',
 'puss**ies)....',
 '*legality*',
 'a*sholes',
 'authorized*',
 'Birds*.',
 '*https://knoema.com/atlas/Canada/topics/Demographics/Population/Net-migration-rate*',
 '*current*',
 'million(*)',
 'cares*',
 '*whispering*',
 ',(*hope',
 '*****Grabber',
 'jour*',
 'b****”',
 'way.**"',
 '(*snicker*).',
 'sh**crazy',
 '*Using',
 '*Facepalm*',
 '*re-arranging',
 '*apparently',
 '*institutions*',
 'for*',
 '*Nor',
 'bu***it...

In [0]:
# Narrower pass: keep only tokens with a '*' that neither start nor end with
# one, which drops *emphasis* markers and footnote-style asterisks.
train_comments_cleaned['stared words'] = train_comments_cleaned['comment_text'].apply(
    lambda comment: {
        token
        for token in comment.split()
        if '*' in token and not token.startswith('*') and not token.endswith('*')
    }
)

In [0]:
# Merge the per-comment sets (rows with an empty set are filtered out first)
# into one list of distinct starred tokens for inspection.
list(set().union(*train_comments_cleaned[train_comments_cleaned["stared words"] != set()]['stared words'].values
))

['P*ssies.',
 'puss*ies....they',
 'dum*mies',
 '"p***y,"',
 'r*dnecks',
 'together*)',
 'is*?',
 'then...**RG:',
 'sh*t....',
 'p***y"',
 'sh**-slinging',
 '(Sh***ty',
 'shi**ing',
 'division*.',
 'F**K',
 'bullsh*tters',
 's**ts.',
 's*it,',
 'p*****y."',
 'f*ggot',
 '%$#*$^ing',
 'C**nton',
 'bullsh*t?',
 'B**tards',
 '(*&^',
 '(******Oaks),',
 '(*reposted',
 'H**Lhole',
 'equality......*groan*.',
 '<bullsh**t>',
 'tricks...bull***t!',
 'p*ssy-grabber,',
 'Sh*t.',
 'kill*?',
 'ratf***ed?',
 'A**hole',
 '(*Minimum',
 'puss**ies)....',
 'Oil*.',
 'C**T.',
 'Chicago*,',
 'Pi**ing',
 'a*sholes',
 'P*ss"',
 'Birds*.',
 'se*...',
 'a**hole!"',
 'f**cked',
 'sh*t-faced',
 'properly*.',
 'million(*)',
 "p***y.'",
 ',(*hope',
 'thing*)-',
 's***+',
 'horse***ter.',
 'b****”',
 'person*;',
 'way.**"',
 'S*&$',
 's*x,',
 'wh**e',
 '(*snicker*).',
 'sh**crazy',
 '@$%*&##',
 'on*.',
 'bu***it...does',
 'bat-s**t',
 'toxic."*???',
 'F*kin',
 'jacka**.',
 'p****!"',
 'appurtenances*.',
 'a**for',


# Model training

## train-validation split

In [7]:

# %%
# comment-target split
#full_labels = train_comments.iloc[:]["target"].copy()
#full_comments = train_comments[["comment_text"]].copy()
#print(full_labels.head())
#print(full_comments.head())

# split train into training-evaluation set 80%-20%
# x_train, x_eval, y_train, y_eval = train_test_split(
#     full_comments, full_labels, test_size=0.2, random_state=42, shuffle=False
# )
# array form
# x_train.values

# 80/20 split of the raw training frame. shuffle=False keeps the original row
# order, so the last 20% of rows form the validation set and random_state has
# no effect on the result.
# NOTE(review): this splits train_comments (float scores), not
# train_comments_cleaned; the DataFrame-based bias helpers index with
# df[df[subgroup]] and appear to need boolean columns - confirm before using
# them on validate_df.
train_df, validate_df = train_test_split(
    train_comments, test_size=0.2, random_state=42, shuffle=False
)

print("%d train comments, %d validate comments" % (len(train_df), len(validate_df)))



0    0.000000
1    0.000000
2    0.000000
3    0.000000
4    0.893617
Name: target, dtype: float64
                                        comment_text
0  This is so cool. It's like, 'would you want yo...
1  Thank you!! This would make my life a lot less...
2  This is such an urgent design problem; kudos t...
3  Is this something I'll be able to install on m...
4               haha you guys are a bunch of losers.
1443899 train comments, 360975 validate comments


In [0]:
#%%
# TODO setup and run model
MODEL_NAME = "my_model"
# validate_df[MODEL_NAME] = model.predict(pad_text(validate_df[TEXT_COLUMN], tokenizer))[:, 1]


# Evaluations

## helper functions

Evaluate biases

In [0]:
#imports
from sklearn import metrics
import numpy as np 
import pandas as pd 


#Define bias metrics, then evaluate our new model for bias using the validation set predictions

SUBGROUP_AUC = 'subgroup_auc'
BPSN_AUC = 'bpsn_auc'  # stands for background positive, subgroup negative
BNSP_AUC = 'bnsp_auc'  # stands for background negative, subgroup positive

def compute_auc(y_true, y_pred):
    """Return the ROC AUC of ``y_pred`` against ``y_true``.

    If ``roc_auc_score`` raises ``ValueError``, NaN is returned instead of
    propagating the error.
    """
    try:
        auc = metrics.roc_auc_score(y_true, y_pred)
    except ValueError:
        auc = np.nan
    return auc

def compute_subgroup_auc(df, subgroup, label, model_name):
    """Compute the AUC restricted to the rows selected by ``df[subgroup]``.

    ``df[subgroup]`` is used directly as a row mask (assumes a boolean
    column - verify against the caller); ``label`` names the ground-truth
    column and ``model_name`` the prediction column.
    """
    in_subgroup = df[subgroup]
    members = df[in_subgroup]
    return compute_auc(members[label], members[model_name])

def compute_bpsn_auc(df, subgroup, label, model_name):
    """Computes the AUC of the within-subgroup negative examples and the background positive examples.

    Args:
        df: Frame holding the subgroup, label and prediction columns.
        subgroup: Name of the subgroup column; combined with ``&`` / ``~``,
            so it is expected to be boolean (verify against the caller).
        label: Name of the ground-truth column, also expected to be boolean.
        model_name: Name of the column holding the model scores.

    Returns:
        The value returned by ``compute_auc`` for the combined rows.
    """
    subgroup_negative_examples = df[df[subgroup] & ~df[label]]
    non_subgroup_positive_examples = df[~df[subgroup] & df[label]]
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
    # selections in the same order.
    examples = pd.concat([subgroup_negative_examples, non_subgroup_positive_examples])
    return compute_auc(examples[label], examples[model_name])

def compute_bnsp_auc(df, subgroup, label, model_name):
    """Computes the AUC of the within-subgroup positive examples and the background negative examples.

    Args:
        df: Frame holding the subgroup, label and prediction columns.
        subgroup: Name of the subgroup column; combined with ``&`` / ``~``,
            so it is expected to be boolean (verify against the caller).
        label: Name of the ground-truth column, also expected to be boolean.
        model_name: Name of the column holding the model scores.

    Returns:
        The value returned by ``compute_auc`` for the combined rows.
    """
    subgroup_positive_examples = df[df[subgroup] & df[label]]
    non_subgroup_negative_examples = df[~df[subgroup] & ~df[label]]
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
    # selections in the same order.
    examples = pd.concat([subgroup_positive_examples, non_subgroup_negative_examples])
    return compute_auc(examples[label], examples[model_name])

def compute_bias_metrics_for_model(dataset,
                                   subgroups,
                                   model,
                                   label_col,
                                   include_asegs=False):
    """Computes per-subgroup metrics for all subgroups and one model.

    Args:
        dataset: Frame holding the subgroup, label and prediction columns.
        subgroups: Iterable of subgroup column names.
        model: Name of the column holding the model scores.
        label_col: Name of the ground-truth column.
        include_asegs: Not used by this function; kept so existing calls
            that pass it keep working.

    Returns:
        A DataFrame with one row per subgroup and the columns ``subgroup``,
        ``subgroup_size`` and the three AUC metrics, sorted by ascending
        subgroup AUC. An empty ``subgroups`` gives an empty frame with the
        same columns.
    """
    records = []
    for subgroup in subgroups:
        records.append({
            'subgroup': subgroup,
            'subgroup_size': len(dataset[dataset[subgroup]]),
            SUBGROUP_AUC: compute_subgroup_auc(dataset, subgroup, label_col, model),
            BPSN_AUC: compute_bpsn_auc(dataset, subgroup, label_col, model),
            BNSP_AUC: compute_bnsp_auc(dataset, subgroup, label_col, model),
        })
    # Explicit columns keep the layout stable and let sort_values succeed
    # even when there are no records (it used to raise KeyError).
    columns = ['subgroup', 'subgroup_size', SUBGROUP_AUC, BPSN_AUC, BNSP_AUC]
    return pd.DataFrame(records, columns=columns).sort_values(SUBGROUP_AUC, ascending=True)

#uncomment to show only bias metric
#bias_metrics_df = compute_bias_metrics_for_model(validate_df, identity_columns, MODEL_NAME, TOXICITY_COLUMN)
#bias_metrics_df

#NOTE use bettermetric.py for below method
#calculate final score
TOXICITY_COLUMN = 'target'
def calculate_overall_auc(df, model_name):
    """Return the ROC AUC of ``df[model_name]`` against ``df[TOXICITY_COLUMN]``.

    Unlike ``compute_auc``, a ``ValueError`` from ``roc_auc_score`` is not
    caught here.
    """
    return metrics.roc_auc_score(df[TOXICITY_COLUMN], df[model_name])

def power_mean(series, p):
    """Return the generalised (power) mean of ``series`` with exponent ``p``.

    Computed as ``(sum(x ** p) / len(series)) ** (1 / p)``. The elements are
    added with the built-in ``sum``, so a NaN anywhere makes the result NaN.
    """
    powered = np.power(series, p)
    mean_of_powers = sum(powered) / len(series)
    return np.power(mean_of_powers, 1 / p)

def get_final_metric(bias_df, overall_auc, POWER=-5, OVERALL_MODEL_WEIGHT=0.25):
    """Blend the overall AUC with the power-mean of the bias AUCs.

    The subgroup, BPSN and BNSP columns of ``bias_df`` are each reduced with
    ``power_mean(..., POWER)`` and the three results are averaged. The
    returned score is ``OVERALL_MODEL_WEIGHT * overall_auc`` plus
    ``(1 - OVERALL_MODEL_WEIGHT)`` times that average.
    """
    per_metric_means = [
        power_mean(bias_df[metric_column], POWER)
        for metric_column in (SUBGROUP_AUC, BPSN_AUC, BNSP_AUC)
    ]
    bias_score = np.average(per_metric_means)
    weighted_overall = OVERALL_MODEL_WEIGHT * overall_auc
    weighted_bias = (1 - OVERALL_MODEL_WEIGHT) * bias_score
    return weighted_overall + weighted_bias
#uncomment to show overall metric    
#get_final_metric(bias_metrics_df, calculate_overall_auc(validate_df, MODEL_NAME))

Evaluate overall

In [0]:
import numpy as np
from sklearn.metrics import roc_auc_score


class JigsawEvaluator:
    """Array-based scorer combining the overall AUC with per-subgroup bias AUCs.

    Labels and identity annotations are thresholded at 0.5 on construction;
    ``get_final_metric`` then scores a vector of predictions against them.
    Inputs may be NumPy arrays, lists or pandas objects - everything is
    converted with ``np.asarray`` so that positional indexing works.

    Args:
        y_true: Toxicity scores, one per example.
        y_identity: 2-D identity scores, one row per example and one column
            per subgroup. Values for which ``>= 0.5`` is false (including
            NaN) count as "not in the subgroup".
        power: Exponent of the power mean applied to each bias metric.
        overall_model_weight: Weight of the overall AUC in the final score;
            the bias score receives ``1 - overall_model_weight``.
    """

    def __init__(self, y_true, y_identity, power=-5, overall_model_weight=0.25):
        # np.asarray: plain lists do not support `>= 0.5`, and DataFrames do
        # not support the `[:, i]` indexing used by the mask helpers.
        self.y = (np.asarray(y_true) >= 0.5).astype(int)
        self.y_i = (np.asarray(y_identity) >= 0.5).astype(int)
        self.n_subgroups = self.y_i.shape[1]
        self.power = power
        self.overall_model_weight = overall_model_weight

    @staticmethod
    def _compute_auc(y_true, y_pred):
        """Return the ROC AUC, or NaN if ``roc_auc_score`` raises ValueError."""
        try:
            return roc_auc_score(y_true, y_pred)
        except ValueError:
            return np.nan

    def _compute_subgroup_auc(self, i, y_pred):
        """AUC over the examples that belong to subgroup ``i``."""
        y_pred = np.asarray(y_pred)
        mask = self.y_i[:, i] == 1
        return self._compute_auc(self.y[mask], y_pred[mask])

    def _compute_bpsn_auc(self, i, y_pred):
        """AUC over background positives and subgroup negatives.

        Identity flag + label equals 1 exactly when one of them is set:
        in the subgroup and negative, or outside it and positive.
        """
        y_pred = np.asarray(y_pred)
        mask = self.y_i[:, i] + self.y == 1
        return self._compute_auc(self.y[mask], y_pred[mask])

    def _compute_bnsp_auc(self, i, y_pred):
        """AUC over background negatives and subgroup positives.

        Identity flag + label differs from 1 when both are set or both are
        unset: in the subgroup and positive, or outside it and negative.
        """
        y_pred = np.asarray(y_pred)
        mask = self.y_i[:, i] + self.y != 1
        return self._compute_auc(self.y[mask], y_pred[mask])

    def compute_bias_metrics_for_model(self, y_pred):
        """Return a ``(3, n_subgroups)`` array of bias AUCs.

        Row 0 holds the subgroup AUCs, row 1 the BPSN AUCs and row 2 the
        BNSP AUCs; column ``i`` corresponds to column ``i`` of ``y_identity``.
        """
        y_pred = np.asarray(y_pred)
        records = np.zeros((3, self.n_subgroups))
        for i in range(self.n_subgroups):
            records[0, i] = self._compute_subgroup_auc(i, y_pred)
            records[1, i] = self._compute_bpsn_auc(i, y_pred)
            records[2, i] = self._compute_bnsp_auc(i, y_pred)
        return records

    def _calculate_overall_auc(self, y_pred):
        """ROC AUC over all examples (a ValueError is not caught here)."""
        return roc_auc_score(self.y, y_pred)

    def _power_mean(self, array):
        """Power mean of ``array`` with exponent ``self.power``.

        Uses the built-in ``sum``, so a NaN entry makes the result NaN.
        """
        total = sum(np.power(array, self.power))
        return np.power(total / len(array), 1 / self.power)

    def get_final_metric(self, y_pred):
        """Return the final score for ``y_pred``.

        The three bias metrics are each reduced with the power mean and
        averaged; the result is weighted by ``1 - overall_model_weight`` and
        added to ``overall_model_weight`` times the overall AUC.
        """
        y_pred = np.asarray(y_pred)
        bias_metrics = self.compute_bias_metrics_for_model(y_pred)
        bias_score = np.average(
            [
                self._power_mean(bias_metrics[0]),
                self._power_mean(bias_metrics[1]),
                self._power_mean(bias_metrics[2]),
            ]
        )
        overall_score = self.overall_model_weight * self._calculate_overall_auc(y_pred)
        bias_score = (1 - self.overall_model_weight) * bias_score
        return overall_score + bias_score


## tests

In [0]:
# %%

# Raw NumPy views of the validation labels and identity scores;
# JigsawEvaluator thresholds both at 0.5 itself.
y_true = validate_df["target"].values
y_identity = validate_df[identity_columns].values

# predict
# TODO add model
# y_pred = model.predict_proba(train_df)

# evaluate
# evaluator = JigsawEvaluator(y_true, y_identity)
# auc_score = evaluator.get_final_metric(y_pred)

# uncomment to show only bias metric
# bias_metrics_df = compute_bias_metrics_for_model(validate_df, identity_columns, MODEL_NAME, TOXICITY_COLUMN)
# bias_metrics_df
