In [1]:
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn import model_selection

validate_df = pd.read_csv('train.csv').sample(20000,random_state=1234)

identity_columns = [
    'male', 'female', 'homosexual_gay_or_lesbian', 'christian', 'jewish',
    'muslim', 'black', 'white', 'psychiatric_or_mental_illness']
TOXICITY_COLUMN = 'target'
TEXT_COLUMN = 'comment_text'
MODEL_NAME = "LSTM"
validate_df[MODEL_NAME] = pd.read_pickle('val_pred_lstm')

print(validate_df.head(n=10))

# Define bias metrics

SUBGROUP_AUC = 'subgroup_auc'
BPSN_AUC = 'bpsn_auc'  # stands for background positive, subgroup negative
BNSP_AUC = 'bnsp_auc'  # stands for background negative, subgroup positive

def compute_auc(y_true, y_pred):
    try:
        return metrics.roc_auc_score(y_true, y_pred)
    except ValueError:
        return np.nan

def compute_subgroup_auc(df, subgroup, label, model_name):
    subgroup_examples = df[df[subgroup]]
    return compute_auc(subgroup_examples[label], subgroup_examples[model_name])

def compute_bpsn_auc(df, subgroup, label, model_name):
    """Computes the AUC of the within-subgroup negative examples and the background positive examples."""
    subgroup_negative_examples = df[df[subgroup] & ~df[label]]
    non_subgroup_positive_examples = df[~df[subgroup] & df[label]]
    examples = subgroup_negative_examples.append(non_subgroup_positive_examples)
    return compute_auc(examples[label], examples[model_name])

def compute_bnsp_auc(df, subgroup, label, model_name):
    """Computes the AUC of the within-subgroup positive examples and the background negative examples."""
    subgroup_positive_examples = df[df[subgroup] & df[label]]
    non_subgroup_negative_examples = df[~df[subgroup] & ~df[label]]
    examples = subgroup_positive_examples.append(non_subgroup_negative_examples)
    return compute_auc(examples[label], examples[model_name])

def compute_bias_metrics_for_model(dataset,
                                   subgroups,
                                   model,
                                   label_col,
                                   include_asegs=False):
    """Computes per-subgroup metrics for all subgroups and one model."""
    records = []
    for subgroup in subgroups:
        record = {
            'subgroup': subgroup,
            'subgroup_size': len(dataset[dataset[subgroup]])
        }
        record[SUBGROUP_AUC] = compute_subgroup_auc(dataset, subgroup, label_col, model)
        record[BPSN_AUC] = compute_bpsn_auc(dataset, subgroup, label_col, model)
        record[BNSP_AUC] = compute_bnsp_auc(dataset, subgroup, label_col, model)
        records.append(record)
    return pd.DataFrame(records).sort_values('subgroup_auc', ascending=True)

bias_metrics_df = compute_bias_metrics_for_model(validate_df, identity_columns, MODEL_NAME, TOXICITY_COLUMN)

# Calculate the final score

def calculate_overall_auc(df, model_name):
    true_labels = df[TOXICITY_COLUMN]
    predicted_labels = df[model_name]
    return metrics.roc_auc_score(true_labels, predicted_labels)

def power_mean(series, p):
    total = sum(np.power(series, p))
    return np.power(total / len(series), 1 / p)

def get_final_metric(bias_df, overall_auc, POWER=-5, OVERALL_MODEL_WEIGHT=0.25):
    bias_score = np.average([
        power_mean(bias_df[SUBGROUP_AUC], POWER),
        power_mean(bias_df[BPSN_AUC], POWER),
        power_mean(bias_df[BNSP_AUC], POWER)
    ])
    return (OVERALL_MODEL_WEIGHT * overall_auc) + ((1 - OVERALL_MODEL_WEIGHT) * bias_score)
    
get_final_metric(bias_metrics_df, calculate_overall_auc(validate_df, MODEL_NAME))


              id    target                                       comment_text  \
458232    806064  0.000000  It's difficult for many old people to keep up ...   
272766    576402  0.166667  She recognized that her tiny-handed husband is...   
339129    658508  0.000000  HPHY76,\nGood for you for thinking out loud, w...   
773565   5066714  0.500000  And I bet that in the day you expected your Je...   
476233    828147  0.000000  Kennedy will add a much needed and scientifica...   
317668    631532  0.300000  Yeah, because it is far more important to pres...   
839167   5147509  0.000000          Housing inmates is a profitable business.   
913935   5237995  0.000000                                           Go Sens!   
904703   5226678  0.200000  Lol...sounds like someone IS brainwashed but I...   
1579488  6054559  0.000000  Tax relief should include commercial real esta...   

         severe_toxicity  obscene  identity_attack    insult  threat  asian  \
458232               0.0     

KeyError: "None of [Float64Index([nan, nan, nan, 0.0, nan, nan, nan, nan, nan, nan,\n              ...\n              0.0, nan, nan, nan, nan, nan, nan, nan, 0.0, nan],\n             dtype='float64', length=20000)] are in the [columns]"