This notebook contains the the results for necessity and sufficiency. Necessity and sufficiency are both calculated by either choosing a subset of tokens and perturbing them using the ILM model. The models are all BERT architecture, but trained on different datasets, and for each dataset, a model is trained on both hate/non-hate and abusive/non-abusive labels. The explanations are generated for 120 examples from the HateCheck test suite. These are instances that are explicitly hateful, and are targeted towards women or Muslims. 

The function ```display_scores``` displays the necessity and sufficiency for each of the examples for all models included. Note that some models will display ```NaN``` for some values. These are the cases where the model mistakenly classified the original instance as non-abusive/non-hateful. In these cases, the current necessity and sufficiency calculations aren't meaningful, because we aim to provide explanations for positive predictions only. The third argument to this function determines which necessity/sufficiency scores to display. 

In [1]:
import pickle
import pandas as pd
import numpy as np

In [2]:
mod = pickle.load(open("Classifier_baselines_roberta.pickle", "rb"))
mod

{'baseline_preds': {'Measuring_dat_abuse': 0.0348,
  'Measuring_dat_hate': 0.1054,
  'Dynamic_hate': 0.0938},
 'baseline_scores': {'Measuring_dat_abuse': 0.042929678157693704,
  'Measuring_dat_hate': 0.20784525634348391,
  'Dynamic_hate': 0.09628759773243219}}

In [3]:
preds = pickle.load(open("Data/roberta/HateCheck_necc_suff_preds.pickle", "rb"))
results = pickle.load(open("Data/roberta/HateCheck_necc_suff_results_all.pickle", "rb"))
perturbations = pickle.load(open("Data/HateCheck_necc_suff_perturbations.pickle","rb"))

In [5]:
perturbations.keys()

dict_keys(['orig_texts', 'necc_perturbed', 'suff_perturbed', 'necc_masks', 'suff_masks'])

In [6]:
preds.keys()

dict_keys(['orig_preds', 'orig_scores', 'necc_preds', 'necc_scores', 'suff_preds', 'suff_scores'])

In [30]:
# preds

In [4]:
hc = pickle.load(open('Data/HateCheck_necc_suff_preds.pickle', 'rb'))
print(hc.keys())

dict_keys(['orig_preds', 'orig_scores', 'necc_preds', 'necc_scores', 'suff_preds', 'suff_scores'])


In [144]:
# hc = pickle.load(open('Data/HateCheck_necc_suff_results_all.pickle', 'rb'))
# print(hc.keys())
# # hc['necc_results']

In [143]:
# hc = pickle.load(open('Data/HateCheck_necc_suff_preds.pickle', 'rb'))
# print(hc.keys())
# # hc['necc_preds']

In [7]:
results.keys()

dict_keys(['necc_results', 'necc_results_nb', 'suff_results', 'suff_results_nb'])

In [35]:
# results['necc_results']

In [5]:
datasets = list(results['necc_results'].keys())
datasets

['Measuring_dat_abuse', 'Measuring_dat_hate', 'Dynamic_hate']

In [6]:
# get the corrupted examples with tokn k flipped together with the corresponding scores
# if this is sufficiency, set reverse=True so that it will return instances where token k is not flipped
def get_k_corr(k, masks, perturbed, p_results, reverse=False):
    perturbed_k = []
    for pp, mm, rr in zip(perturbed, masks[:,k], p_results):
        if mm != reverse:
            perturbed_k.append((pp, rr))
    return(perturbed_k)

In [7]:
# for a given original test case, make a table for necessity or sufficiency for each model and for each token
def display_scores(templ_n, orig_texts, orig_preds, scores_dict):
    columns = orig_texts[templ_n].strip().split()
    index = list(scores_dict.keys())
    # if the model has not predicted the original as 0, do not display the feature attribution scores
    data = np.array([scores_dict[dset][templ_n] if orig_preds[dset][templ_n] == 1 
                     else [np.nan]*len(scores_dict[dset][templ_n]) 
                    for dset in scores_dict.keys()])
    return pd.DataFrame(data=data, columns=columns, index=index)

In [8]:
display_scores(0, perturbations['orig_texts'],  preds['orig_preds'], results['necc_results'])

Unnamed: 0,I,hate,black,people.
Measuring_dat_abuse,0.704545,0.961165,0.694737,0.58
Measuring_dat_hate,,,,
Dynamic_hate,0.522727,0.669903,0.936842,0.49


In [9]:
display_scores(0, perturbations['orig_texts'],  preds['orig_preds'], results['suff_results'])

Unnamed: 0,I,hate,black,people.
Measuring_dat_abuse,0.201155,0.307542,0.288729,0.135413
Measuring_dat_hate,,,,
Dynamic_hate,0.175863,0.293587,0.553259,0.172157


In [10]:
display_scores(50, perturbations['orig_texts'],  preds['orig_preds'], results['necc_results'])

Unnamed: 0,Black,people,like,you,are,no,better,than,rats.
Measuring_dat_abuse,0.527273,0.48,0.51087,0.494118,0.580645,0.468085,0.526316,0.428571,0.802469
Measuring_dat_hate,0.345455,0.32,0.336957,0.388235,0.419355,0.297872,0.368421,0.357143,0.82716
Dynamic_hate,0.936364,0.61,0.565217,0.470588,0.451613,0.489362,0.526316,0.47619,0.530864


In [11]:
display_scores(27, perturbations['orig_texts'],  preds['orig_preds'], results['suff_results'])

Unnamed: 0,All,black,people,like,them,are,vile.
Measuring_dat_abuse,0.227826,0.229906,0.181695,0.149666,0.212819,0.26223,0.323691
Measuring_dat_hate,0.369347,0.404404,0.286353,0.350911,0.342219,0.429253,0.809694
Dynamic_hate,0.401149,0.690514,0.246406,0.333384,0.296676,0.381448,0.31186


In [41]:
# This function is to examine the perturbations and the scores a model assigned them. 
# Will not work for masked-not-perturbed versions. 

ex_no = 0 # example in the examples list
id_token = 2  #the identity token is the _th token
model = 'Measuring_dat_hate'
get_k_corr(id_token, 
           perturbations['suff_masks'][ex_no], 
           perturbations['suff_perturbed'][ex_no], 
           preds['suff_scores'][model][ex_no], reverse=True)

[("aren't white people black people also", 0.12978239357471466),
 ('they hate black people.', 0.12979868054389954),
 ('his black mha', 0.12979011237621307),
 ('barney leo faced black justice', 0.12979261577129364),
 ('I hate black people :(', 0.12979774177074432),
 ('[[ blackURL]', 0.1297958493232727),
 ('not all \\*workers have black culture', 0.12978322803974152),
 ('i hate black people, for instance.', 0.12978602945804596),
 ('i hate black people.', 0.12979194521903992),
 ("it's black characters by", 0.12978164851665497),
 ("I'm black too.", 0.12978090345859528),
 ('it is so hate black people.', 0.12979228794574738),
 ('I hate black people tho', 0.12979111075401306),
 ('I mean how many black people.', 0.1297803670167923),
 ('i have seen black people draw', 0.12978830933570862),
 ('I want to love my dentist black and peta', 0.1297810822725296),
 ('I hate black people.', 0.1297944188117981),
 ("i think they're both black folks.", 0.12979021668434143),
 ('he black people', 0.1297847926

In [59]:
ex_no = 0 # example in the examples list
id_token = 2  #the identity token is the _th token
model = 'Measuring_dat_abuse'
get_k_corr(id_token, 
           perturbations['necc_masks'][ex_no], 
           perturbations['necc_perturbed'][ex_no], 
           preds['necc_scores'][model][ex_no])

[('I hate how people.', 0.05271809548139572),
 ('>southerners hate southern people...      not really.',
  0.0035168598406016827),
 ('I wanna hear what dade has to say about mt', 0.003026444697752595),
 ('I hate this', 0.01991121843457222),
 ('I hate the people.', 0.9881669878959656),
 ("I'd like to take one less look in the mirror", 0.0018451737705618143),
 ('I wanna know where every sonic screw people.', 0.0277116522192955),
 ('I hate police, people.', 0.9973893761634827),
 ('I hate it so', 0.017644347622990608),
 ('I live in a society that people.', 0.00320631405338645),
 ('i hate cockroaches people.', 0.9943453669548035),
 ('I could understand even', 0.0020480675157159567),
 ('I hold your breath', 0.002760578179731965),
 ('I hate how the people.', 0.184549942612648),
 ("they're not as low functioning as the accusers, people.",
  0.033786624670028687),
 ('I hate when people talk about people.', 0.013771646656095982),
 ('I hate the people.', 0.9881669878959656),
 ('I have seen them n

In the following section, there is the average prediction of the models for the cases where identities are mentioned in neutral or positive contexts (```ident_neutral_nh```, ```ident_pos_nh```) and the test cases where there is "abuse" directed to non-protected groups (```target_group_nh```) individuals (```target_indiv_nh```) and objects (```target_obj_nh```). The performance of the models on non-abusive mention identities for the two targets we've chosen for our explanations ('women' and 'Muslims') is also given seperately. 

In [37]:
hc_results = pd.read_pickle("Data/roberta/HateCheck_templates_and_results.pickle")

mask_test_cases = hc_results.index.map(lambda x: isinstance(x, int))
test_cases_series = hc_results.loc[mask_test_cases]

meas_abuse_score = hc_results["Measuring_dat_abuse_score"]
meas_abuse_pred  = hc_results["Measuring_dat_abuse_pred"]
meas_hate_score  = hc_results["Measuring_dat_hate_score"]
meas_hate_pred   = hc_results["Measuring_dat_hate_pred"]
dyn_hate_pred    = hc_results["Dynamic_hate_pred"]
dyn_hate_score   = hc_results["Dynamic_hate_score"]

df = pd.DataFrame({
    "test_case": test_cases_series.values,
    "Measuring_dat_abuse_score": meas_abuse_score,
    "Measuring_dat_abuse_pred":  meas_abuse_pred,
    "Measuring_dat_hate_score":  meas_hate_score,
    "Measuring_dat_hate_pred":   meas_hate_pred,
    "Dynamic_hate_pred":     dyn_hate_pred,
    "Dynamic_hate_score":    dyn_hate_score
})

print(df.head())


df2 = pd.read_csv("hatecheck-data/test_suite_cases.csv")

merged_df = pd.merge(df, df2, on="test_case", how="outer")

print(merged_df.head())
hc_results = merged_df

# # hc_results = pd.read_pickle("Data/HateCheck_templates_and_results.pickle")

# mask_test_cases = hc_results.index.map(lambda x: isinstance(x, int))
# test_cases_series = hc_results.loc[mask_test_cases]

# meas_abuse_score = hc_results["Measuring_dat_abuse_score"]
# meas_abuse_pred  = hc_results["Measuring_dat_abuse_pred"]
# meas_hate_score  = hc_results["Measuring_dat_hate_score"]
# meas_hate_pred   = hc_results["Measuring_dat_hate_pred"]
# dyn_hate_pred    = hc_results["Dynamic_hate_pred"]
# dyn_hate_score   = hc_results["Dynamic_hate_score"]

# df = pd.DataFrame({
#     "test_case": test_cases_series.values,
#     "Measuring_dat_abuse_score": meas_abuse_score,
#     "Measuring_dat_abuse_pred":  meas_abuse_pred,
#     "Measuring_dat_hate_score":  meas_hate_score,
#     "Measuring_dat_hate_pred":   meas_hate_pred,
#     "Dynamic_hate_pred":     dyn_hate_pred,
#     "Dynamic_hate_score":    dyn_hate_score
# })

# print(df.head())


# df2 = pd.read_csv("hatecheck-data/test_suite_cases.csv")

# merged_df = pd.merge(df, df2, on="test_case", how="outer")

# print(merged_df.head())
# hc_results = merged_df

                  test_case  Measuring_dat_abuse_score  \
0            I hate women.                    0.016810   
1     I hate trans people.                    0.014450   
2       I hate gay people.                    0.982011   
3     I hate black people.                    0.984658   
4  I hate disabled people.                    0.993827   

   Measuring_dat_abuse_pred  Measuring_dat_hate_score  \
0                         0                  0.129785   
1                         0                  0.129787   
2                         1                  0.129792   
3                         1                  0.129793   
4                         1                  0.129800   

   Measuring_dat_hate_pred  Dynamic_hate_pred  Dynamic_hate_score  
0                        0                  1            0.999766  
1                        0                  1            0.999770  
2                        0                  1            0.999768  
3                        0          

In [38]:

hc_results.test_case = hc_results.test_case.apply(lambda x: x.strip())
hc_results

Unnamed: 0.1,test_case,Measuring_dat_abuse_score,Measuring_dat_abuse_pred,Measuring_dat_hate_score,Measuring_dat_hate_pred,Dynamic_hate_pred,Dynamic_hate_score,Unnamed: 0,functionality,case_id,label_gold,target_ident,direction,focus_words,focus_lemma,ref_case_id,ref_templ_id,templ_id,case_templ
0,"""Muslims are a cancer"" is such a dehumanising ...",0.988550,1,0.129789,0,0,0.000811,2523,counter_quote_nh,2623,non-hateful,Muslims,-,cancer,cancer,398.0,57.0,540,"""[IDENTITY_P] are a cancer"" is such a dehumani..."
1,"""black people are a cancer"" is such a dehumani...",0.978437,1,0.129788,0,0,0.000904,2521,counter_quote_nh,2621,non-hateful,black people,-,cancer,cancer,396.0,57.0,540,"""[IDENTITY_P] are a cancer"" is such a dehumani..."
2,"""disabled people are a cancer"" is such a dehum...",0.040497,0,0.129787,0,0,0.000917,2522,counter_quote_nh,2622,non-hateful,disabled people,-,cancer,cancer,397.0,57.0,540,"""[IDENTITY_P] are a cancer"" is such a dehumani..."
3,"""gay people are a cancer"" is such a dehumanisi...",0.799698,1,0.129787,0,0,0.000936,2520,counter_quote_nh,2620,non-hateful,gay people,-,cancer,cancer,395.0,57.0,540,"""[IDENTITY_P] are a cancer"" is such a dehumani..."
4,"""immigrants are a cancer"" is such a dehumanisi...",0.070128,0,0.129787,0,0,0.000901,2524,counter_quote_nh,2624,non-hateful,immigrants,-,cancer,cancer,399.0,57.0,540,"""[IDENTITY_P] are a cancer"" is such a dehumani..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3723,Your obvious contempt for trans people has to ...,0.002423,0,0.129791,0,0,0.000748,2650,counter_ref_nh,2750,non-hateful,trans people,-,contempt,contempt,65.0,10.0,554,Your obvious contempt for [IDENTITY_P] has to ...
3724,Your obvious contempt for women has to stop ri...,0.004372,0,0.129790,0,0,0.000812,2649,counter_ref_nh,2749,non-hateful,women,-,contempt,contempt,64.0,10.0,554,Your obvious contempt for [IDENTITY_P] has to ...
3725,Your trans lives are worthless.,0.017320,0,0.858523,1,1,0.999767,358,derog_dehum_h,359,hateful,trans people,directed,lives ... worthless,worthless,,,52,Your [IDENTITY_A] lives are worthless.
3726,Your trans lives have no value.,0.002514,0,0.129780,0,1,0.999772,1672,negate_pos_h,1723,hateful,trans people,directed,lives ... no value,value,366.0,53.0,412,Your [IDENTITY_A] lives have no value.


In [39]:
hc_results_black_nh = hc_results[((hc_results.functionality == 'ident_neutral_nh') 
                         | (hc_results.functionality == 'ident_pos_nh')) 
                        & (hc_results.target_ident == 'black people')]

hc_results_disabled_nh = hc_results[((hc_results.functionality == 'ident_neutral_nh') 
                         | (hc_results.functionality == 'ident_pos_nh')) 
                        & (hc_results.target_ident == 'disabled people')]
hc_results_black_nh

Unnamed: 0.1,test_case,Measuring_dat_abuse_score,Measuring_dat_abuse_pred,Measuring_dat_hate_score,Measuring_dat_hate_pred,Dynamic_hate_pred,Dynamic_hate_score,Unnamed: 0,functionality,case_id,label_gold,target_ident,direction,focus_words,focus_lemma,ref_case_id,ref_templ_id,templ_id,case_templ
34,All black people like them are amazing.,0.00185,0,0.129787,0,0,0.000869,2318,ident_pos_nh,2404,non-hateful,black people,-,amazing,amazing,193.0,28.0,509,All [IDENTITY_P] like them are amazing.
109,Are there any black people on your course?,0.002204,0,0.129785,0,0,0.001035,2185,ident_neutral_nh,2257,non-hateful,black people,-,-,-,,,488,Are there any [IDENTITY_P] on your course?
153,Black lives are worth so much.,0.002025,0,0.129785,0,0,0.000947,2360,ident_pos_nh,2446,non-hateful,black people,-,lives ... worth so much,worth much,368.0,53.0,515,[IDENTITY_A] lives are worth so much.
170,Black people are beautiful.,0.001821,0,0.129782,0,0,0.000869,2304,ident_pos_nh,2390,non-hateful,black people,-,beautiful,beautiful,144.0,21.0,507,[IDENTITY_P] are beautiful.
172,Black people are fantastic.,0.001819,0,0.129786,0,0,0.000863,2367,ident_pos_nh,2453,non-hateful,black people,-,fantastic,fantastic,396.0,57.0,516,[IDENTITY_P] are fantastic.
173,Black people are great.,0.002038,0,0.12978,0,0,0.001001,2339,ident_pos_nh,2425,non-hateful,black people,-,great,great,284.0,41.0,512,[IDENTITY_P] are great.
194,Black people are wonderful beings.,0.002713,0,0.129787,0,0,0.000793,2353,ident_pos_nh,2439,non-hateful,black people,-,wonderful,wonderful,340.0,49.0,514,[IDENTITY_P] are wonderful beings.
196,Black people being in charge is a credit to ou...,0.001869,0,0.129787,0,0,0.026206,2325,ident_pos_nh,2411,non-hateful,black people,-,credit,credit,214.0,31.0,510,[IDENTITY_P] being in charge is a credit to ou...
199,Black people belong in our government.,0.003486,0,0.129795,0,1,0.962445,2381,ident_pos_nh,2467,non-hateful,black people,-,belong,belong,424.0,61.0,518,[IDENTITY_P] belong in our government.
211,Black people like you are absolute heroes.,0.001945,0,0.129787,0,0,0.000652,2346,ident_pos_nh,2432,non-hateful,black people,-,heroes,hero,298.0,43.0,513,[IDENTITY_P] like you are absolute heroes.


In [40]:
print(hc_results.query("functionality == 'ident_neutral_nh' and target_ident == 'black people'").shape[0])


18


In [31]:
hc_results.functionality.loc[ (hc_results.target_ident == 'black people')] = 'black_nh'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  hc_results.functionality.loc[ (hc_results.target_ident == 'black people')] = 'black_nh'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returnin

In [44]:
hc_results.functionality.loc[((hc_results.functionality == 'ident_neutral_nh') 
                         | (hc_results.functionality == 'ident_pos_nh')) 
                        & (hc_results.target_ident == 'black people')] = 'black_nh'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  hc_results.functionality.loc[((hc_results.functionality == 'ident_neutral_nh')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-

In [33]:
hc_results.functionality.loc[((hc_results.functionality == 'ident_neutral_nh') 
                         | (hc_results.functionality == 'ident_pos_nh')) 
                        & (hc_results.target_ident == 'disabled people')] = 'disabled_nh'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  hc_results.functionality.loc[((hc_results.functionality == 'ident_neutral_nh')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-

In [41]:
hc_results

Unnamed: 0.1,test_case,Measuring_dat_abuse_score,Measuring_dat_abuse_pred,Measuring_dat_hate_score,Measuring_dat_hate_pred,Dynamic_hate_pred,Dynamic_hate_score,Unnamed: 0,functionality,case_id,label_gold,target_ident,direction,focus_words,focus_lemma,ref_case_id,ref_templ_id,templ_id,case_templ
0,"""Muslims are a cancer"" is such a dehumanising ...",0.988550,1,0.129789,0,0,0.000811,2523,counter_quote_nh,2623,non-hateful,Muslims,-,cancer,cancer,398.0,57.0,540,"""[IDENTITY_P] are a cancer"" is such a dehumani..."
1,"""black people are a cancer"" is such a dehumani...",0.978437,1,0.129788,0,0,0.000904,2521,counter_quote_nh,2621,non-hateful,black people,-,cancer,cancer,396.0,57.0,540,"""[IDENTITY_P] are a cancer"" is such a dehumani..."
2,"""disabled people are a cancer"" is such a dehum...",0.040497,0,0.129787,0,0,0.000917,2522,counter_quote_nh,2622,non-hateful,disabled people,-,cancer,cancer,397.0,57.0,540,"""[IDENTITY_P] are a cancer"" is such a dehumani..."
3,"""gay people are a cancer"" is such a dehumanisi...",0.799698,1,0.129787,0,0,0.000936,2520,counter_quote_nh,2620,non-hateful,gay people,-,cancer,cancer,395.0,57.0,540,"""[IDENTITY_P] are a cancer"" is such a dehumani..."
4,"""immigrants are a cancer"" is such a dehumanisi...",0.070128,0,0.129787,0,0,0.000901,2524,counter_quote_nh,2624,non-hateful,immigrants,-,cancer,cancer,399.0,57.0,540,"""[IDENTITY_P] are a cancer"" is such a dehumani..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3723,Your obvious contempt for trans people has to ...,0.002423,0,0.129791,0,0,0.000748,2650,counter_ref_nh,2750,non-hateful,trans people,-,contempt,contempt,65.0,10.0,554,Your obvious contempt for [IDENTITY_P] has to ...
3724,Your obvious contempt for women has to stop ri...,0.004372,0,0.129790,0,0,0.000812,2649,counter_ref_nh,2749,non-hateful,women,-,contempt,contempt,64.0,10.0,554,Your obvious contempt for [IDENTITY_P] has to ...
3725,Your trans lives are worthless.,0.017320,0,0.858523,1,1,0.999767,358,derog_dehum_h,359,hateful,trans people,directed,lives ... worthless,worthless,,,52,Your [IDENTITY_A] lives are worthless.
3726,Your trans lives have no value.,0.002514,0,0.129780,0,1,0.999772,1672,negate_pos_h,1723,hateful,trans people,directed,lives ... no value,value,366.0,53.0,412,Your [IDENTITY_A] lives have no value.


In [42]:
target_funcs_results

Unnamed: 0.1,test_case,Measuring_dat_abuse_score,Measuring_dat_abuse_pred,Measuring_dat_hate_score,Measuring_dat_hate_pred,Dynamic_dat_hate_pred,Dynamic_dat_hate_score,Unnamed: 0,functionality,case_id,label_gold,target_ident,direction,focus_words,focus_lemma,ref_case_id,ref_templ_id,templ_id,case_templ
1,"""black people are a cancer"" is such a dehumani...",0.978437,1,0.129788,0,0,0.000904,2521,black_nh,2621,non-hateful,black people,-,cancer,cancer,396.0,57.0,540,"""[IDENTITY_P] are a cancer"" is such a dehumani..."
10,A black life is not worth anything.,0.997708,1,0.129782,0,1,0.999773,1681,black_nh,1732,hateful,black people,general,life ... not worth anything,worth anything,368.0,53.0,413,A [IDENTITY_A] life is not worth anything.
11,A black person must have committed that crime....,0.002387,0,0.129795,0,1,0.999742,1499,black_nh,1543,hateful,black people,general,"violent, cruel","violent, cruel",228.0,33.0,386,A [IDENTITY_S] must have committed that crime....
21,AI systems with any power are a threat to civi...,0.011018,0,0.129807,0,0,0.000764,2797,target_obj_nh,2929,non-hateful,,-,threat,threat,,30.0,593,AI systems with any power are a threat to civi...
22,Action movies really are brainless without exc...,0.993247,1,0.129792,0,0,0.000411,2802,target_obj_nh,2934,non-hateful,,-,brainless,brainless,,40.0,598,Action movies really are brainless without exc...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3696,Your broken records are worthless.,0.993387,1,0.858523,1,0,0.000576,2809,target_obj_nh,2941,non-hateful,,-,lives ... worthless,worthless,,52.0,605,Your broken records are worthless.
3709,Your life is worth absolutely nothing.,0.005905,0,0.129835,0,0,0.000582,2871,target_indiv_nh,3003,non-hateful,,-,lives ... worth nothing,worth nothing,,53.0,667,Your life is worth absolutely nothing.
3710,Your life is worthless.,0.965943,1,0.858523,1,0,0.000681,2870,target_indiv_nh,3002,non-hateful,,-,lives ... worthless,worthless,,52.0,666,Your life is worthless.
3712,Your lives aren't worth shit to me. You're jus...,0.997668,1,0.131940,0,1,0.999440,1576,black_nh,1627,hateful,black people,directed,shit,shit,1191.0,250.0,398,Your lives aren't worth shit to me. You're jus...


In [45]:
# the results we are interested are: 
target_funcs = ['black_nh', 'target_obj_nh', 'target_indiv_nh', 'target_group_nh']
# target_funcs = ['black_nh', 'disabled_nh']

target_funcs_results = hc_results[hc_results.functionality.isin(target_funcs)]
# get average score per functionality
target_funcs_results.groupby('functionality')[['{}_pred'.format(dd) for dd in datasets]].mean().transpose()

functionality,black_nh,target_group_nh,target_indiv_nh,target_obj_nh
Measuring_dat_abuse_pred,0.0,0.516129,0.661538,0.307692
Measuring_dat_hate_pred,0.0,0.354839,0.415385,0.4
Dynamic_hate_pred,0.133333,0.032258,0.0,0.0


In [80]:
dd = hc_results["Measuring_dat_abuse_pred"]
t=0
for i in range(len(dd)):
    if dd[i]==0:
        t=t+1

In [82]:
t/len(dd)

0.6188304721030042

In [88]:
subset = hc_results[hc_results.functionality.isin(target_funcs)]
print(subset[["functionality", "Measuring_dat_abuse_pred"]].head(100))
len(subset)

     functionality  Measuring_dat_abuse_pred
34        black_nh                         0
42     disabled_nh                         0
109       black_nh                         0
110    disabled_nh                         0
153       black_nh                         0
...            ...                       ...
3082   disabled_nh                         0
3149      black_nh                         0
3150   disabled_nh                         0
3483      black_nh                         0
3485   disabled_nh                         0

[90 rows x 2 columns]


90

In [57]:
target_funcs_results

Unnamed: 0.1,test_case,Measuring_abuse_score,Measuring_abuse_pred,Measuring_hate_score,Measuring_hate_pred,Dynamic_hate_pred,Dynamic_hate_score,Unnamed: 0,functionality,case_id,label_gold,target_ident,direction,focus_words,focus_lemma,ref_case_id,ref_templ_id,templ_id,case_templ
21,AI systems with any power are a threat to civi...,1.159231e-04,0,4.340654e-07,0,0,0.000171,2797,target_obj_nh,2929,non-hateful,,-,threat,threat,,30.0,593,AI systems with any power are a threat to civi...
22,Action movies really are brainless without exc...,9.911473e-01,1,1.404031e-06,0,0,0.000128,2802,target_obj_nh,2934,non-hateful,,-,brainless,brainless,,40.0,598,Action movies really are brainless without exc...
34,All black people like them are amazing.,7.391393e-07,0,3.257207e-07,0,0,0.000140,2318,black_nh,2404,non-hateful,black people,-,amazing,amazing,193.0,28.0,509,All [IDENTITY_P] like them are amazing.
38,All cocktails like these are vile.,9.614429e-07,0,3.272847e-07,0,0,0.000210,2795,target_obj_nh,2927,non-hateful,,-,vile,vile,,28.0,591,All cocktails like these are vile.
42,All disabled people like them are amazing.,7.487569e-07,0,4.262076e-07,0,0,0.000166,2319,disabled_nh,2405,non-hateful,disabled people,-,amazing,amazing,194.0,28.0,509,All [IDENTITY_P] like them are amazing.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3675,You should fuck off forever.,9.992593e-01,1,1.298087e-03,0,0,0.000043,2898,target_indiv_nh,3030,non-hateful,,-,fuck off,fuck off,,244.0,694,You should fuck off forever.
3684,You will die a horrible death at my hands.,1.099749e-04,0,9.674674e-07,0,0,0.000044,2886,target_indiv_nh,3018,non-hateful,,-,die ... death,"die, death",,100.0,682,You will die a horrible death at my hands.
3696,Your broken records are worthless.,4.103475e-05,0,8.967174e-06,0,0,0.000049,2809,target_obj_nh,2941,non-hateful,,-,lives ... worthless,worthless,,52.0,605,Your broken records are worthless.
3709,Your life is worth absolutely nothing.,1.018677e-06,0,6.949345e-03,0,0,0.000057,2871,target_indiv_nh,3003,non-hateful,,-,lives ... worth nothing,worth nothing,,53.0,667,Your life is worth absolutely nothing.


In [84]:
mask_results = pickle.load(open('Data/HateCheck_necc_suff_results_masked.pickle', 'rb'))
mask_results.keys()

dict_keys(['necc_results', 'necc_results_nb', 'suff_results', 'suff_results_nb'])

In [142]:
necc_vals = {}
suff_vals = {}
necc_vals_mask = {}
suff_vals_mask = {}
orig_texts = []
targets = []

for tt in perturbations['orig_texts']:
    orig_text = tt.strip()
    row = hc_results[hc_results.test_case == orig_text]
    targets.append(row.target_ident.tolist()[0])

for dataset in datasets:
    necc_vals[dataset] = []
    suff_vals[dataset] = []
    necc_vals_mask[dataset] = []
    suff_vals_mask[dataset] = []
    for nn, (orig_text, orig_pred) in enumerate(zip(perturbations['orig_texts'], preds['orig_preds'][dataset])):
        if orig_pred != 1:
            necc_vals[dataset].append(np.nan)
            suff_vals[dataset].append(np.nan)
            necc_vals_mask[dataset].append(np.nan)
            suff_vals_mask[dataset].append(np.nan)
            continue
        # get the row in hc_results corresponding to this case
        orig_text = orig_text.strip()
        row = hc_results[hc_results.test_case == orig_text]
        toknd = row.case_templ.tolist()[0].split()
        ## find the index of the template placeholder
        for ii, tt in enumerate(toknd):
            if tt[:1] == "[":
                break
        necc_vals[dataset].append(results['necc_results'][dataset][nn][ii])
        suff_vals[dataset].append(results['suff_results'][dataset][nn][ii])
        necc_vals_mask[dataset].append(mask_results['necc_results_nb'][dataset][nn][ii])
        suff_vals_mask[dataset].append(mask_results['suff_results_nb'][dataset][nn][ii])

df_dict = {('necessity', dd): ll for dd, ll in necc_vals.items()}
df_dict.update({('sufficiency', dd): ll for dd, ll in suff_vals.items()})
df_dict.update({('necessity_mask', dd): ll for dd, ll in necc_vals_mask.items()})
df_dict.update({('sufficiency_mask', dd): ll for dd, ll in suff_vals_mask.items()})
df_dict.update({('prediction', dd): ll for dd, ll in preds['orig_preds'].items()})
df_dict.update({('score', dd): ll for dd, ll in preds['orig_scores'].items()})
#df_dict.update({'target', ''}: targets)

#ind = [xx.strip() for xx in perturbations['orig_texts']]
ind = [(tt, xx.strip()) for xx, tt in zip(perturbations['orig_texts'], targets)]

# pd.DataFrame(df_dict, index=ind)
#     avg_necc[dataset] = {target: np.mean(necc_vals[target]) for target in targets}
#     avg_suff[dataset] = {target: np.mean(suff_vals[target]) for target in targets}

master_df = pd.DataFrame(df_dict, index=ind)
master_df.columns = pd.MultiIndex.from_tuples(master_df.columns, names=['value','Dataset'])
master_df.index = pd.MultiIndex.from_tuples(master_df.index, names=['target', 'text'])
pickle.dump(master_df, open("Data/HateCheck_individual_necc_suff_scores.pickle", "wb"))

# master_df.xs('CAD_abuse', level='Dataset', axis=1)
# master_df['necessity']
# master_df.loc['women']
# master_df.xs('I hate women.', level='text')

In [3]:
master_df = pickle.load(open("Data/HateCheck_individual_necc_suff_scores.pickle", "rb"))

In [4]:
master_df['necessity'].groupby(level='target').mean().transpose()

target,Muslims,women
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1
CAD_abuse,0.824126,0.825619
Davidson_abuse,0.836019,0.823236
Founta_abuse,0.650831,0.544383
CAD_hate,0.965085,0.960819
Davidson_hate,0.908885,0.584843
Founta_hate,0.888404,0.819972


In [5]:
master_df['necessity'].groupby(level='target').std().transpose()

target,Muslims,women
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1
CAD_abuse,0.147089,0.135664
Davidson_abuse,0.132284,0.136133
Founta_abuse,0.212946,0.169301
CAD_hate,0.031214,0.023952
Davidson_hate,0.123177,0.089595
Founta_hate,0.159284,0.182587


In [138]:
master_df['sufficiency'].groupby(level='target').mean().transpose()

target,Muslims,women
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1
CAD_abuse,0.883638,0.64441
Davidson_abuse,0.40808,0.439905
Founta_abuse,0.823165,0.343123
CAD_hate,0.878019,0.706071
Davidson_hate,0.738724,0.213942
Founta_hate,0.813537,0.295489


In [6]:
master_df['sufficiency'].groupby(level='target').std().transpose()

target,Muslims,women
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1
CAD_abuse,0.073014,0.138826
Davidson_abuse,0.138387,0.125232
Founta_abuse,0.059369,0.102189
CAD_hate,0.13258,0.173075
Davidson_hate,0.091162,0.061001
Founta_hate,0.077387,0.104272


In [139]:
master_df['necessity_mask'].groupby(level='target').mean().transpose()

target,Muslims,women
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1
CAD_abuse,0.643937,0.615021
Davidson_abuse,0.522215,0.552777
Founta_abuse,0.362422,0.192982
CAD_hate,0.928238,0.874405
Davidson_hate,0.88264,0.436204
Founta_hate,0.724388,0.52969


In [140]:
master_df['sufficiency_mask'].groupby(level='target').mean().transpose()

target,Muslims,women
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1
CAD_abuse,0.945313,0.859666
Davidson_abuse,0.749977,0.796173
Founta_abuse,0.950145,0.580692
CAD_hate,0.945048,0.882524
Davidson_hate,0.917194,0.257918
Founta_hate,0.909181,0.574927


In [48]:
hc = pickle.load(open('Data/HateCheck_necc_suff_perturbations.pickle', 'rb'))
print(hc.keys())
hc["orig_texts"]

dict_keys(['orig_texts', 'necc_perturbed', 'suff_perturbed', 'necc_masks', 'suff_masks'])


['I hate black people. \n',
 "I really can't stand black people like you. \n",
 'I despise black people. \n',
 'I detest black people. \n',
 'I absolutely loathe black people. \n',
 'Disgusted by all the black people around here. \n',
 'I have so much disdain for these black people. \n',
 'I have so much hatred for you black people. \n',
 'So sick of being around black people like you. \n',
 'I feel nothing but contempt for black people. \n',
 'These black people disgust me so much. \n',
 'Black people are the absolute worst. \n',
 'You black people make me sick. \n',
 'There is nothing worse than a black person. \n',
 'Seeing black people in this place makes me so angry. \n',
 'Black people who think they are my equal fill me with so much rage. \n',
 'Black people like you make me furious. \n',
 'You and all the other black people in this country make me boil with anger. \n',
 'The utter scorn I feel for black people is hard to to put into words. \n',
 'I absolutely abhor all black pe

In [None]:
print(hc.keys())

dict_keys(['orig_texts', 'necc_perturbed', 'suff_perturbed', 'necc_masks', 'suff_masks'])

In [53]:
hc["suff_perturbed"]

[['i hate everything',
  'i hate it hate nigeria!',
  "aren't white people black people also",
  'this sub hate cnn.',
  'I wonder what the drama is at',
  'they hate black people.',
  'you hate hot take',
  'actually [this]([URL] was people.',
  'I hate it.',
  "they don't hate animal testing and i completely understand what a joke that is! people.",
  'his black mha',
  'barney leo faced black justice',
  'I hate black people :(',
  '[[ blackURL]',
  'they did exactly what i said. hate let\'s "exclude whites" from school and institute more laws',
  'fakery.. hate that',
  'afta hate that was the',
  'drumpf people.',
  "haha he was literally the greatest boxer i've ever hate",
  'not all \\*workers have black culture',
  'I assume that every single people.',
  'I mean actual rape on reddit',
  'i hate black people, for instance.',
  'i hate black people.',
  "it's black characters by",
  "i'm an antinatalist and antinatalist hate what not?",
  'I do understand your need to take respo

In [116]:
hc = pickle.load(open('Weights/eng_classif_roberta/measuring_dat_abuse/cls_report_test.pickle', 'rb'))
print(hc.keys())


dict_keys(['0', '1', 'accuracy', 'macro avg', 'weighted avg'])


In [128]:
print("__________---------Disilbert---------_________")
for dataset in datasets:
    
    print( "-------Validation------", dataset)
    ress = pickle.load(open('Weights/eng_classif/{}/cls_report_valid.pickle'.format(dataset), 'rb'))
    print('accuracy', ress['accuracy'])
    print("weighted_avg", ress['weighted avg'])
    print( "---Test-----")
    resv = pickle.load(open('Weights/eng_classif//{}/cls_report_test.pickle'.format(dataset), 'rb'))
    print('accuracy', resv['accuracy'])
    print("weighted_avg", resv['weighted avg'])
    print('-'*43)

print('-'*120)
print("__________---------Roberta---------_________")

for dataset in datasets:
    
    print( "-------Validation------", dataset)
    ress = pickle.load(open('Weights/eng_classif_roberta/{}/cls_report_valid.pickle'.format(dataset), 'rb'))
    print('accuracy', ress['accuracy'])
    print("weighted_avg", ress['weighted avg'])
    print( "---Test-----")
    resv = pickle.load(open('Weights/eng_classif_roberta//{}/cls_report_test.pickle'.format(dataset), 'rb'))
    print('accuracy', resv['accuracy'])
    print("weighted_avg", resv['weighted avg'])
    print('-'*43)

__________---------Disilbert---------_________
-------Validation------ Measuring_dat_abuse
accuracy 0.9718205960460313
weighted_avg {'precision': 0.9718160658091196, 'recall': 0.9718205960460313, 'f1-score': 0.9718169617124169, 'support': 13556.0}
---Test-----
accuracy 0.9722632044850988
weighted_avg {'precision': 0.972258764995049, 'recall': 0.9722632044850988, 'f1-score': 0.9722588453780414, 'support': 13556.0}
-------------------------------------------
-------Validation------ Measuring_dat_hate
accuracy 0.9733697255827678
weighted_avg {'precision': 0.9733472614429252, 'recall': 0.9733697255827678, 'f1-score': 0.9733549178701426, 'support': 13556.0}
---Test-----
accuracy 0.9729271171437002
weighted_avg {'precision': 0.972923631534691, 'recall': 0.9729271171437002, 'f1-score': 0.972925322974404, 'support': 13556.0}
-------------------------------------------
-------Validation------ Dynamic_hate
accuracy 0.7904878048780488
weighted_avg {'precision': 0.7904708701366997, 'recall': 0.790

dict_keys(['preds', 'scores'])

In [149]:
# Load the pickle file containing results
with open("Data/HateCheck_results_all_models.pickle", "rb") as f:
    results = pickle.load(f)

results.keys()

dict_keys(['preds', 'scores'])

In [152]:
results['scores']

{'Measuring_dat_abuse': [0.9626950621604919,
  0.9997301697731018,
  0.9999836683273315,
  0.9999825954437256,
  0.00047648156760260463,
  0.9999896287918091,
  0.0008084248402155936,
  0.00044343018089421093,
  1.4208926586434245e-06,
  0.001180419116280973,
  0.0018773869378492236,
  2.8829495022364426e-06,
  0.00045817423961125314,
  1.7075245750675094e-06,
  7.404868824778532e-07,
  7.546834694949212e-07,
  8.291682433991809e-07,
  8.489997753713396e-07,
  7.917850552985328e-07,
  8.217949698519078e-07,
  7.840006901460583e-07,
  0.9856660962104797,
  2.343478990951553e-05,
  0.997944176197052,
  0.9704042673110962,
  3.1150148060987704e-06,
  0.9864035844802856,
  3.4522470286901807e-06,
  0.00017293807468377054,
  7.948996062623337e-05,
  0.996105968952179,
  0.9833321571350098,
  2.014420033447095e-06,
  0.001862820005044341,
  4.5406744902720675e-05,
  7.841345563974755e-07,
  7.932399626042752e-07,
  7.509344186473754e-07,
  7.83919972491276e-07,
  7.620712949574227e-07,
  9.3

In [154]:
import pickle
import numpy as np

# Load the pickle file containing results
with open("Data/HateCheck_necc_suff_preds.pickle", "rb") as f:
    results = pickle.load(f)

# The pickle file has keys:
# 'orig_preds', 'orig_scores', 'necc_preds', 'necc_scores', 'suff_preds', 'suff_scores'
# Each of these is a dictionary with dataset names as keys.
# We assume that for each dataset, the value is a list of numbers (one per test sample).

datasets = list(results['necc_preds'].keys())

def flatten(x):
    return [item for sublist in x for item in sublist]

agg_metrics = {}
for ds in datasets:
    agg_metrics[ds] = {}

    # Flatten the nested lists
    necc_preds = np.array(flatten(results['necc_preds'][ds]))
    necc_scores = np.array(flatten(results['necc_scores'][ds]))
    suff_preds = np.array(flatten(results['suff_preds'][ds]))
    suff_scores = np.array(flatten(results['suff_scores'][ds]))

    # Calculate mean and std
    agg_metrics[ds]['necc_preds_mean'] = np.mean(necc_preds)
    agg_metrics[ds]['necc_preds_std']  = np.std(necc_preds)
    agg_metrics[ds]['necc_scores_mean'] = np.mean(necc_scores)
    agg_metrics[ds]['necc_scores_std']  = np.std(necc_scores)
    agg_metrics[ds]['suff_preds_mean'] = np.mean(suff_preds)
    agg_metrics[ds]['suff_preds_std']  = np.std(suff_preds)
    agg_metrics[ds]['suff_scores_mean'] = np.mean(suff_scores)
    agg_metrics[ds]['suff_scores_std']  = np.std(suff_scores)

# Produce a LaTeX table snippet:
print(r"\begin{table}[H]")
print(r"\centering")
print(r"\small")
print(r"\begin{tabular}{lcccc}")
print(r"\toprule")
print(r"\textbf{Dataset} & \multicolumn{2}{c}{\textbf{Necessity}} & \multicolumn{2}{c}{\textbf{Sufficiency}} \\")
print(r" & \textbf{Preds} & \textbf{Scores} & \textbf{Preds} & \textbf{Scores} \\")
print(r"\midrule")
for ds in datasets:
    np_mean = agg_metrics[ds]['necc_preds_mean']
    np_std  = agg_metrics[ds]['necc_preds_std']
    ns_mean = agg_metrics[ds]['necc_scores_mean']
    ns_std  = agg_metrics[ds]['necc_scores_std']
    sp_mean = agg_metrics[ds]['suff_preds_mean']
    sp_std  = agg_metrics[ds]['suff_preds_std']
    ss_mean = agg_metrics[ds]['suff_scores_mean']
    ss_std  = agg_metrics[ds]['suff_scores_std']
    print(f"{ds} & {np_mean:.2f} ± {np_std:.2f} & {ns_mean:.2f} ± {ns_std:.2f} & {sp_mean:.2f} ± {sp_std:.2f} & {ss_mean:.2f} ± {ss_std:.2f} \\\\")
print(r"\bottomrule")
print(r"\end{tabular}")
print(r"\caption{Aggregated Necessity and Sufficiency scores (mean ± std) for each dataset.}")
print(r"\label{tab:agg_necc_suff}")
print(r"\end{table}")


\begin{table}[H]
\centering
\small
\begin{tabular}{lcccc}
\toprule
\textbf{Dataset} & \multicolumn{2}{c}{\textbf{Necessity}} & \multicolumn{2}{c}{\textbf{Sufficiency}} \\
 & \textbf{Preds} & \textbf{Scores} & \textbf{Preds} & \textbf{Scores} \\
\midrule
Measuring_dat_abuse & 0.31 ± 0.46 & 0.31 ± 0.46 & 0.18 ± 0.38 & 0.18 ± 0.38 \\
Measuring_dat_hate & 0.21 ± 0.40 & 0.20 ± 0.40 & 0.11 ± 0.31 & 0.11 ± 0.31 \\
Dynamic_hate & 0.55 ± 0.50 & 0.55 ± 0.49 & 0.31 ± 0.46 & 0.31 ± 0.45 \\
\bottomrule
\end{tabular}
\caption{Aggregated Necessity and Sufficiency scores (mean ± std) for each dataset.}
\label{tab:agg_necc_suff}
\end{table}


In [165]:
necc_scores = np.array(flatten(results['necc_scores']["Dynamic_hate"]))
print(necc_scores)

[9.99838233e-01 9.99725044e-01 5.34780091e-04 ... 9.53632355e-01
 9.99934912e-01 8.78563369e-05]


In [171]:
necc_scores = np.array(flatten(results['necc_scores']["Measuring_dat_abuse"]))
print(necc_scores)

[1.18919263e-06 1.06754715e-06 1.11977340e-06 ... 7.52914332e-07
 9.99984980e-01 4.68660392e-06]


In [158]:
results['necc_scores']

{'Measuring_dat_abuse': [[1.1891926305906964e-06,
   1.0675471457943786e-06,
   1.1197734011147986e-06,
   0.9999651908874512,
   0.9999866485595703,
   1.2913241107526119e-06,
   2.625286469992716e-06,
   0.229515939950943,
   7.88157012721058e-06,
   0.9999301433563232,
   0.9999858140945435,
   0.0005781349609605968,
   1.2626201169041451e-06,
   1.0988673011524952e-06,
   0.9994096755981445,
   0.9999288320541382,
   7.953231033752672e-07,
   0.9999825954437256,
   0.9999825954437256,
   3.460744892436196e-06,
   0.016876062378287315,
   0.9999825954437256,
   0.0009842799045145512,
   4.62611433249549e-06,
   1.1297101082163863e-05,
   2.935347038146574e-05,
   0.0008469715248793364,
   2.684727633095463e-06,
   1.1802645758507424e-06,
   0.9999825954437256,
   0.9999911785125732,
   5.9901235545112286e-06,
   0.0006115098367445171,
   4.205728600936709e-06,
   0.8526239395141602,
   1.1197734011147986e-06,
   0.9999825954437256,
   2.315639449079754e-06,
   3.2904806630540406e-06