# Tables
This notebook prints the metric info about the optimal thresholds used in the tables of the paper.

It loads the saved predictions of all three models (LR, DistilBERT, and CNN).
Each model is trained on the Waseem and Hovy (2016) dataset and evaluated on two test datasets:

1. Waseem and Hovy (2016): to simulate seen data.
2. SemEval (2019): to simulate unseen data.

### Table results (all values)
The results below take all values (TP, TN, FP, FN, and rejection) into account.

In [1]:
from rejector.prediction import Prediction
from rejector.values import Values
from rejector.metric import Metric
import seaborn as sns

# Values passed to every Metric in this section (all five are non-zero).
values = Values(
    value_TP=18.149543604085686,
    value_TN=36.31953463203463,
    value_FP=16.68669949423102,
    value_FN=28.08375563063063,
    value_rejection=4.82167904290429,
)

# Saved prediction files, named <model>-<train set>-<test set>.
prediction_paths = (
    "input/lr-waseem-waseem.p",
    "input/lr-waseem-semeval.p",
    "input/distilbert-waseem-waseem.p",
    "input/distilbert-waseem-semeval.p",
    "input/cnn-waseem-waseem.p",
    "input/cnn-waseem-semeval.p",
)

# Load each file in order and pair it with the shared values.
metrics = []
for path in prediction_paths:
    predictions = Prediction.load(path, gold_class="Hate")
    metrics.append(Metric(values, predictions))

# Bind the metrics to the names used by the reporting cells below
# (same order as prediction_paths).
(
    metric_lr_waseem_waseem,
    metric_lr_waseem_semeval,
    metric_distilbert_waseem_waseem,
    metric_distilbert_waseem_semeval,
    metric_cnn_waseem_waseem,
    metric_cnn_waseem_semeval,
) = metrics



  L += func(f_i)
  L += func(f_i)


In [2]:
# Optimal-threshold statistics for the lr-waseem-waseem predictions
# (Waseem test data, i.e. seen data), all values taken into account.
metric_lr_waseem_waseem.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.5
Optimal V(threshold):  27.706832886410012
V(0):  27.706832886410012
Num accepted:  3218
Accuracy original model:  0.8467992541951522
Accuracy accepted:  0.8467992541951522
Num rejected:  0
Num TPs rejected 0
Num TNs rejected 0
Num FPs rejected 0
Num FNs rejected 0
Num TPs accepted 689
Num TNs accepted 2036
Num FPs accepted 171
Num FNs accepted 322
Percentage rejected:  0.0


In [3]:
# Optimal-threshold statistics for the lr-waseem-semeval predictions
# (SemEval test data, i.e. unseen data), all values taken into account.
metric_lr_waseem_semeval.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.5
Optimal V(threshold):  15.689251914545434
V(0):  15.689251914545434
Num accepted:  2400
Accuracy original model:  0.6404166666666666
Accuracy accepted:  0.6404166666666666
Num rejected:  0
Num TPs rejected 0
Num TNs rejected 0
Num FPs rejected 0
Num FNs rejected 0
Num TPs accepted 481
Num TNs accepted 1056
Num FPs accepted 335
Num FNs accepted 528
Percentage rejected:  0.0


In [4]:
# Optimal-threshold statistics for the distilbert-waseem-waseem predictions
# (Waseem test data, i.e. seen data), all values taken into account.
metric_distilbert_waseem_waseem.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.502
Optimal V(threshold):  28.000773216243942
V(0):  27.99634713638655
Num accepted:  3218
Accuracy original model:  0.8495960223741454
Accuracy accepted:  0.8495960223741454
Num rejected:  0
Num TPs rejected 0
Num TNs rejected 0
Num FPs rejected 0
Num FNs rejected 0
Num TPs accepted 763
Num TNs accepted 1971
Num FPs accepted 236
Num FNs accepted 248
Percentage rejected:  0.0


In [5]:
# Optimal-threshold statistics for the distilbert-waseem-semeval predictions
# (SemEval test data, i.e. unseen data), all values taken into account.
metric_distilbert_waseem_semeval.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.5
Optimal V(threshold):  15.822775404025263
V(0):  15.822775404025263
Num accepted:  2400
Accuracy original model:  0.6404166666666666
Accuracy accepted:  0.6404166666666666
Num rejected:  0
Num TPs rejected 0
Num TNs rejected 0
Num FPs rejected 0
Num FNs rejected 0
Num TPs accepted 514
Num TNs accepted 1023
Num FPs accepted 368
Num FNs accepted 495
Percentage rejected:  0.0


In [6]:
# Optimal-threshold statistics for the cnn-waseem-waseem predictions
# (Waseem test data, i.e. seen data), all values taken into account.
metric_cnn_waseem_waseem.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.5
Optimal V(threshold):  27.291234782067917
V(0):  27.291234782067917
Num accepted:  3218
Accuracy original model:  0.8346799254195152
Accuracy accepted:  0.8346799254195152
Num rejected:  0
Num TPs rejected 0
Num TNs rejected 0
Num FPs rejected 0
Num FNs rejected 0
Num TPs accepted 669
Num TNs accepted 2017
Num FPs accepted 190
Num FNs accepted 342
Percentage rejected:  0.0


In [7]:
# Optimal-threshold statistics for the cnn-waseem-semeval predictions
# (SemEval test data, i.e. unseen data), all values taken into account.
metric_cnn_waseem_semeval.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.5
Optimal V(threshold):  14.868028931766595
V(0):  14.868028931766595
Num accepted:  2400
Accuracy original model:  0.6291666666666667
Accuracy accepted:  0.6291666666666667
Num rejected:  0
Num TPs rejected 0
Num TNs rejected 0
Num FPs rejected 0
Num FNs rejected 0
Num TPs accepted 586
Num TNs accepted 924
Num FPs accepted 467
Num FNs accepted 423
Percentage rejected:  0.0


### Table results (TP and TN set to 0)
The results below use the same FP, FN, and rejection values as above, but the TP and TN values are set to 0.

In [8]:
# Values passed to every Metric in this section: TP and TN are zeroed,
# while the FP, FN, and rejection values are non-zero.
values = Values(
    value_TP=0.0,
    value_TN=0.0,
    value_FP=16.68669949423102,
    value_FN=28.08375563063063,
    value_rejection=4.82167904290429,
)

# Saved prediction files, named <model>-<train set>-<test set>.
prediction_paths = (
    "input/lr-waseem-waseem.p",
    "input/lr-waseem-semeval.p",
    "input/distilbert-waseem-waseem.p",
    "input/distilbert-waseem-semeval.p",
    "input/cnn-waseem-waseem.p",
    "input/cnn-waseem-semeval.p",
)

# Load each file in order and pair it with the shared values.
metrics = []
for path in prediction_paths:
    predictions = Prediction.load(path, gold_class="Hate")
    metrics.append(Metric(values, predictions))

# Rebind the metric names used by the reporting cells below
# (same order as prediction_paths).
(
    metric_lr_waseem_waseem,
    metric_lr_waseem_semeval,
    metric_distilbert_waseem_waseem,
    metric_distilbert_waseem_semeval,
    metric_cnn_waseem_waseem,
    metric_cnn_waseem_semeval,
) = metrics


In [9]:
# Optimal-threshold statistics for the lr-waseem-waseem predictions
# (Waseem test data, i.e. seen data), with value_TP and value_TN set to 0.
metric_lr_waseem_waseem.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.7830000000000003
Optimal V(threshold):  2.6875966574968135
V(0):  1.1576252446078468
Num accepted:  2414
Accuracy original model:  0.8467992541951522
Accuracy accepted:  0.9096934548467275
Num rejected:  804
Num TPs rejected 217
Num TNs rejected 312
Num FPs rejected 112
Num FNs rejected 163
Num TPs accepted 472
Num TNs accepted 1724
Num FPs accepted 59
Num FNs accepted 159
Percentage rejected:  0.24984462399005594


In [10]:
# Optimal-threshold statistics for the lr-waseem-semeval predictions
# (SemEval test data, i.e. unseen data), with value_TP and value_TN set to 0.
metric_lr_waseem_semeval.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.9940000000000004
Optimal V(threshold):  3.6676034224181153
V(0):  -3.604926549292124
Num accepted:  101
Accuracy original model:  0.6404166666666666
Accuracy accepted:  0.7524752475247525
Num rejected:  2299
Num TPs rejected 450
Num TNs rejected 1011
Num FPs rejected 319
Num FNs rejected 519
Num TPs accepted 31
Num TNs accepted 45
Num FPs accepted 16
Num FNs accepted 9
Percentage rejected:  0.9579166666666666


In [11]:
# Optimal-threshold statistics for the distilbert-waseem-waseem predictions
# (Waseem test data, i.e. seen data), with value_TP and value_TN set to 0.
metric_distilbert_waseem_waseem.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.8780000000000003
Optimal V(threshold):  3.040608661995707
V(0):  1.4477680916376718
Num accepted:  2408
Accuracy original model:  0.8495960223741454
Accuracy accepted:  0.9256644518272426
Num rejected:  810
Num TPs rejected 242
Num TNs rejected 263
Num FPs rejected 181
Num FNs rejected 124
Num TPs accepted 521
Num TNs accepted 1708
Num FPs accepted 55
Num FNs accepted 124
Percentage rejected:  0.2517091361093847


In [12]:
# Optimal-threshold statistics for the distilbert-waseem-semeval predictions
# (SemEval test data, i.e. unseen data), with value_TP and value_TN set to 0.
metric_distilbert_waseem_semeval.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.9480000000000004
Optimal V(threshold):  3.606319469684868
V(0):  -3.489343137513241
Num accepted:  185
Accuracy original model:  0.6404166666666666
Accuracy accepted:  0.8810810810810811
Num rejected:  2215
Num TPs rejected 514
Num TNs rejected 860
Num FPs rejected 368
Num FNs rejected 473
Num TPs accepted 0
Num TNs accepted 163
Num FPs accepted 0
Num FNs accepted 22
Percentage rejected:  0.9229166666666667


In [13]:
# Optimal-threshold statistics for the cnn-waseem-waseem predictions
# (Waseem test data, i.e. seen data), with value_TP and value_TN set to 0.
metric_cnn_waseem_waseem.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.7560000000000002
Optimal V(threshold):  2.489815977599296
V(0):  0.9012939841195365
Num accepted:  2325
Accuracy original model:  0.8346799254195152
Accuracy accepted:  0.8976344086021505
Num rejected:  893
Num TPs rejected 203
Num TNs rejected 396
Num FPs rejected 116
Num FNs rejected 178
Num TPs accepted 466
Num TNs accepted 1621
Num FPs accepted 74
Num FNs accepted 164
Percentage rejected:  0.27750155376009944


In [14]:
# Optimal-threshold statistics for the cnn-waseem-semeval predictions
# (SemEval test data, i.e. unseen data), with value_TP and value_TN set to 0.
metric_cnn_waseem_semeval.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.9990000000000004
Optimal V(threshold):  3.364996747276529
V(0):  -3.321521729047006
Num accepted:  0
Accuracy original model:  0.6291666666666667
Accuracy accepted: -
Num rejected:  2400
Num TPs rejected 586
Num TNs rejected 924
Num FPs rejected 467
Num FNs rejected 423
Num TPs accepted 0
Num TNs accepted 0
Num FPs accepted 0
Num FNs accepted 0
Percentage rejected:  1.0
