# Tables
This notebook prints the statistics at the optimal rejection threshold that are reported in the tables of the paper.

It loads the saved predictions of all three models (LR, DistilBERT, and CNN).
Each model is trained on the Waseem and Hovy (2016) dataset and evaluated on two test datasets:

1. Waseem and Hovy (2016): to simulate seen data.
2. SemEval (2019): to simulate unseen data.

### Table results (all values)
The results below take all values (TP, TN, FP, FN, and rejection) into account.

In [1]:
from rejector.prediction import Prediction
from rejector.values import Values
from rejector.metric import Metric
import seaborn as sns

# Values assigned to each outcome (TP, TN, FP, FN) and to a rejection.
# The same Values object is shared by every Metric built in this cell.
values = Values(
    value_TP=18.149543604085686,
    value_TN=36.31953463203463,
    value_FP=16.68669949423102,
    value_FN=28.08375563063063,
    value_rejection=4.82167904290429,
)

# Saved prediction files, one per (model, test set) pair. The order here must
# match the order of the metric names unpacked at the bottom of the cell.
prediction_files = (
    "input/lr-waseem-waseem.p",
    "input/lr-waseem-semeval.p",
    "input/distilbert-waseem-waseem.p",
    "input/distilbert-waseem-semeval.p",
    "input/cnn-waseem-waseem.p",
    "input/cnn-waseem-semeval.p",
)

# Load each file with "Hate" as the gold class and wrap it in a Metric.
# `predictions` stays bound to the last file loaded.
metrics = []
for prediction_file in prediction_files:
    predictions = Prediction.load(prediction_file, gold_class="Hate")
    metrics.append(Metric(values, predictions))

(
    metric_lr_waseem_waseem,
    metric_lr_waseem_semeval,
    metric_distilbert_waseem_waseem,
    metric_distilbert_waseem_semeval,
    metric_cnn_waseem_waseem,
    metric_cnn_waseem_semeval,
) = metrics



  L += func(f_i)
  L += func(f_i)


In [2]:
# Print the optimal threshold and the accepted/rejected statistics for the
# LR model on the Waseem and Hovy (2016) test set (seen data).
metric_lr_waseem_waseem.print_optimal_threshold_stats()

Optimal threshold:  0.5
Optimal V(threshold):  45533.60924868776
Total value:  45533.60924868776
Num accepted:  1609
Accuracy original model:  0.853325046612803
Accuracy accepted:  0.853325046612803
Num rejected:  0
Percentage rejected:  0.0


In [3]:
# Print the optimal threshold and the accepted/rejected statistics for the
# LR model on the SemEval (2019) test set (unseen data).
metric_lr_waseem_semeval.print_optimal_threshold_stats()

Optimal threshold:  0.531
Optimal V(threshold):  18562.61196611752
Total value:  18562.61196611752
Num accepted:  1149
Accuracy original model:  0.6308333333333334
Accuracy accepted:  0.6457789382071366
Num rejected:  51
Percentage rejected:  0.0425


In [4]:
# Print the optimal threshold and the accepted/rejected statistics for the
# DistilBERT model on the Waseem and Hovy (2016) test set (seen data).
metric_distilbert_waseem_waseem.print_optimal_threshold_stats()

Optimal threshold:  0.5
Optimal V(threshold):  45250.32322824882
Total value:  45250.32322824882
Num accepted:  1609
Accuracy original model:  0.8527035425730267
Accuracy accepted:  0.8527035425730267
Num rejected:  0
Percentage rejected:  0.0


In [5]:
# Print the optimal threshold and the accepted/rejected statistics for the
# DistilBERT model on the SemEval (2019) test set (unseen data).
metric_distilbert_waseem_semeval.print_optimal_threshold_stats()

Optimal threshold:  0.5
Optimal V(threshold):  19131.475540102063
Total value:  19131.475540102063
Num accepted:  1200
Accuracy original model:  0.6425
Accuracy accepted:  0.6425
Num rejected:  0
Percentage rejected:  0.0


In [6]:
# Print the optimal threshold and the accepted/rejected statistics for the
# CNN model on the Waseem and Hovy (2016) test set (seen data).
metric_cnn_waseem_waseem.print_optimal_threshold_stats()

Optimal threshold:  0.5
Optimal V(threshold):  44893.115994293286
Total value:  44893.115994293286
Num accepted:  1609
Accuracy original model:  0.8446239900559354
Accuracy accepted:  0.8446239900559354
Num rejected:  0
Percentage rejected:  0.0


In [7]:
# Print the optimal threshold and the accepted/rejected statistics for the
# CNN model on the SemEval (2019) test set (unseen data).
metric_cnn_waseem_semeval.print_optimal_threshold_stats()

Optimal threshold:  0.5
Optimal V(threshold):  18385.260352600275
Total value:  18385.260352600275
Num accepted:  1200
Accuracy original model:  0.6241666666666666
Accuracy accepted:  0.6241666666666666
Num rejected:  0
Percentage rejected:  0.0


### Table results (TP and TN set to 0)
The results below use the same FP, FN, and rejection values as above, but the TP and TN values are set to 0.

In [8]:
# Same FP, FN, and rejection values as in the first setup cell, but with the
# TP and TN values set to 0. This rebinds `values` and every `metric_*` name.
values = Values(
    value_TP=0.0,
    value_TN=0.0,
    value_FP=16.68669949423102,
    value_FN=28.08375563063063,
    value_rejection=4.82167904290429,
)

# Saved prediction files, one per (model, test set) pair. The order here must
# match the order of the metric names unpacked at the bottom of the cell.
prediction_files = (
    "input/lr-waseem-waseem.p",
    "input/lr-waseem-semeval.p",
    "input/distilbert-waseem-waseem.p",
    "input/distilbert-waseem-semeval.p",
    "input/cnn-waseem-waseem.p",
    "input/cnn-waseem-semeval.p",
)

# Load each file with "Hate" as the gold class and wrap it in a Metric.
# `predictions` stays bound to the last file loaded.
metrics = []
for prediction_file in prediction_files:
    predictions = Prediction.load(prediction_file, gold_class="Hate")
    metrics.append(Metric(values, predictions))

(
    metric_lr_waseem_waseem,
    metric_lr_waseem_semeval,
    metric_distilbert_waseem_waseem,
    metric_distilbert_waseem_semeval,
    metric_cnn_waseem_waseem,
    metric_cnn_waseem_semeval,
) = metrics


In [9]:
# LR model on the Waseem and Hovy (2016) test set (seen data).
# NOTE: this name now refers to the Metric rebuilt with TP and TN set to 0.
metric_lr_waseem_waseem.print_optimal_threshold_stats()

Optimal threshold:  0.8290000000000003
Optimal V(threshold):  4325.401794635397
Total value:  4325.401794635397
Num accepted:  1100
Accuracy original model:  0.853325046612803
Accuracy accepted:  0.9245454545454546
Num rejected:  509
Percentage rejected:  0.3163455562461156


In [10]:
# LR model on the SemEval (2019) test set (unseen data).
# NOTE: this name now refers to the Metric rebuilt with TP and TN set to 0.
metric_lr_waseem_semeval.print_optimal_threshold_stats()

Optimal threshold:  0.9990000000000004
Optimal V(threshold):  4403.610750434869
Total value:  4403.610750434869
Num accepted:  11
Accuracy original model:  0.6308333333333334
Accuracy accepted:  0.8181818181818182
Num rejected:  1189
Percentage rejected:  0.9908333333333333


In [11]:
# DistilBERT model on the Waseem and Hovy (2016) test set (seen data).
# NOTE: this name now refers to the Metric rebuilt with TP and TN set to 0.
metric_distilbert_waseem_waseem.print_optimal_threshold_stats()

Optimal threshold:  0.7860000000000003
Optimal V(threshold):  5172.197977192629
Total value:  5172.197977192629
Num accepted:  1284
Accuracy original model:  0.8527035425730267
Accuracy accepted:  0.9228971962616822
Num rejected:  325
Percentage rejected:  0.20198881292728402


In [12]:
# DistilBERT model on the SemEval (2019) test set (unseen data).
# NOTE: this name now refers to the Metric rebuilt with TP and TN set to 0.
metric_distilbert_waseem_semeval.print_optimal_threshold_stats()

Optimal threshold:  0.9740000000000004
Optimal V(threshold):  4213.074775386907
Total value:  4213.074775386907
Num accepted:  5
Accuracy original model:  0.6425
Accuracy accepted:  1.0
Num rejected:  1195
Percentage rejected:  0.9958333333333333


In [13]:
# CNN model on the Waseem and Hovy (2016) test set (seen data).
# NOTE: this name now refers to the Metric rebuilt with TP and TN set to 0.
metric_cnn_waseem_waseem.print_optimal_threshold_stats()

Optimal threshold:  0.8150000000000003
Optimal V(threshold):  5459.9048328090175
Total value:  5459.9048328090175
Num accepted:  1128
Accuracy original model:  0.8446239900559354
Accuracy accepted:  0.9343971631205674
Num rejected:  481
Percentage rejected:  0.29894344313238036


In [14]:
# CNN model on the SemEval (2019) test set (unseen data).
# NOTE: this name now refers to the Metric rebuilt with TP and TN set to 0.
metric_cnn_waseem_semeval.print_optimal_threshold_stats()

Optimal threshold:  0.9610000000000004
Optimal V(threshold):  5290.941627847682
Total value:  5290.941627847682
Num accepted:  24
Accuracy original model:  0.6241666666666666
Accuracy accepted:  0.8333333333333334
Num rejected:  1176
Percentage rejected:  0.98
