# Tables
This notebook prints the metrics at the optimal rejection threshold that are reported in the tables of the paper.

It loads the predictions of all 3 models (LR, DistilBERT, and CNN).
Each model is trained on the Waseem and Hovy (2016) dataset and evaluated on two test datasets:

1. Waseem and Hovy (2016): to simulate seen data.
2. SemEval (2019): to simulate unseen data.

### Table results (all values)
The results below take all values (TP, TN, FP, FN, and rejection) into account.

In [1]:
from rejector.prediction import Prediction
from rejector.values import Values
from rejector.metric import Metric
import seaborn as sns

# Values attached to TP / TN / FP / FN predictions and to a rejection
# (all values are taken into account in this section).
values = Values(
    value_TP=18.149543604085686,
    value_TN=36.31953463203463,
    value_FP=16.68669949423102,
    value_FN=28.08375563063063,
    value_rejection=4.82167904290429,
)

# One prediction file per (model, test set) pair. The order here must match
# the order of the names in the unpacking at the bottom of this cell.
prediction_files = (
    "input/lr-waseem-waseem.p",
    "input/lr-waseem-semeval.p",
    "input/distilbert-waseem-waseem.p",
    "input/distilbert-waseem-semeval.p",
    "input/cnn-waseem-waseem.p",
    "input/cnn-waseem-semeval.p",
)

# Load every prediction file in turn and wrap it in a Metric built on `values`.
metrics = []
for prediction_file in prediction_files:
    predictions = Prediction.load(prediction_file, gold_class="Hate")
    metrics.append(Metric(values, predictions))

(
    metric_lr_waseem_waseem,
    metric_lr_waseem_semeval,
    metric_distilbert_waseem_waseem,
    metric_distilbert_waseem_semeval,
    metric_cnn_waseem_waseem,
    metric_cnn_waseem_semeval,
) = metrics



  L += func(f_i)
  L += func(f_i)


In [2]:
# Optimal-threshold statistics for the LR model on the Waseem and Hovy (2016) test set (seen data).
metric_lr_waseem_waseem.print_optimal_threshold_stats()

Optimal threshold:  0.508
Optimal V(threshold):  90141.78682533788
Total value:  90141.78682533788
Num accepted:  3197
Accuracy original model:  0.8467992541951522
Accuracy accepted:  0.8501720362840163
Num rejected:  21
Percentage rejected:  0.006525792417650715


In [3]:
# Optimal-threshold statistics for the LR model on the SemEval (2019) test set (unseen data).
metric_lr_waseem_semeval.print_optimal_threshold_stats()

Optimal threshold:  0.5
Optimal V(threshold):  38237.12144442372
Total value:  38237.12144442372
Num accepted:  2400
Accuracy original model:  0.6404166666666666
Accuracy accepted:  0.6404166666666666
Num rejected:  0
Percentage rejected:  0.0


In [4]:
# Optimal-threshold statistics for the DistilBERT model on the Waseem and Hovy (2016) test set (seen data).
metric_distilbert_waseem_waseem.print_optimal_threshold_stats()

Optimal threshold:  0.504
Optimal V(threshold):  90140.28351903966
Total value:  90140.28351903966
Num accepted:  3216
Accuracy original model:  0.8495960223741454
Accuracy accepted:  0.8501243781094527
Num rejected:  2
Percentage rejected:  0.0006215040397762585


In [5]:
# Optimal-threshold statistics for the DistilBERT model on the SemEval (2019) test set (unseen data).
metric_distilbert_waseem_semeval.print_optimal_threshold_stats()

Optimal threshold:  0.5
Optimal V(threshold):  38013.61459300258
Total value:  38013.61459300258
Num accepted:  2400
Accuracy original model:  0.6404166666666666
Accuracy accepted:  0.6404166666666666
Num rejected:  0
Percentage rejected:  0.0


In [6]:
# Optimal-threshold statistics for the CNN model on the Waseem and Hovy (2016) test set (seen data).
metric_cnn_waseem_waseem.print_optimal_threshold_stats()

Optimal threshold:  0.5
Optimal V(threshold):  88139.59185443362
Total value:  88139.59185443362
Num accepted:  3218
Accuracy original model:  0.8346799254195152
Accuracy accepted:  0.8346799254195152
Num rejected:  0
Percentage rejected:  0.0


In [7]:
# Optimal-threshold statistics for the CNN model on the SemEval (2019) test set (unseen data).
metric_cnn_waseem_semeval.print_optimal_threshold_stats()

Optimal threshold:  0.5
Optimal V(threshold):  36094.794959401865
Total value:  36094.794959401865
Num accepted:  2400
Accuracy original model:  0.6291666666666667
Accuracy accepted:  0.6291666666666667
Num rejected:  0
Percentage rejected:  0.0


### Table results (TP and TN set to 0)
The results below use the same FP, FN, and rejection values as above, but the TP and TN values are set to 0.

In [8]:
# Same FP / FN / rejection values as in the first section, but correct
# predictions (TP and TN) are given a value of 0.
values = Values(
    value_TP=0.0,
    value_TN=0.0,
    value_FP=16.68669949423102,
    value_FN=28.08375563063063,
    value_rejection=4.82167904290429,
)

# One prediction file per (model, test set) pair. The order here must match
# the order of the names in the unpacking at the bottom of this cell.
prediction_files = (
    "input/lr-waseem-waseem.p",
    "input/lr-waseem-semeval.p",
    "input/distilbert-waseem-waseem.p",
    "input/distilbert-waseem-semeval.p",
    "input/cnn-waseem-waseem.p",
    "input/cnn-waseem-semeval.p",
)

# Reload every prediction file and rebuild its Metric with the new `values`,
# rebinding the metric names used by the cells below.
metrics = []
for prediction_file in prediction_files:
    predictions = Prediction.load(prediction_file, gold_class="Hate")
    metrics.append(Metric(values, predictions))

(
    metric_lr_waseem_waseem,
    metric_lr_waseem_semeval,
    metric_distilbert_waseem_waseem,
    metric_distilbert_waseem_semeval,
    metric_cnn_waseem_waseem,
    metric_cnn_waseem_semeval,
) = metrics


In [9]:
# Optimal-threshold statistics for the LR model on the Waseem and Hovy (2016) test set (seen data), with TP and TN values set to 0.
metric_lr_waseem_waseem.print_optimal_threshold_stats()

Optimal threshold:  0.7800000000000002
Optimal V(threshold):  8865.710293736567
Total value:  8865.710293736567
Num accepted:  2425
Accuracy original model:  0.8467992541951522
Accuracy accepted:  0.9101030927835052
Num rejected:  793
Percentage rejected:  0.24642635177128652


In [10]:
# Optimal-threshold statistics for the LR model on the SemEval (2019) test set (unseen data), with TP and TN values set to 0.
metric_lr_waseem_semeval.print_optimal_threshold_stats()

Optimal threshold:  0.9950000000000004
Optimal V(threshold):  8949.538657453895
Total value:  8949.538657453895
Num accepted:  79
Accuracy original model:  0.6404166666666666
Accuracy accepted:  0.7848101265822784
Num rejected:  2321
Percentage rejected:  0.9670833333333333


In [11]:
# Optimal-threshold statistics for the DistilBERT model on the Waseem and Hovy (2016) test set (seen data), with TP and TN values set to 0.
metric_distilbert_waseem_waseem.print_optimal_threshold_stats()

Optimal threshold:  0.8790000000000003
Optimal V(threshold):  9862.042120841836
Total value:  9862.042120841836
Num accepted:  2402
Accuracy original model:  0.8495960223741454
Accuracy accepted:  0.9263114071606994
Num rejected:  816
Percentage rejected:  0.2535736482287135


In [12]:
# Optimal-threshold statistics for the DistilBERT model on the SemEval (2019) test set (unseen data), with TP and TN values set to 0.
metric_distilbert_waseem_semeval.print_optimal_threshold_stats()

Optimal threshold:  0.9490000000000004
Optimal V(threshold):  9225.034246887006
Total value:  9225.034246887006
Num accepted:  154
Accuracy original model:  0.6404166666666666
Accuracy accepted:  0.9155844155844156
Num rejected:  2246
Percentage rejected:  0.9358333333333333


In [13]:
# Optimal-threshold statistics for the CNN model on the Waseem and Hovy (2016) test set (seen data), with TP and TN values set to 0.
metric_cnn_waseem_waseem.print_optimal_threshold_stats()

Optimal threshold:  0.7610000000000002
Optimal V(threshold):  8118.3095033504815
Total value:  8118.3095033504815
Num accepted:  2299
Accuracy original model:  0.8346799254195152
Accuracy accepted:  0.8995215311004785
Num rejected:  919
Percentage rejected:  0.2855811062771908


In [14]:
# Optimal-threshold statistics for the CNN model on the SemEval (2019) test set (unseen data), with TP and TN values set to 0.
metric_cnn_waseem_semeval.print_optimal_threshold_stats()

Optimal threshold:  0.9950000000000004
Optimal V(threshold):  8119.374308763965
Total value:  8119.374308763965
Num accepted:  2
Accuracy original model:  0.6291666666666667
Accuracy accepted:  1.0
Num rejected:  2398
Percentage rejected:  0.9991666666666666
