# Tables
This notebook prints the metric info about the optimal thresholds used in the tables of the paper.

It loads the saved predictions of all three models (LR, DistilBERT, and CNN).
Each model is trained on the Waseem and Hovy (2016) dataset and evaluated on two test datasets:

1. Waseem and Hovy (2016): to simulate seen data.
2. SemEval (2019): to simulate unseen data.

### Table results (all values)
The results below take all values (TP, TN, FP, FN, and rejection) into account.

In [1]:
from rejector.prediction import Prediction
from rejector.values import Values
from rejector.metric import Metric
import seaborn as sns

# Value assigned to each outcome type (TP, TN, FP, FN) and to a rejection.
# NOTE(review): the provenance of these constants is not shown in this notebook.
values = Values(
    value_TP=18.149543604085686,
    value_TN=36.31953463203463,
    value_FP=16.68669949423102,
    value_FN=28.08375563063063,
    value_rejection=4.82167904290429,
)

# One Metric per (model, test set) pair; every model was trained on
# Waseem and Hovy (2016). `predictions` is a scratch name that is rebound
# before each Metric is built.

# --- LR ---
predictions = Prediction.load("input/lr-waseem-waseem.p", gold_class="Hate")
metric_lr_waseem_waseem = Metric(values, predictions)
predictions = Prediction.load("input/lr-waseem-semeval.p", gold_class="Hate")
metric_lr_waseem_semeval = Metric(values, predictions)

# --- DistilBERT ---
predictions = Prediction.load("input/distilbert-waseem-waseem.p", gold_class="Hate")
metric_distilbert_waseem_waseem = Metric(values, predictions)
predictions = Prediction.load("input/distilbert-waseem-semeval.p", gold_class="Hate")
metric_distilbert_waseem_semeval = Metric(values, predictions)

# --- CNN ---
predictions = Prediction.load("input/cnn-waseem-waseem.p", gold_class="Hate")
metric_cnn_waseem_waseem = Metric(values, predictions)
predictions = Prediction.load("input/cnn-waseem-semeval.p", gold_class="Hate")
metric_cnn_waseem_semeval = Metric(values, predictions)



  L += func(f_i)
  L += func(f_i)


In [2]:
# LR model, evaluated on Waseem and Hovy (2016) (seen data); all values.
metric_lr_waseem_waseem.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.5
Optimal V(threshold):  27.706832886410012
Total value:  27.706832886410012
Num accepted:  3218
Accuracy original model:  0.8467992541951522
Accuracy accepted:  0.8467992541951522
Num rejected:  0
Percentage rejected:  0.0


In [3]:
# LR model, evaluated on SemEval (2019) (unseen data); all values.
metric_lr_waseem_semeval.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.5
Optimal V(threshold):  15.689251914545434
Total value:  15.689251914545434
Num accepted:  2400
Accuracy original model:  0.6404166666666666
Accuracy accepted:  0.6404166666666666
Num rejected:  0
Percentage rejected:  0.0


In [4]:
# DistilBERT model, evaluated on Waseem and Hovy (2016) (seen data); all values.
metric_distilbert_waseem_waseem.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.502
Optimal V(threshold):  28.000773216243942
Total value:  28.000773216243942
Num accepted:  3218
Accuracy original model:  0.8495960223741454
Accuracy accepted:  0.8495960223741454
Num rejected:  0
Percentage rejected:  0.0


In [5]:
# DistilBERT model, evaluated on SemEval (2019) (unseen data); all values.
metric_distilbert_waseem_semeval.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.5
Optimal V(threshold):  15.822775404025263
Total value:  15.822775404025263
Num accepted:  2400
Accuracy original model:  0.6404166666666666
Accuracy accepted:  0.6404166666666666
Num rejected:  0
Percentage rejected:  0.0


In [6]:
# CNN model, evaluated on Waseem and Hovy (2016) (seen data); all values.
metric_cnn_waseem_waseem.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.5
Optimal V(threshold):  27.291234782067917
Total value:  27.291234782067917
Num accepted:  3218
Accuracy original model:  0.8346799254195152
Accuracy accepted:  0.8346799254195152
Num rejected:  0
Percentage rejected:  0.0


In [7]:
# CNN model, evaluated on SemEval (2019) (unseen data); all values.
metric_cnn_waseem_semeval.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.5
Optimal V(threshold):  14.868028931766595
Total value:  14.868028931766595
Num accepted:  2400
Accuracy original model:  0.6291666666666667
Accuracy accepted:  0.6291666666666667
Num rejected:  0
Percentage rejected:  0.0


### Table results (TP and TN set to 0)
The results below use the same FP, FN, and rejection values as above, but with the TP and TN values set to 0.

In [8]:
# Same FP, FN and rejection values as the "all values" table, but correct
# predictions (TP and TN) are given a value of 0.
values = Values(
    value_TP=0.0,
    value_TN=0.0,
    value_FP=16.68669949423102,
    value_FN=28.08375563063063,
    value_rejection=4.82167904290429,
)

# Rebuild one Metric per (model, test set) pair with the new values.
# This rebinds the metric_* names created for the "all values" table;
# `predictions` is a scratch name that is rebound before each Metric is built.

# --- LR ---
predictions = Prediction.load("input/lr-waseem-waseem.p", gold_class="Hate")
metric_lr_waseem_waseem = Metric(values, predictions)
predictions = Prediction.load("input/lr-waseem-semeval.p", gold_class="Hate")
metric_lr_waseem_semeval = Metric(values, predictions)

# --- DistilBERT ---
predictions = Prediction.load("input/distilbert-waseem-waseem.p", gold_class="Hate")
metric_distilbert_waseem_waseem = Metric(values, predictions)
predictions = Prediction.load("input/distilbert-waseem-semeval.p", gold_class="Hate")
metric_distilbert_waseem_semeval = Metric(values, predictions)

# --- CNN ---
predictions = Prediction.load("input/cnn-waseem-waseem.p", gold_class="Hate")
metric_cnn_waseem_waseem = Metric(values, predictions)
predictions = Prediction.load("input/cnn-waseem-semeval.p", gold_class="Hate")
metric_cnn_waseem_semeval = Metric(values, predictions)


In [9]:
# LR model, evaluated on Waseem and Hovy (2016) (seen data); TP and TN values set to 0.
metric_lr_waseem_waseem.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.7830000000000003
Optimal V(threshold):  2.6875966574968135
Total value:  2.6875966574968135
Num accepted:  2414
Accuracy original model:  0.8467992541951522
Accuracy accepted:  0.9096934548467275
Num rejected:  804
Percentage rejected:  0.24984462399005594


In [10]:
# LR model, evaluated on SemEval (2019) (unseen data); TP and TN values set to 0.
metric_lr_waseem_semeval.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.9940000000000004
Optimal V(threshold):  3.6676034224181153
Total value:  3.6676034224181153
Num accepted:  101
Accuracy original model:  0.6404166666666666
Accuracy accepted:  0.7524752475247525
Num rejected:  2299
Percentage rejected:  0.9579166666666666


In [11]:
# DistilBERT model, evaluated on Waseem and Hovy (2016) (seen data); TP and TN values set to 0.
metric_distilbert_waseem_waseem.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.8780000000000003
Optimal V(threshold):  3.040608661995707
Total value:  3.040608661995707
Num accepted:  2408
Accuracy original model:  0.8495960223741454
Accuracy accepted:  0.9256644518272426
Num rejected:  810
Percentage rejected:  0.2517091361093847


In [12]:
# DistilBERT model, evaluated on SemEval (2019) (unseen data); TP and TN values set to 0.
metric_distilbert_waseem_semeval.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.9480000000000004
Optimal V(threshold):  3.606319469684868
Total value:  3.606319469684868
Num accepted:  185
Accuracy original model:  0.6404166666666666
Accuracy accepted:  0.8810810810810811
Num rejected:  2215
Percentage rejected:  0.9229166666666667


In [13]:
# CNN model, evaluated on Waseem and Hovy (2016) (seen data); TP and TN values set to 0.
metric_cnn_waseem_waseem.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.7560000000000002
Optimal V(threshold):  2.489815977599296
Total value:  2.489815977599296
Num accepted:  2325
Accuracy original model:  0.8346799254195152
Accuracy accepted:  0.8976344086021505
Num rejected:  893
Percentage rejected:  0.27750155376009944


In [14]:
# CNN model, evaluated on SemEval (2019) (unseen data); TP and TN values set to 0.
metric_cnn_waseem_semeval.print_optimal_threshold_stats(use_pdf=True)

Optimal threshold:  0.9990000000000004
Optimal V(threshold):  3.364996747276529
Total value:  3.364996747276529
Num accepted:  0
Accuracy original model:  0.6291666666666667
Accuracy accepted: -
Num rejected:  2400
Percentage rejected:  1.0
