In [None]:
from rejector.prediction import Prediction
from rejector.values import Values
from rejector.metric import Metric
from rejector.pdfs import PDFs
import numpy as np
from typing import List
from matplotlib import pyplot
from tqdm import tqdm
from IPython.display import set_matplotlib_formats
%matplotlib inline
# Ask the inline backend to emit figures as SVG.
# NOTE(review): `IPython.display.set_matplotlib_formats` is deprecated in newer
# IPython releases in favour of
# `matplotlib_inline.backend_inline.set_matplotlib_formats` — confirm against
# the IPython version this notebook is pinned to.
set_matplotlib_formats('svg')

In [None]:
# Reject-option experiment on CNN predictions for the Hoseem dataset
# (model trained and tested on Hoseem).
# NOTE(review): the earlier description of this cell named the non-calibrated
# 10-epoch model (cnn-10-epochs-hoseem-2-classes-logits), but the pickle loaded
# below is the "calibrated" one and later cells label the results derived from
# it as "Calibrated" — confirm which model this cell is meant to evaluate.
predictions = Prediction.load("input/cnn-hoseem-calibrated.p", "Hate")

# Value attached to each outcome (TP, TN, FP, FN) and to rejecting a sample.
value_TP, value_TN, value_FP, value_FN = 2, 1, 10, 20
value_REJECTION = 3
values = Values(value_TP, value_TN, value_FP, value_FN, value_REJECTION)

# Fixed bandwidth per outcome group. To search for bandwidth values instead of
# using these, build the metric without them: Metric(values, predictions).
bandwidths = {
    outcome: {'bandwidth': width}
    for outcome, width in (
        ('TPS', 0.00618112),
        ('TNS', 0.0072521),
        ('FPS', 0.01152453),
        ('FNS', 0.0133493),
    )
}

metric = Metric(values, predictions, bandwidths)
metric.plot_pdfs()
metric.plot_effectiveness()

In [None]:
%matplotlib inline
"""
In this experiment, different FP/FN value ratios are tested on the Hoseem dataset.
"""
# Sweep over the value of a false negative. Every other outcome value is held
# at 1, so the FP/FN value ratio of a setup is simply 1 / value_FN.
# The grid is irregular (1.6 is absent — NOTE(review): confirm whether that is
# intentional). Keep these 25 entries in this order: the hard-coded
# non-calibrated baselines in later cells are compared element-wise against
# the results of this sweep.
FN_VALUE_SWEEP = [
    1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.7, 1.8, 1.9,
    2, 2.5, 3, 3.5, 4, 4.5, 5,
    6, 7, 8, 9, 10,
    20, 50, 100, 1000,
]

# One setup dict per FN value, with the same keys the loop below reads.
value_setups = [
    {'value_TP': 1, 'value_TN': 1, 'value_FP': 1,
     'value_FN': value_fn, 'value_rejection': 1}
    for value_fn in FN_VALUE_SWEEP
]

# Loop-invariant work, done once instead of once per setup.
# Candidate rejection thresholds evaluated for every setup.
thresholds = np.linspace(0, 1, 1000)
# `pdfs` is not used by this cell; it is built a single time only so the name
# stays defined. NOTE(review): drop it if no other cell relies on it.
pdfs = PDFs(predictions)

# Per-setup results, index-aligned with `value_setups`:
x_values = []   # FP/FN value ratio
y1_values = []  # threshold at which effectiveness is maximal
y2_values = []  # the maximal effectiveness itself

for setup in tqdm(value_setups):
    values = Values(setup['value_TP'], setup['value_TN'],
                    setup['value_FP'], setup['value_FN'], setup['value_rejection'])
    metric = Metric(values, predictions, bandwidths)
    # Draws one effectiveness figure per setup.
    metric.plot_effectiveness()

    effectiveness_values = [
        metric.calculate_effectiveness(t) for t in thresholds]

    # `index` points into `effectiveness_values`, and therefore into `thresholds`.
    (index, max_effectiveness) = metric.maximum_effectiveness(
        effectiveness_values)
    x_values.append(setup['value_FP'] / setup['value_FN'])
    y1_values.append(thresholds[index])
    y2_values.append(max_effectiveness)

In [None]:
# Threshold that maximises effectiveness, plotted against the FP/FN value
# ratio. The raw series are printed as well so they can be copied elsewhere.
print(x_values)
print(y1_values)

fig, ax = pyplot.subplots()
ax.plot(x_values, y1_values)
ax.set_xlabel("value of FP / value of FN ratio")
ax.set_ylabel("Rejection threshold to achieve maximum effectiveness")
pyplot.show()

In [None]:
# Maximum effectiveness per setup, plotted against the FP/FN value ratio on a
# logarithmic y-axis. The raw series is printed so it can be copied elsewhere.
print(y2_values)

fig, ax = pyplot.subplots()
ax.plot(x_values, y2_values)
ax.set_xlabel("value of FP / value of FN ratio")
ax.set_ylabel("Effectiveness of the model when rejection is adopted")
ax.set_yscale("log")
pyplot.show()

In [None]:
# Effectiveness gain of the calibrated model over the non-calibrated one.
y2_calibrated = y2_values

# Maximum-effectiveness values for the non-calibrated model, one per setup.
# NOTE(review): presumably pasted from an earlier run of this notebook against
# the non-calibrated predictions — confirm provenance; they are not recomputed
# here.
y2_not_calibrated = [
    0,
    1.0581881700595542e-05,
    2.4993871173485706e-05,
    4.167131368397256e-05,
    5.9501933042052235e-05,
    7.988889427406343e-05,
    0.00012292224253185107,
    0.00014708378426959192,
    0.00017124532600733275,
    0.00019622770909119867,
    0.00034146701934953166,
    0.0005116283511490258,
    0.0007095415900240486,
    0.0009435349145782605,
    0.0012351370510787036,
    0.002480591845983462,
    0.008877664138994417,
    0.015340015569166886,
    0.025331485770889207,
    0.04608456426658869,
    0.07521106985123371,
    0.4426201470115985,
    2.33214836039741,
    5.837671724012379,
    72.34235992318838,
]

# Element-wise difference: calibrated minus non-calibrated.
diff_y2 = np.asarray(y2_calibrated) - np.asarray(y2_not_calibrated)

fig, ax = pyplot.subplots()
ax.plot(x_values, diff_y2)
ax.set_xlabel("value of FP / value of FN ratio")
ax.set_ylabel("Effectiveness gain when using calibrated model with rejection")
pyplot.show()

In [None]:
# Maximum effectiveness of the non-calibrated and calibrated models on one
# set of axes (logarithmic y-axis) for direct comparison.
fig, ax = pyplot.subplots()
for series, series_label in ((y2_not_calibrated, "Not calibrated"),
                             (y2_calibrated, "Calibrated")):
    ax.plot(x_values, series, label=series_label)
ax.set_xlabel("value of FP / value of FN ratio")
ax.set_ylabel("Effectiveness of the model when rejection is adopted")
ax.set_yscale("log")
ax.legend()
pyplot.show()

In [None]:
# Rejection threshold at maximum effectiveness for the non-calibrated and
# calibrated models, on one set of axes for direct comparison.

# Thresholds for the non-calibrated model, one per setup.
# NOTE(review): presumably pasted from an earlier run of this notebook against
# the non-calibrated predictions — confirm provenance.
y1_not_calibrated = [
    0.0,
    0.4944944944944945,
    0.4954954954954955,
    0.4964964964964965,
    0.4964964964964965,
    0.4974974974974975,
    0.4984984984984985,
    0.4984984984984985,
    0.4984984984984985,
    0.4994994994994995,
    0.5005005005005005,
    0.5015015015015015,
    0.5025025025025025,
    0.5045045045045045,
    0.5065065065065065,
    0.5595595595595596,
    0.5605605605605606,
    0.5615615615615616,
    0.6176176176176176,
    0.8358358358358359,
    0.8358358358358359,
    0.9279279279279279,
    0.986986986986987,
    0.993993993993994,
    1.0,
]
y1_calibrated = y1_values

fig, ax = pyplot.subplots()
for series, series_label in ((y1_not_calibrated, "Not calibrated"),
                             (y1_calibrated, "Calibrated")):
    ax.plot(x_values, series, label=series_label)
ax.set_xlabel("value of FP / value of FN ratio")
ax.set_ylabel("Rejection threshold to achieve maximum effectiveness")
ax.legend()
pyplot.show()