In [1]:
import tensorflow
import tensorflow_probability
import pickle

from cnn import CNN
from reader import Reader
from sklearn.metrics import classification_report, confusion_matrix 
from tensorflow.keras.utils import to_categorical
import numpy as np

In [2]:
# --- Data / vocabulary settings -------------------------------------------
FILENAME = "data/sem_eval_all.pkl"
NUM_CLASSES = 2
VOCAB_LEN = 10000
MAX_LEN = 100
EMBED_SIZE = 50

# --- Training settings ------------------------------------------------------
EPOCHS = 10
BATCH_SIZE = 128
LEARN_RATE = 0.01
LOSS_TYPE = "logits"
OVERSAMPLING_RATE = 3

# Read the samples and their string labels through the project's Reader.
reader = Reader(
    filename=FILENAME,
    num_classes=NUM_CLASSES,
    vocab_len=VOCAB_LEN,
)
X, y = reader.load()

# Encode the string labels as integer class ids: 'none' -> 0, 'hate' -> 1.
mapping = {'hate': 1, 'none': 0}
y = [mapping[label] for label in y]

In [3]:
# Restore the previously saved CNN from disk.
# NOTE(review): the "-logits" suffix presumably corresponds to LOSS_TYPE = "logits",
# i.e. the model emits raw logits rather than probabilities — confirm against CNN.
model = CNN.load("results/cnn-10-epochs-hoseem-2-classes-logits")

In [4]:
# Ground-truth class ids and the model's raw per-class outputs for every sample.
model_actual_classes = y
model_predictions = model.predict(X)

# The predicted class is the column with the largest output in each row.
predicted_classes = np.argmax(model_predictions, axis=1)

print(confusion_matrix(y_true=model_actual_classes, y_pred=predicted_classes))
print(classification_report(y_true=model_actual_classes, y_pred=predicted_classes))

[[4597 2360]
 [2383 2660]]
              precision    recall  f1-score   support

           0       0.66      0.66      0.66      6957
           1       0.53      0.53      0.53      5043

    accuracy                           0.60     12000
   macro avg       0.59      0.59      0.59     12000
weighted avg       0.60      0.60      0.60     12000



In [5]:
# Expected Calibration Error (ECE) of the raw model outputs, before calibration.
# Adapted from: https://github.com/ritun16/Machine_Learning_short_projecct/blob/master/Neural_Network_Calibration/model_calibration.ipynb
num_bins = 50

model_logits = tensorflow.convert_to_tensor(
    model_predictions,
    dtype=tensorflow.float32,
    name='logits',
)
model_labels = tensorflow.convert_to_tensor(
    model_actual_classes,
    dtype=tensorflow.int32,
    name='labels_true',
)

# Left as the final expression so the notebook displays the resulting tensor.
tensorflow_probability.stats.expected_calibration_error(
    num_bins=num_bins,
    logits=model_logits,
    labels_true=model_labels,
)

<tf.Tensor: shape=(), dtype=float32, numpy=0.25770476>

In [6]:
# Temperature Scaling
# Learn a single scalar `temp` that divides the logits so that the softmax
# cross-entropy against the true labels is minimised.
# NOTE(review): the temperature is fitted on the same samples that the ECE is
# later evaluated on; a held-out calibration split would give a less
# optimistic estimate — confirm whether this is intended.
TEMP_LEARN_RATE = 0.01  # Adam step size used only for fitting the temperature
TEMP_STEPS = 300        # number of optimisation steps

temp = tensorflow.Variable(initial_value=1.0, trainable=True, dtype=tensorflow.float32)

# These tensors do not change between optimisation steps, so build them once
# instead of re-encoding the labels on every call to compute_loss().
calibration_logits = tensorflow.convert_to_tensor(model_predictions, dtype=tensorflow.float32)
calibration_labels = tensorflow.convert_to_tensor(
    # Take the one-hot width from the logits so the label and logit shapes
    # agree even if the highest class id is absent from the labels.
    to_categorical(model_actual_classes, num_classes=calibration_logits.shape[-1]),
    dtype=tensorflow.float32,
)


def compute_loss():
    """Return the mean softmax cross-entropy of the temperature-scaled logits.

    Reads the module-level ``calibration_logits``, ``calibration_labels`` and
    the trainable variable ``temp``; takes no arguments so it can be passed
    directly to ``optimizer.minimize``.
    """
    scaled_logits = tensorflow.math.divide(calibration_logits, temp)
    # Keyword arguments make the (labels, logits) order explicit.
    return tensorflow.reduce_mean(
        tensorflow.nn.softmax_cross_entropy_with_logits(
            labels=calibration_labels,
            logits=scaled_logits,
        )
    )


optimizer = tensorflow.optimizers.Adam(learning_rate=TEMP_LEARN_RATE)

print('Temperature Initial value: {}'.format(temp.numpy()))

for _ in range(TEMP_STEPS):
    optimizer.minimize(compute_loss, var_list=[temp])


print('Temperature Final value: {}'.format(temp.numpy()))

Temperature Initial value: 1.0
Temperature Final value: 2.4869942665100098


In [7]:
# Expected Calibration Error (ECE) after dividing the model outputs by the
# fitted temperature.
num_bins = 50
pred_divisions = tensorflow.math.divide(model_predictions, temp)

logits = tensorflow.convert_to_tensor(
    pred_divisions,
    dtype=tensorflow.float32,
    name='logits',
)
labels_true = tensorflow.convert_to_tensor(
    model_actual_classes,
    dtype=tensorflow.int32,
    name='labels_true',
)

# Left as the final expression so the notebook displays the resulting tensor.
tensorflow_probability.stats.expected_calibration_error(
    num_bins=num_bins,
    logits=logits,
    labels_true=labels_true,
)

<tf.Tensor: shape=(), dtype=float32, numpy=0.11408787>

In [8]:
# Turn the temperature-scaled logits into per-class probabilities with a softmax,
# then pick the most probable class for each row.
calibrated_predictions_all = tensorflow.nn.softmax(logits).numpy()
calibrated_predictions = np.argmax(calibrated_predictions_all, axis=1)

In [9]:
# Accuracy should remain the same: the calibrated outputs are the original
# outputs divided by one scalar temperature, which (for a positive temperature)
# does not change which class has the largest value in each row.
print(confusion_matrix(model_actual_classes, calibrated_predictions))
print(classification_report(model_actual_classes, calibrated_predictions))

[[4597 2360]
 [2383 2660]]
              precision    recall  f1-score   support

           0       0.66      0.66      0.66      6957
           1       0.53      0.53      0.53      5043

    accuracy                           0.60     12000
   macro avg       0.59      0.59      0.59     12000
weighted avg       0.60      0.60      0.60     12000



In [12]:
def to_predictions(predictions, labels):
    """Build one summary record per sample from class scores and true labels.

    Args:
        predictions: 2-D array-like of per-class scores, one row per sample.
        labels: Iterable of true class indices, aligned with the rows of
            ``predictions``.

    Returns:
        A list of dicts, one per sample, with the keys ``predicted_class``
        and ``actual_class`` (names produced by ``class_name``),
        ``predicted_value`` (the score of the predicted class) and ``text``
        (always ``None``). If the inputs differ in length, the result stops
        at the shorter one.
    """
    predicted_classes = np.argmax(predictions, axis=1)
    # Pair every row with the labels that were passed in. Earlier revisions
    # read the notebook-global `model_actual_classes` here and silently
    # ignored the `labels` argument.
    return [
        {'predicted_class': class_name(predicted_class),
         'actual_class': class_name(actual_class),
         'predicted_value': predicted_values[predicted_class],
         'text': None}
        for predicted_values, predicted_class, actual_class
        in zip(predictions, predicted_classes, labels)
    ]

def class_name(index):
    """Return the display name for a class index.

    ``0`` maps to ``"None"`` and ``1`` maps to ``"Hate"``; any other index
    yields ``None``.
    """
    if index == 0:
        return "None"
    if index == 1:
        return "Hate"
    # Unknown indices keep the historical behaviour of returning None.
    return None


In [13]:
# Build one record per sample from the calibrated probabilities and persist them.
calibrated_predictions_info = to_predictions(calibrated_predictions_all, model_actual_classes)
# The context manager guarantees the file is flushed and closed, even if
# pickling fails part-way through.
with open("results/cnn-sem-eval-calibrated.p", "wb") as calibrated_file:
    pickle.dump(calibrated_predictions_info, calibrated_file)

In [14]:
# Old model without calibration, constructed from the shared hyperparameters.
# NOTE(review): `old_model` is rebound by the CNN.load(...) call in a later
# cell, so this freshly constructed instance may be unnecessary — confirm.
old_model = CNN(
    max_len=MAX_LEN,
    num_classes=NUM_CLASSES,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    embed_size=EMBED_SIZE,
    vocab_len=VOCAB_LEN,
)

In [23]:
# Restore the saved CNN whose path lacks the "-logits" suffix; this rebinds
# `old_model`, replacing whatever it referred to before.
old_model = CNN.load("results/cnn-10-epochs-hoseem-2-classes")

In [24]:
# Per-class outputs of the old model for the same inputs X used above.
old_model_predictions_all = old_model.predict(X)



In [25]:
# Predicted class per row (arg-max over the class axis), scored against the
# same ground-truth labels as the calibrated model for a direct comparison.
old_model_predictions = np.argmax(old_model_predictions_all, axis=1)
print(confusion_matrix(model_actual_classes, old_model_predictions))
print(classification_report(model_actual_classes, old_model_predictions))

[[5009 1948]
 [2679 2364]]
              precision    recall  f1-score   support

           0       0.65      0.72      0.68      6957
           1       0.55      0.47      0.51      5043

    accuracy                           0.61     12000
   macro avg       0.60      0.59      0.59     12000
weighted avg       0.61      0.61      0.61     12000



In [26]:
# Build one record per sample from the old model's outputs and persist them.
old_model_predictions_info = to_predictions(old_model_predictions_all, model_actual_classes)
# The context manager guarantees the file is flushed and closed, even if
# pickling fails part-way through.
with open("results/cnn-sem-eval-not-calibrated.p", "wb") as not_calibrated_file:
    pickle.dump(old_model_predictions_info, not_calibrated_file)