In [1]:
import tensorflow
import tensorflow_probability
import pickle

from cnn import CNN
from reader import Reader
from sklearn.metrics import classification_report, confusion_matrix 
from tensorflow.keras.utils import to_categorical
import numpy as np

In [3]:
# Run configuration. Each constant is consumed by the cells below.
# Number of training epochs; passed to CNN(epochs=...).
EPOCHS = 10
# Mini-batch size; passed to CNN(batch_size=...).
BATCH_SIZE = 128
# Number of target classes; used by Reader, to_categorical and CNN.
NUM_CLASSES = 2
# NOTE(review): not referenced by any cell visible in this notebook -- confirm
# whether the CNN wrapper is supposed to receive it.
LEARN_RATE = 0.01
# Embedding dimensionality; passed to CNN(embed_size=...).
EMBED_SIZE = 50
# Pickled dataset handed to Reader(filename=...).
FILENAME = "data/twitter_data.pkl"
# Total number of times each positive training example appears after
# oversampling (the original plus OVERSAMPLING_RATE - 1 extra copies).
OVERSAMPLING_RATE = 3
# Vocabulary size; passed to both Reader and CNN.
VOCAB_LEN = 10000
# Passed to CNN(loss_type=...); later cells treat the model outputs as logits.
LOSS_TYPE = "logits"
# Passed to CNN(max_len=...).
MAX_LEN = 100

# Load the examples and their string labels through the project Reader.
reader = Reader(
    filename=FILENAME,
    num_classes=NUM_CLASSES,
    vocab_len=VOCAB_LEN,
)
X, y = reader.load()

# Collapse the three source labels into a binary target:
# 'racism' and 'sexism' -> 1 (hate), 'none' -> 0.
mapping = dict(racism=1, sexism=1, none=0)
y = [mapping[label] for label in y]

# NOTE(review): no random seed is set in the visible cells; if reader.split
# shuffles, the partition will differ between runs -- confirm.
X_train, X_test, y_train, y_test = reader.split(X, y)

# Oversample the positive class. This happens after the split, so the
# duplicated rows only ever end up in the training data.
hate = [position for position, label in enumerate(y_train) if label == 1]
extra_copies = OVERSAMPLING_RATE - 1
X_train = X_train + [X_train[position] for position in hate] * extra_copies
y_train = y_train + [1] * (len(hate) * extra_copies)

# Convert features to arrays and labels to one-hot rows of width NUM_CLASSES.
X_train, X_test = np.asarray(X_train), np.asarray(X_test)
y_train, y_test = (
    to_categorical(labels, num_classes=NUM_CLASSES)
    for labels in (y_train, y_test)
)

In [6]:
# Gather the constructor arguments in one place, then build the CNN wrapper.
# The two paths are where the training log below reports the per-epoch
# checkpoint and the final saved model being written.
cnn_settings = dict(
    max_len=MAX_LEN,
    num_classes=NUM_CLASSES,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    embed_size=EMBED_SIZE,
    vocab_len=VOCAB_LEN,
    loss_type=LOSS_TYPE,
    save_model=True,
    save_path="results/cnn-10-epochs-hoseem-2-classes-logits",
    checkpoint_path="results/cnn-10-epochs-hoseem-2-classes-logits.ckpt",
)
model = CNN(**cnn_settings)

In [7]:
# Train on the oversampled training set. The recorded log shows a checkpoint
# being written after every epoch and the final model being saved afterwards.
# The value returned by fit() is left as the cell's displayed output.
model.fit(X_train, y_train)

Epoch 1/10
Epoch 1: saving model to results\cnn-10-epochs-hoseem-2-classes-logits.ckpt
Epoch 2/10
Epoch 2: saving model to results\cnn-10-epochs-hoseem-2-classes-logits.ckpt
Epoch 3/10
Epoch 3: saving model to results\cnn-10-epochs-hoseem-2-classes-logits.ckpt
Epoch 4/10
Epoch 4: saving model to results\cnn-10-epochs-hoseem-2-classes-logits.ckpt
Epoch 5/10
Epoch 5: saving model to results\cnn-10-epochs-hoseem-2-classes-logits.ckpt
Epoch 6/10
Epoch 6: saving model to results\cnn-10-epochs-hoseem-2-classes-logits.ckpt
Epoch 7/10
Epoch 7: saving model to results\cnn-10-epochs-hoseem-2-classes-logits.ckpt
Epoch 8/10
Epoch 8: saving model to results\cnn-10-epochs-hoseem-2-classes-logits.ckpt
Epoch 9/10
Epoch 9: saving model to results\cnn-10-epochs-hoseem-2-classes-logits.ckpt
Epoch 10/10
Epoch 10: saving model to results\cnn-10-epochs-hoseem-2-classes-logits.ckpt




INFO:tensorflow:Assets written to: results/cnn-10-epochs-hoseem-2-classes-logits\assets


INFO:tensorflow:Assets written to: results/cnn-10-epochs-hoseem-2-classes-logits\assets


<keras.engine.functional.Functional at 0x1d136d49cd0>

In [8]:
# Score the held-out split with the wrapped model; these raw outputs are
# reused by the calibration cells further down.
model_predictions = model.cnn.predict(X_test)

# Hard decisions: index of the largest output per row vs. the one-hot target.
predicted_classes = np.argmax(model_predictions, axis=1)
actual_classes = y_test.argmax(axis=1)

# Confusion matrix first, then the per-class precision/recall/F1 table.
for metric in (confusion_matrix, classification_report):
    print(metric(actual_classes, predicted_classes))

[[960 144]
 [121 384]]
              precision    recall  f1-score   support

           0       0.89      0.87      0.88      1104
           1       0.73      0.76      0.74       505

    accuracy                           0.84      1609
   macro avg       0.81      0.81      0.81      1609
weighted avg       0.84      0.84      0.84      1609



In [9]:
# Implementation from: https://github.com/ritun16/Machine_Learning_short_projecct/blob/master/Neural_Network_Calibration/model_calibration.ipynb
# Expected calibration error of the raw model outputs (before calibration).
num_bins = 50

# Integer class ids recovered from the one-hot test targets.
model_actual_classes = y_test.argmax(axis=1)

# Tensor views of the labels and of the model outputs (treated as logits).
model_logits = tensorflow.convert_to_tensor(
    model_predictions, dtype=tensorflow.float32, name='logits'
)
model_labels = tensorflow.convert_to_tensor(
    model_actual_classes, dtype=tensorflow.int32, name='labels_true'
)

# Last expression of the cell, so the ECE tensor is displayed as its output.
tensorflow_probability.stats.expected_calibration_error(
    num_bins=num_bins,
    logits=model_logits,
    labels_true=model_labels,
)

<tf.Tensor: shape=(), dtype=float32, numpy=0.08734518>

In [10]:
# Temperature Scaling
# Fit a single scalar `temp` so that softmax(model_predictions / temp)
# minimises the cross-entropy against the true labels.
# NOTE(review): `temp` is fitted on the predictions for X_test, the same split
# on which the post-calibration ECE is reported, so that figure may be
# optimistic -- consider fitting on a separate validation split.
temp = tensorflow.Variable(initial_value=1.0, trainable=True, dtype=tensorflow.float32)

# The one-hot targets never change during optimisation, so build them once
# instead of on every step. num_classes is pinned so the target width always
# matches the width used for y_train / y_test, even if a class were missing
# from the labels.
calibration_targets = tensorflow.convert_to_tensor(
    tensorflow.keras.utils.to_categorical(model_actual_classes, num_classes=NUM_CLASSES)
)


def compute_loss():
    """Return the mean softmax cross-entropy of the temperature-scaled logits.

    Reads the module-level `model_predictions`, `temp` and
    `calibration_targets`; takes no arguments so it can be handed to
    `optimizer.minimize` as a loss callable.
    """
    pred_divisions = tensorflow.math.divide(model_predictions, temp)
    return tensorflow.reduce_mean(
        tensorflow.nn.softmax_cross_entropy_with_logits(
            labels=calibration_targets, logits=pred_divisions
        )
    )


optimizer = tensorflow.optimizers.Adam(learning_rate=0.01)

print('Temperature Initial value: {}'.format(temp.numpy()))

# 300 Adam steps over the single trainable scalar.
for _ in range(300):
    optimizer.minimize(compute_loss, var_list=[temp])


print('Temperature Final value: {}'.format(temp.numpy()))

Temperature Initial value: 1.0
Temperature Final value: 1.9471774101257324


In [11]:
# ECE result after calibration
# Same metric as before, now on the logits divided by the fitted temperature.
num_bins = 50
pred_divisions = tensorflow.math.divide(model_predictions, temp)

logits = tensorflow.convert_to_tensor(
    pred_divisions, dtype=tensorflow.float32, name='logits'
)
labels_true = tensorflow.convert_to_tensor(
    model_actual_classes, dtype=tensorflow.int32, name='labels_true'
)

# Last expression of the cell, so the ECE tensor is displayed as its output.
tensorflow_probability.stats.expected_calibration_error(
    num_bins=num_bins,
    logits=logits,
    labels_true=labels_true,
)

<tf.Tensor: shape=(), dtype=float32, numpy=0.061666507>

In [12]:
# Turn the temperature-scaled logits into probabilities, then take the most
# probable class per row.
calibrated_predictions_all = tensorflow.nn.softmax(logits).numpy()
calibrated_predictions = calibrated_predictions_all.argmax(axis=1)

In [13]:
# Accuracy should remain the same: dividing every logit by one positive
# temperature does not change which class has the largest score.
for metric in (confusion_matrix, classification_report):
    print(metric(model_actual_classes, calibrated_predictions))

[[960 144]
 [121 384]]
              precision    recall  f1-score   support

           0       0.89      0.87      0.88      1104
           1       0.73      0.76      0.74       505

    accuracy                           0.84      1609
   macro avg       0.81      0.81      0.81      1609
weighted avg       0.84      0.84      0.84      1609



In [19]:
def to_predictions(predictions, labels):
    """Build one record per example from score rows and true class indices.

    Args:
        predictions: 2-D array-like of per-class scores, one row per example.
        labels: iterable of true class indices, aligned with `predictions`.

    Returns:
        A list of dicts with the keys 'predicted_class' and 'actual_class'
        (names from `class_name`), 'predicted_value' (the score of the
        predicted class) and 'text' (always None).
    """
    winners = np.argmax(predictions, 1)
    records = []
    # zip stops at the shortest input, like the multi-iterable map it replaces.
    for scores, winner, actual in zip(predictions, winners, labels):
        records.append({'predicted_class': class_name(winner),
                        'actual_class': class_name(actual),
                        'predicted_value': scores[winner],
                        'text': None})
    return records

def class_name(index):
    """Map a class index to its display name: 0 -> "None", 1 -> "Hate".

    Any other index yields None.
    """
    for known_index, label in enumerate(("None", "Hate")):
        if index == known_index:
            return label
    return None


In [20]:
# Build per-example records from the calibrated probabilities and persist them.
calibrated_predictions_info = to_predictions(calibrated_predictions_all, model_actual_classes)
# A context manager closes (and flushes) the file even if pickling fails;
# the bare open() used before left the handle open.
with open("results/cnn-hoseem-calibrated.p", "wb") as calibrated_file:
    pickle.dump(calibrated_predictions_info, calibrated_file)

NameError: name 'calibrated_predictions_all' is not defined

In [21]:
# Load a previously saved model for comparison. Note the path has no "-logits"
# suffix, so this is not the model trained above.
# NOTE(review): its printed outputs below have rows summing to ~1, so it
# presumably emits probabilities rather than logits -- confirm.
old_model = CNN.load("results/cnn-10-epochs-hoseem-2-classes")

In [22]:
# Score the same held-out split (X_test) with the previously saved model.
old_model_predictions_all = old_model.predict(X_test)



In [23]:
# Hard decisions of the old model and the matching true class indices.
old_model_predictions = np.argmax(old_model_predictions_all, axis=1)
old_model_classes = y_test.argmax(axis=1)

# Raw outputs first, then the confusion matrix and the per-class report.
print(old_model_predictions_all)
for metric in (confusion_matrix, classification_report):
    print(metric(old_model_classes, old_model_predictions))

[[0.9912293  0.00877067]
 [0.00567411 0.9943258 ]
 [0.0058198  0.9941802 ]
 ...
 [0.6191817  0.38081834]
 [0.98527575 0.01472421]
 [0.9885338  0.01146614]]
[[979 125]
 [124 381]]
              precision    recall  f1-score   support

           0       0.89      0.89      0.89      1104
           1       0.75      0.75      0.75       505

    accuracy                           0.85      1609
   macro avg       0.82      0.82      0.82      1609
weighted avg       0.85      0.85      0.85      1609



In [25]:
# Build per-example records from the old model's outputs and persist them.
old_model_predictions_info = to_predictions(old_model_predictions_all, old_model_classes)
# A context manager closes (and flushes) the file even if pickling fails;
# the bare open() used before left the handle open.
with open("results/cnn-hoseem-not-calibrated.p", "wb") as not_calibrated_file:
    pickle.dump(old_model_predictions_info, not_calibrated_file)