# CNN model
This notebook trains the CNN model.

**Trains on:** Waseem and Hovy (2016)

First we need to install the required packages.

In [1]:
!pip install pyyaml h5py

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
!pip install "ray[tune]"

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ray[tune]
  Downloading ray-2.0.0-cp37-cp37m-manylinux2014_x86_64.whl (59.4 MB)
[K     |████████████████████████████████| 59.4 MB 1.4 MB/s 
Collecting grpcio<=1.43.0,>=1.28.1
  Downloading grpcio-1.43.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)
[K     |████████████████████████████████| 4.1 MB 65.6 MB/s 
Collecting virtualenv
  Downloading virtualenv-20.16.3-py2.py3-none-any.whl (8.8 MB)
[K     |████████████████████████████████| 8.8 MB 66.7 MB/s 
Collecting tensorboardX>=1.9
  Downloading tensorboardX-2.5.1-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 81.6 MB/s 
Collecting platformdirs<3,>=2.4
  Downloading platformdirs-2.5.2-py3-none-any.whl (14 kB)
Collecting distlib<1,>=0.3.5
  Downloading distlib-0.3.6-py2.py3-none-any.whl (468 kB)
[K     |████████████████████████████████| 468 kB 72.6 MB/s 
[?25hInstalling coll

In [3]:
!pip install tweet-preprocessor

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tweet-preprocessor
  Downloading tweet_preprocessor-0.6.0-py3-none-any.whl (27 kB)
Installing collected packages: tweet-preprocessor
Successfully installed tweet-preprocessor-0.6.0


In [4]:
!pip install wordsegment

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wordsegment
  Downloading wordsegment-1.3.1-py2.py3-none-any.whl (4.8 MB)
[K     |████████████████████████████████| 4.8 MB 4.9 MB/s 
[?25hInstalling collected packages: wordsegment
Successfully installed wordsegment-1.3.1


In [5]:
pip install -q -U keras-tuner


[K     |████████████████████████████████| 135 kB 5.2 MB/s 
[K     |████████████████████████████████| 1.6 MB 48.7 MB/s 
[?25h

In [6]:
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset path (FILENAME) and the
# model output paths used later in this notebook all point into the Drive folder.
drive.mount('/content/drive')

import sys
# Extend the module search path with the notebook folder on Drive so the local
# modules imported below (cnn, cnn_hyperparam_tune, reader) can be resolved.
# NOTE(review): this path is relative, so it presumably relies on the working
# directory being /content (the parent of the mount point) — confirm.
sys.path.append("drive/MyDrive/Colab Notebooks")

import tensorflow as tf
from cnn import CNN
from cnn_hyperparam_tune import HyperCNN
from reader import Reader
from sklearn.metrics import classification_report, confusion_matrix 
import numpy as np
from tensorflow.keras.utils import to_categorical
import html
from keras.layers import TextVectorization
import tensorflow_probability
import pickle
from ray import tune
import keras_tuner as kt

# --- Data ---------------------------------------------------------------
# Pickled dataset, read through Reader.load in the data-loading cell.
FILENAME = "drive/MyDrive/Colab Notebooks/data/twitter_data.pkl"
# Binary task: 'racism' and 'sexism' are merged into class 1, 'none' is class 0.
NUM_CLASSES = 2

# --- Text vectorization / model input -------------------------------------
# Vocabulary cap for TextVectorization; also passed to the models as vocab_len.
MAX_TOKENS = 20_000
# Fixed output sequence length of the vectorizer; also passed to CNN as max_len.
MAX_LEN = 250
EMBED_SIZE = 100
# Forwarded unchanged to HyperCNN and CNN as loss_type.
LOSS_TYPE = "logits"

# --- Training defaults ------------------------------------------------------
# Not referenced by the cells shown in this notebook: the final model is built
# with the learning rate, batch size and epoch count returned by the tuner.
EPOCHS = 10
BATCH_SIZE = 128
LEARN_RATE = 0.01

Mounted at /content/drive


## Read and split the dataset

In [7]:
# Load the raw texts and labels from the pickle, then run the project's text preprocessing.
X, y = Reader.load(FILENAME)
X = Reader.preprocess(X)

# Collapse the annotation labels into a binary target:
# 1 = abusive ('racism' or 'sexism'), 0 = 'none'.
mapping = {'none': 0, 'racism': 1, 'sexism': 1}
y = [mapping[label] for label in y]

X_train, X_val, X_test, y_train, y_val, y_test = Reader.split_with_validation(X, y)

# Texts become NumPy arrays (the form handed to adapt/fit/predict below).
X_train, X_val, X_test = (
    np.asarray(texts) for texts in (X_train, X_val, X_test)
)

# Labels become one-hot rows with NUM_CLASSES columns.
y_train, y_val, y_test = (
    to_categorical(labels, num_classes=NUM_CLASSES)
    for labels in (y_train, y_val, y_test)
)

## Setup the TextVectorization layer

In [8]:
# Integer-encode each text into a sequence of token ids with a fixed length of
# MAX_LEN, using a vocabulary capped at MAX_TOKENS entries.
vectorizer_config = dict(
    max_tokens=MAX_TOKENS,
    output_mode='int',
    output_sequence_length=MAX_LEN,
)
text_vectorizer = TextVectorization(**vectorizer_config)

# The vocabulary is built from the training split only, so validation and test
# texts do not influence it.
text_vectorizer.adapt(X_train)

vocab = text_vectorizer.get_vocabulary()

# Hyperparameter tuning

In [9]:
# Hypermodel wrapping the CNN; it receives the same fixed settings
# (classes, vocabulary size, embedding size, vectorizer, loss type) as the final model.
hypermodel = HyperCNN(
    num_classes=NUM_CLASSES,
    vocab_len=MAX_TOKENS,
    embed_size=EMBED_SIZE,
    text_vectorizer=text_vectorizer,
    loss_type=LOSS_TYPE,
)

# Random search over the hypermodel's search space: 20 trials, ranked by
# validation accuracy. overwrite=True discards earlier results stored under
# output/cnn instead of resuming from them.
tuner = kt.RandomSearch(
    hypermodel,
    objective="val_accuracy",
    max_trials=20,
    overwrite=True,
    directory="output",
    project_name="cnn",
)

In [10]:
# Abort a trial once the validation loss has not improved for 3 consecutive epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# The tuner validates on 20% of the training split, so the dedicated validation
# split (X_val / y_val) is not consumed by the search.
tuner.search(
    X_train,
    y_train,
    validation_split=0.2,
    callbacks=[stop_early],
)

# Extract the winning configuration; these three values configure the final model.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
learning_rate = best_hps.get('learning_rate')
batch_size = best_hps.get('batch_size')
epochs = best_hps.get('epochs')

print("Best params:")
for hp_name, hp_value in (
    ("learning_rate", learning_rate),
    ("batch_size", batch_size),
    ("epochs", epochs),
):
    print(hp_name, hp_value)

Trial 20 Complete [00h 00m 26s]
val_accuracy: 0.7674779891967773

Best val_accuracy So Far: 0.8146038055419922
Total elapsed time: 00h 17m 04s
Best params:
learning_rate 0.001
batch_size 32
epochs 5


## Setup the model

In [12]:
# Build the final CNN from the tuned hyperparameters and the fixed configuration.
model = CNN(
    # Input / architecture
    text_vectorizer=text_vectorizer,
    vocab_len=MAX_TOKENS,
    max_len=MAX_LEN,
    embed_size=EMBED_SIZE,
    num_classes=NUM_CLASSES,
    loss_type=LOSS_TYPE,
    # Values selected by the hyperparameter search
    learning_rate=learning_rate,
    batch_size=batch_size,
    epochs=epochs,
    # Persistence on Google Drive
    save_model=True,
    save_path="drive/MyDrive/Colab Notebooks/output/cnn-waseem-hovy",
    checkpoint_path="drive/MyDrive/Colab Notebooks/output/cnn-waseem-hovy.ckpt",
)

## Training

In [13]:
# Train on the training split only; no validation data is passed to this call.
# Per the recorded output, a checkpoint is written after each of the 5 epochs and
# the call evaluates to a Keras Functional model object (shown as the cell result).
model.fit(X_train, y_train)

Epoch 1/5
Epoch 1: saving model to drive/MyDrive/Colab Notebooks/output/cnn-waseem-hovy.ckpt
Epoch 2/5
Epoch 2: saving model to drive/MyDrive/Colab Notebooks/output/cnn-waseem-hovy.ckpt
Epoch 3/5
Epoch 3: saving model to drive/MyDrive/Colab Notebooks/output/cnn-waseem-hovy.ckpt
Epoch 4/5
Epoch 4: saving model to drive/MyDrive/Colab Notebooks/output/cnn-waseem-hovy.ckpt
Epoch 5/5
Epoch 5: saving model to drive/MyDrive/Colab Notebooks/output/cnn-waseem-hovy.ckpt


<keras.engine.functional.Functional at 0x7ffa98aa13d0>

In [14]:
# Evaluate on the held-out test split: the class with the highest score per row
# is the prediction; the one-hot targets are decoded the same way.
model_predictions = model.cnn.predict(X_test)
predicted_classes = model_predictions.argmax(axis=1)
actual_classes = y_test.argmax(axis=1)

# Rows of the confusion matrix are true classes, columns are predicted classes.
test_confusion = confusion_matrix(actual_classes, predicted_classes)
test_report = classification_report(actual_classes, predicted_classes)
print(test_confusion)
print(test_report)

[[2041  166]
 [ 389  622]]
              precision    recall  f1-score   support

           0       0.84      0.92      0.88      2207
           1       0.79      0.62      0.69      1011

    accuracy                           0.83      3218
   macro avg       0.81      0.77      0.79      3218
weighted avg       0.82      0.83      0.82      3218



## Waseem predictions calibration
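We use temperature scaling to calibrate the model: the model outputs are divided by a single scalar temperature T, and T is chosen by minimizing the cross-entropy loss on the validation set. The expected calibration error (ECE) is reported before and after scaling.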



In [17]:
# Implementation from: https://github.com/ritun16/Machine_Learning_short_projecct/blob/master/Neural_Network_Calibration/model_calibration.ipynb
# Expected calibration error (ECE) on the validation split, before calibration.
num_bins = 50

# Validation outputs are passed to the ECE routine as logits.
model_predictions = model.cnn.predict(X_val)
model_logits = tf.convert_to_tensor(model_predictions, dtype=tf.float32, name='logits')

# Integer class ids recovered from the one-hot validation targets.
model_actual_classes = np.argmax(y_val, axis=1)
model_labels = tf.convert_to_tensor(model_actual_classes, dtype=tf.int32, name='labels_true')

ece_before = tensorflow_probability.stats.expected_calibration_error(
    num_bins=num_bins,
    logits=model_logits,
    labels_true=model_labels,
)
ece_before

<tf.Tensor: shape=(), dtype=float32, numpy=0.074477814>

In [18]:
# Temperature Scaling
# Learn one scalar temperature that divides the validation logits so that the
# cross-entropy against the true validation labels is minimal. Only `temp` is
# trained; the model's weights are untouched.
# NOTE(review): this treats model.cnn.predict() output as raw logits (consistent
# with LOSS_TYPE = "logits") — confirm against the CNN implementation.
CALIBRATION_STEPS = 300
CALIBRATION_LEARN_RATE = 0.01

temp = tf.Variable(initial_value=1.0, trainable=True, dtype=tf.float32)

# Loop-invariant inputs are built once instead of on every optimisation step.
calibration_logits = tf.convert_to_tensor(model_predictions, dtype=tf.float32)
# The one-hot width is pinned to the logit width, so the shapes still agree when a
# class happens to be absent from the validation labels.
calibration_targets = tf.convert_to_tensor(
    tf.keras.utils.to_categorical(
        model_actual_classes, num_classes=model_predictions.shape[-1]
    ),
    dtype=tf.float32,
)


def compute_loss():
    """Return the mean softmax cross-entropy of the temperature-scaled logits.

    Reads the current value of `temp`, so each call reflects the latest
    optimiser update.
    """
    scaled_logits = tf.math.divide(calibration_logits, temp)
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=calibration_targets, logits=scaled_logits
    )
    return tf.reduce_mean(per_example_loss)


optimizer = tf.optimizers.Adam(learning_rate=CALIBRATION_LEARN_RATE)

print('Temperature Initial value: {}'.format(temp.numpy()))

for _ in range(CALIBRATION_STEPS):
    optimizer.minimize(compute_loss, var_list=[temp])

print('Temperature Final value: {}'.format(temp.numpy()))

Temperature Initial value: 1.0
Temperature Final value: 1.5466009378433228


In [19]:
# Expected calibration error (ECE) on the validation split, after calibration:
# same labels and bin count as before, but the logits are divided by the learned
# temperature first.
num_bins = 50
labels_true = tf.convert_to_tensor(model_actual_classes, dtype=tf.int32, name='labels_true')

pred_divisions = tf.math.divide(model_predictions, temp)
logits = tf.convert_to_tensor(pred_divisions, dtype=tf.float32, name='logits')

ece_after = tensorflow_probability.stats.expected_calibration_error(
    num_bins=num_bins,
    logits=logits,
    labels_true=labels_true,
)
ece_after

<tf.Tensor: shape=(), dtype=float32, numpy=0.029316936>