In [1]:
import tensorflow

from cnn import CNN
from reader import Reader
from sklearn.metrics import classification_report, confusion_matrix 
import numpy as np
from tensorflow.keras.utils import to_categorical


In [2]:
# --- Model / training hyperparameters (forwarded to CNN(...) in a later cell) ---
EPOCHS = 10
BATCH_SIZE = 128
NUM_CLASSES = 2
EMBED_SIZE = 50
LOSS_TYPE = "logits"
# NOTE(review): LEARN_RATE is not passed to CNN(...) in the cells shown here —
# confirm whether it is meant to be used.
LEARN_RATE = 0.01

# --- Data settings ---
FILENAME = "data/sem_eval_all.pkl"
VOCAB_LEN = 10000
MAX_LEN = 100
# How many times each 'hate' training example appears after oversampling.
OVERSAMPLING_RATE = 3

# Load the dataset through the project Reader.
reader = Reader(filename=FILENAME, num_classes=NUM_CLASSES, vocab_len=VOCAB_LEN)
X, y = reader.load()

# Encode the string labels as integers: 'hate' -> 1, 'none' -> 0.
mapping = {'hate': 1, 'none': 0}
y = [mapping[label] for label in y]

X_train, X_test, y_train, y_test = reader.split(X, y)

# Oversample the 'hate' class in the training split only (done after the split,
# so the test split is left untouched): append OVERSAMPLING_RATE - 1 extra
# copies of every training example whose label is 1.
hate = [idx for idx, label in enumerate(y_train) if label == 1]
X_train = X_train + [X_train[idx] for idx in hate] * (OVERSAMPLING_RATE - 1)
y_train = y_train + [1] * len(hate) * (OVERSAMPLING_RATE - 1)

# Convert the features to arrays and one-hot encode the integer labels.
X_train, X_test = np.asarray(X_train), np.asarray(X_test)
y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

In [3]:
# Build the project CNN wrapper from the constants in the configuration cell.
# save_path and checkpoint_path share the same "results/..." stem; the
# checkpoint path only adds the ".ckpt" suffix.
model = CNN(
    max_len=MAX_LEN,
    num_classes=NUM_CLASSES,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    embed_size=EMBED_SIZE,
    vocab_len=VOCAB_LEN,
    loss_type=LOSS_TYPE,
    save_model=True,
    save_path="results/cnn-10-epochs-sem-eval-logits",
    checkpoint_path="results/cnn-10-epochs-sem-eval-logits.ckpt",
)

In [4]:
# Train on the oversampled training split. Per the captured output below, a
# checkpoint is written after every epoch and the call's return value is
# displayed as a Keras Functional model.
model.fit(X_train, y_train)

Epoch 1/10
Epoch 1: saving model to results\cnn-10-epochs-sem-eval-logits.ckpt
Epoch 2/10
Epoch 2: saving model to results\cnn-10-epochs-sem-eval-logits.ckpt
Epoch 3/10
Epoch 3: saving model to results\cnn-10-epochs-sem-eval-logits.ckpt
Epoch 4/10
Epoch 4: saving model to results\cnn-10-epochs-sem-eval-logits.ckpt
Epoch 5/10
Epoch 5: saving model to results\cnn-10-epochs-sem-eval-logits.ckpt
Epoch 6/10
Epoch 6: saving model to results\cnn-10-epochs-sem-eval-logits.ckpt
Epoch 7/10
Epoch 7: saving model to results\cnn-10-epochs-sem-eval-logits.ckpt
Epoch 8/10
Epoch 8: saving model to results\cnn-10-epochs-sem-eval-logits.ckpt
Epoch 9/10
Epoch 9: saving model to results\cnn-10-epochs-sem-eval-logits.ckpt
Epoch 10/10
Epoch 10: saving model to results\cnn-10-epochs-sem-eval-logits.ckpt




INFO:tensorflow:Assets written to: results/cnn-10-epochs-sem-eval-logits\assets


INFO:tensorflow:Assets written to: results/cnn-10-epochs-sem-eval-logits\assets


<keras.engine.functional.Functional at 0x1d3551b32e0>

In [5]:
# Evaluate the freshly trained model on the held-out test split.
# y_test is one-hot encoded, so argmax over axis 1 recovers the integer labels.
classes = np.argmax(y_test, axis=1)

# Each prediction is converted straight to a Python int here.
# NOTE(review): this presumably means the CNN wrapper's predict() already
# returns one class label per sample — confirm against cnn.py.
predictions = [int(label) for label in model.predict(X_test)]

print(confusion_matrix(classes, predictions))
print(classification_report(classes, predictions))

[[486 210]
 [168 336]]
              precision    recall  f1-score   support

           0       0.74      0.70      0.72       696
           1       0.62      0.67      0.64       504

    accuracy                           0.69      1200
   macro avg       0.68      0.68      0.68      1200
weighted avg       0.69      0.69      0.69      1200



In [8]:
# Reload the model saved during training and repeat the evaluation; the
# captured report below is identical to the one from the in-memory model.
# NOTE(review): `model` now refers to whatever CNN.load returns; unlike the
# In [5] cell, its predict() output is reduced with argmax over axis 1, so it
# presumably yields per-class scores rather than labels — confirm against cnn.py.
model = CNN.load("results/cnn-10-epochs-sem-eval-logits")

# y_test is one-hot encoded, so argmax over axis 1 recovers the integer labels.
classes = np.argmax(y_test, axis=1)

scores = model.predict(X_test)
predictions = [int(label) for label in np.argmax(scores, axis=1)]

print(confusion_matrix(classes, predictions))
print(classification_report(classes, predictions))

[[486 210]
 [168 336]]
              precision    recall  f1-score   support

           0       0.74      0.70      0.72       696
           1       0.62      0.67      0.64       504

    accuracy                           0.69      1200
   macro avg       0.68      0.68      0.68      1200
weighted avg       0.69      0.69      0.69      1200

