In [None]:
import os

import numpy as np
import pandas as pd

from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Embedding, Bidirectional, GRU, GlobalMaxPool1D, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score

In [4]:
# Label columns of the training CSV that are used as prediction targets.
list_classes = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

train = pd.read_csv('data/train.csv')

# Missing comments are replaced with the "_na_" placeholder before the raw
# text is extracted as an array.
comment_column = train["comment_text"]
list_sentences = comment_column.fillna("_na_").values

# Target matrix: one column per entry of list_classes.
y = train[list_classes].values

In [5]:
# Text-preprocessing hyper-parameters shared by the cells below.
max_features = 20000  # passed to Tokenizer as num_words
maxlen = 200          # length every sequence is padded/truncated to
embed_size = 100      # width of each embedding vector

# Fit the vocabulary on the training comments only.
tokenizer = Tokenizer(num_words=max_features)
training_texts = list(list_sentences)
tokenizer.fit_on_texts(training_texts)

# Turn each comment into a sequence of word indices, then pad to a fixed length.
list_tokenized_train = tokenizer.texts_to_sequences(list_sentences)
X_t = pad_sequences(list_tokenized_train, maxlen=maxlen)

In [6]:
# Build a {token: vector} lookup from the pre-trained GloVe text file, where
# every line holds a token followed by its embedding coefficients.
embeddings_index = {}
with open('data/glove.6B.100d.txt', encoding='utf8') as f:
    for line in f:
        values = line.split()
        # Skip blank or malformed lines instead of crashing on values[0] or on
        # a vector whose length does not match embed_size.
        if len(values) != embed_size + 1:
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

word_index = tokenizer.word_index

# Keras Tokenizer indices start at 1 (0 is reserved for padding), so a matrix
# with min(max_features, len(word_index)) rows is one row short whenever the
# vocabulary is smaller than max_features, and the highest word index raises
# IndexError. The Embedding layer is declared with max_features rows, so the
# matrix is sized to match it; rows without a GloVe vector stay all-zero.
nb_words = max_features
embedding_matrix = np.zeros((nb_words, embed_size))
for word, i in word_index.items():
    # Indices at or beyond nb_words have no row in the matrix.
    if i >= nb_words:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

In [7]:
# Network: embedding -> bidirectional GRU -> max-pool over time -> dense head
# with six sigmoid outputs.
inp = Input(shape=(maxlen,))

# Embedding table initialised from the pre-computed embedding_matrix.
embedded = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)

# Sequence encoder; return_sequences=True keeps one output per time step so
# the pooling layer can reduce over the sequence axis.
encoded = Bidirectional(
    GRU(128, return_sequences=True, dropout=0.1, recurrent_dropout=0.1)
)(embedded)
pooled = GlobalMaxPool1D()(encoded)

# Classification head.
hidden = Dense(50, activation="relu")(pooled)
hidden = Dropout(0.1)(hidden)
x = Dense(6, activation="sigmoid")(hidden)

model = Model(inputs=inp, outputs=x)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Ensure the output directory exists before training so the final save cannot
# fail on a missing folder.
os.makedirs("models", exist_ok=True)

# Training settings; validation_split=0.1 holds out a tenth of the training
# data for validation.
fit_options = dict(batch_size=32, epochs=2, validation_split=0.1)
history = model.fit(X_t, y, **fit_options)

# Persist the trained network in the native Keras format.
model.save("models/toxic_model.keras")

In [8]:
# Rebind `model` to the network stored at the same path the training cell
# saves to, so the evaluation cells run against the persisted model.
model = load_model("models/toxic_model.keras")

  saveable.load_own_variables(weights_store.get(inner_path))


In [9]:
# Label columns expected in the test CSV (same six targets as for training).
list_classes = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

test = pd.read_csv('data/test.csv')

# Ground-truth labels and raw comment text; missing comments get the same
# "_na_" placeholder that is used for the training data.
y_test_true = test[list_classes].values
test_comment_column = test["comment_text"]
list_sentences_test = test_comment_column.fillna("_na_").values

# Reuse the tokenizer fitted on the training comments and pad to maxlen.
list_tokenized_test = tokenizer.texts_to_sequences(list_sentences_test)
X_test = pad_sequences(list_tokenized_test, maxlen=maxlen)

# One row of six sigmoid outputs per test comment.
y_test_pred = model.predict(X_test)

[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 47ms/step


In [10]:
# ROC AUC on the test set: one score per class plus the macro average.
per_class_auc_scores = roc_auc_score(y_test_true, y_test_pred, average=None)
mean_auc = roc_auc_score(y_test_true, y_test_pred, average='macro')

print(mean_auc)

# Per-class table, best-scoring class first.
auc_columns = {
    'Class': list_classes,
    'AUC_ROC_Score': per_class_auc_scores,
}
auc_report = pd.DataFrame(auc_columns)
auc_report_sorted = auc_report.sort_values(by='AUC_ROC_Score', ascending=False)

print(auc_report_sorted.to_string(index=False))

0.9801604154054294
        Class  AUC_ROC_Score
       threat       0.990243
 severe_toxic       0.989750
identity_hate       0.981485
      obscene       0.977440
       insult       0.975145
        toxic       0.966900


In [12]:
# Convert the predicted scores into hard 0/1 labels using a fixed 0.5 cut-off.
above_threshold = y_test_pred > 0.5
y_test_pred_binary_simple = above_threshold.astype(int)

# Aggregate F1 over all labels (macro and micro) and the per-class breakdown.
macro_f1_simple = f1_score(y_test_true, y_test_pred_binary_simple, average='macro')
micro_f1_simple = f1_score(y_test_true, y_test_pred_binary_simple, average='micro')
simple_f1_scores = f1_score(y_test_true, y_test_pred_binary_simple, average=None)

print(f"Macro F1: {macro_f1_simple:.4f}")
print(f"Micro F1: {micro_f1_simple:.4f}")

# Per-class table, best-scoring class first.
f1_columns = {
    'Class': list_classes,
    'F1_Score': simple_f1_scores,
}
f1_report_simple = pd.DataFrame(f1_columns)
f1_report_sorted_simple = f1_report_simple.sort_values(by='F1_Score', ascending=False)

print(f1_report_sorted_simple.to_string(index=False))

Macro F1: 0.5665
Micro F1: 0.6659
        Class  F1_Score
      obscene  0.688336
        toxic  0.678609
       insult  0.668414
identity_hate  0.582278
       threat  0.399015
 severe_toxic  0.382482
