In [1]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
#####################################################################
#                Generalising on Binary Level                       #
#####################################################################

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
import pandas as pd
import numpy as np
from collections import Counter

from utilities.preprocess import Preproccesor
from utilities.attention_layer import Attention
from utilities.helping_functions import create_embedding_matrix

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import f1_score, balanced_accuracy_score, confusion_matrix
from sklearn.utils import shuffle
from sklearn.model_selection import KFold

from keras.preprocessing.sequence import pad_sequences
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, SpatialDropout1D, Bidirectional, Dense, \
    LSTM, Conv1D, Dropout, concatenate
from keras import Input, Model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam
from keras.layers.embeddings import Embedding
from keras.preprocessing.text import Tokenizer

import nltk
# Fetch the NLTK resources used by this notebook (WordNet and the stop-word
# lists). NOTE(review): presumably consumed by the project preprocessing
# utilities -- confirm. The call is a no-op when the package is up to date.
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/johnmollas/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/johnmollas/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

Loading ETHOS and the dataset D1: Davidson, Thomas, et al. "Automated hate speech detection and the problem of offensive language." Proceedings of the International AAAI Conference on Web and Social Media. Vol. 11. No. 1. 2017.

In [2]:
# Class names, listed in the order [noHateSpeech, hateSpeech].
class_names = ['noHateSpeech', 'hateSpeech']

# ETHOS texts and labels.
# NOTE(review): the positional `True` presumably requests preprocessed text --
# confirm against utilities.preprocess.
X, y = Preproccesor.load_data(True)

# External dataset D1 (Davidson et al., 2017) texts and labels.
X_tweets, y_tweets = Preproccesor.load_external_data(True)

The D1 dataset contains 24783 instances, of which only 1430 contain hate speech content, so it is highly imbalanced.

In [3]:
# (total instances, hate-speech instances, non-hate-speech instances) in D1.
n_tweets = len(y_tweets)
n_hate_tweets = sum(y_tweets)
n_tweets, n_hate_tweets, n_tweets - n_hate_tweets

(24783, 1430, 23353)

Let's compute some keyword statistics for the hate speech tweets in D1.

In [4]:
# Count the D1 tweets labelled 1 whose lower-cased text contains at least one
# sexuality-related term (plain substring match).
sexuality_terms = ('faggot', 'fag', 'gay', 'queer')
hate_positions = [idx for idx in range(len(y_tweets)) if y_tweets[idx] == 1]
cou = sum(
    1
    for idx in hate_positions
    if any(term in X_tweets[idx].lower() for term in sexuality_terms)
)
cou

417

The count above is the number of hate speech tweets in D1 that contain sexuality-related keywords.

In [5]:
# Count the D1 tweets labelled 1 whose lower-cased text contains at least one
# gender-related term (plain substring match).
gender_terms = ('bitch', 'cunt', 'hoe')
hate_positions = [idx for idx in range(len(y_tweets)) if y_tweets[idx] == 1]
cou = sum(
    1
    for idx in hate_positions
    if any(term in X_tweets[idx].lower() for term in gender_terms)
)
cou

352

The count above is the number of hate speech tweets in D1 that contain gender-related keywords.

In [6]:
# Count the D1 tweets labelled 1 whose lower-cased text contains at least one
# race-related term (plain substring match).
race_terms = ('nigger', 'nigga', 'niggu')
hate_positions = [idx for idx in range(len(y_tweets)) if y_tweets[idx] == 1]
cou = sum(
    1
    for idx in hate_positions
    if any(term in X_tweets[idx].lower() for term in race_terms)
)
cou

378

The count above is the number of hate speech tweets in D1 that contain race-related keywords.

Now let's train an SVM model on ETHOS (with 10-fold cross-validation) and test it on D1.

In [7]:
# Experiment: train a TF-IDF + RBF-SVM model on ETHOS with 10-fold
# cross-validation. Each fold's model is scored twice: on the held-out ETHOS
# fold (in-domain) and on the complete D1 tweets dataset (cross-domain).
#
# NOTE(review): this KFold does not shuffle, whereas the combined-dataset
# experiment uses shuffle=True with a fixed random_state -- confirm that
# contiguous folds are intended here.
kf = KFold(n_splits=10)

# Per-fold metric accumulators; suffix H = hate class, NH = no-hate class.
f1ethos = []
f1ethosH = []
f1ethosNH = []
f1tweets = []
f1tweetsH = []
f1tweetsNH = []
accethos = []
acctweets = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # The vocabulary and IDF weights are learned from the training fold only;
    # the held-out fold and D1 are projected into that same feature space.
    vec = TfidfVectorizer(
        analyzer='word', ngram_range=(1, 5), max_features=50000)
    X_tr = vec.fit_transform(X_train)
    X_te = vec.transform(X_test)
    X_tw = vec.transform(X_tweets)
    svm = SVC(kernel='rbf')
    svm.fit(X_tr, y_train)

    # In-domain evaluation on the held-out ETHOS fold.
    y_predict = svm.predict(X_te)
    accethos.append(balanced_accuracy_score(y_test, y_predict))
    f1ethos.append(f1_score(y_test, y_predict, average='weighted'))
    # average=None yields one score per class; compute it once and index it.
    # Index 0 is treated as no-hate and index 1 as hate (assumes 0/1 labels).
    per_class_f1 = f1_score(y_test, y_predict, average=None)
    f1ethosNH.append(per_class_f1[0])
    f1ethosH.append(per_class_f1[1])

    # Cross-domain evaluation on the full D1 dataset.
    y_predict = svm.predict(X_tw)
    acctweets.append(balanced_accuracy_score(y_tweets, y_predict))
    f1tweets.append(f1_score(y_tweets, y_predict, average='weighted'))
    per_class_f1 = f1_score(y_tweets, y_predict, average=None)
    f1tweetsNH.append(per_class_f1[0])
    f1tweetsH.append(per_class_f1[1])

In [8]:
# Report the mean of every per-fold metric collected while training on ETHOS.
fold_metrics = [
    ('F1 on Ethos', f1ethos),
    ('F1 Hate on Ethos', f1ethosH),
    ('F1 NoHate on Ethos', f1ethosNH),
    ('F1 on D1', f1tweets),
    ('F1 Hate on D1', f1tweetsH),
    ('F1 NoHate on D1', f1tweetsNH),
    ('Bal. Accuracy on Ethos', accethos),
    ('Bal. Accuracy on D1', acctweets),
]
for metric_label, fold_scores in fold_metrics:
    print(metric_label, np.array(fold_scores).mean())

F1 on Ethos 0.5640485977487757
F1 Hate on Ethos 0.3320765493730913
F1 NoHate on Ethos 0.7403227437632174
F1 on D1 0.8732391902132248
F1 Hate on D1 0.12845200597648176
F1 NoHate on D1 0.9188455651311601
Bal. Accuracy on Ethos 0.580236017196485
Bal. Accuracy on D1 0.5402848423361848


Now let's do the reverse: train an SVM model on D1 (with 10-fold cross-validation) and test it on ETHOS.

In [9]:
# Experiment: train a TF-IDF + RBF-SVM model on D1 (tweets) with 10-fold
# cross-validation. Each fold's model is scored twice: on the held-out D1
# fold (in-domain) and on the complete ETHOS dataset (cross-domain).
#
# NOTE(review): this KFold does not shuffle, whereas the combined-dataset
# experiment uses shuffle=True with a fixed random_state -- confirm that
# contiguous folds are intended here.
kf = KFold(n_splits=10)

# Per-fold metric accumulators; suffix H = hate class, NH = no-hate class.
f1ethos = []
f1ethosH = []
f1ethosNH = []
f1tweets = []
f1tweetsH = []
f1tweetsNH = []
accethos = []
acctweets = []

for train_index, test_index in kf.split(X_tweets):
    X_train, X_test = X_tweets[train_index], X_tweets[test_index]
    y_train, y_test = y_tweets[train_index], y_tweets[test_index]

    # The vocabulary and IDF weights are learned from the training fold only;
    # the held-out fold and ETHOS are projected into that same feature space.
    vec = TfidfVectorizer(
        analyzer='word', ngram_range=(1, 5), max_features=50000)
    X_tr = vec.fit_transform(X_train)
    X_te = vec.transform(X_test)
    X_et = vec.transform(X)
    svm = SVC(kernel='rbf')
    svm.fit(X_tr, y_train)

    # In-domain evaluation on the held-out D1 fold.
    y_predict = svm.predict(X_te)
    acctweets.append(balanced_accuracy_score(y_test, y_predict))
    f1tweets.append(f1_score(y_test, y_predict, average='weighted'))
    # average=None yields one score per class; compute it once and index it.
    # Index 0 is treated as no-hate and index 1 as hate (assumes 0/1 labels).
    per_class_f1 = f1_score(y_test, y_predict, average=None)
    f1tweetsNH.append(per_class_f1[0])
    f1tweetsH.append(per_class_f1[1])

    # Cross-domain evaluation on the full ETHOS dataset.
    y_predict = svm.predict(X_et)
    accethos.append(balanced_accuracy_score(y, y_predict))
    f1ethos.append(f1_score(y, y_predict, average='weighted'))
    per_class_f1 = f1_score(y, y_predict, average=None)
    f1ethosNH.append(per_class_f1[0])
    f1ethosH.append(per_class_f1[1])

In [10]:
# Report the mean of every per-fold metric collected while training on D1.
fold_metrics = [
    ('F1 on D1', f1tweets),
    ('F1 Hate on D1', f1tweetsH),
    ('F1 NoHate on D1', f1tweetsNH),
    ('F1 on Ethos', f1ethos),
    ('F1 Hate on Ethos', f1ethosH),
    ('F1 NoHate on Ethos', f1ethosNH),
    ('Bal. Accuracy on D1', acctweets),
    ('Bal. Accuracy on Ethos', accethos),
]
for metric_label, fold_scores in fold_metrics:
    print(metric_label, np.array(fold_scores).mean())

F1 on D1 0.9230500903911796
F1 Hate on D1 0.12385936302731079
F1 NoHate on D1 0.9710236819316258
F1 on Ethos 0.4266857670626746
F1 Hate on Ethos 0.03534806560292477
F1 NoHate on Ethos 0.7265958993318281
Bal. Accuracy on D1 0.5333261046305611
Bal. Accuracy on Ethos 0.5090069284064666


Finally, it would be interesting to investigate the overall performance of an SVM model trained on the combination of the two datasets.

In [11]:
# Experiment: train and evaluate a TF-IDF + RBF-SVM model on the
# concatenation of ETHOS and D1, using shuffled 10-fold cross-validation with
# a fixed seed so the folds are reproducible.
kf = KFold(n_splits=10, shuffle=True, random_state=7)

# ETHOS samples come first, followed by the D1 tweets.
X_NEW = np.concatenate((X, X_tweets))
y_NEW = np.concatenate((y, y_tweets))

# Per-fold metric accumulators; suffix H = hate class, NH = no-hate class.
f1 = []
f1H = []
f1NH = []
acc = []

for train_index, test_index in kf.split(X_NEW):
    X_train, X_test = X_NEW[train_index], X_NEW[test_index]
    y_train, y_test = y_NEW[train_index], y_NEW[test_index]

    # The vocabulary and IDF weights are learned from the training fold only.
    vec = TfidfVectorizer(
        analyzer='word', ngram_range=(1, 5), max_features=50000)
    X_tr = vec.fit_transform(X_train)
    X_te = vec.transform(X_test)
    svm = SVC(kernel='rbf')
    svm.fit(X_tr, y_train)

    # Evaluate on the held-out fold of the combined dataset.
    y_predict = svm.predict(X_te)
    acc.append(balanced_accuracy_score(y_test, y_predict))
    f1.append(f1_score(y_test, y_predict, average='weighted'))
    # average=None yields one score per class; compute it once and index it.
    # Index 0 is treated as no-hate and index 1 as hate (assumes 0/1 labels).
    per_class_f1 = f1_score(y_test, y_predict, average=None)
    f1NH.append(per_class_f1[0])
    f1H.append(per_class_f1[1])

In [12]:
# Report the mean of every per-fold metric for the combined-dataset model.
fold_metrics = [
    ('F1 on Both', f1),
    ('F1 Hate on Both', f1H),
    ('F1 NoHate on Both', f1NH),
    ('Bal. Accuracy on Both', acc),
]
for metric_label, fold_scores in fold_metrics:
    print(metric_label, np.array(fold_scores).mean())

F1 on Both 0.9083655535830211
F1 Hate on Both 0.18481107803210042
F1 NoHate on Both 0.9647914678552706
Bal. Accuracy on Both 0.5516466691239722
