## Load data

In [1]:
import pickle
import data_helper

Using TensorFlow backend.


In [2]:
# Load the mixed test set in its "word" encoding. `labels` is later passed as
# `target_names` to classification_report in the first-step evaluation cells.
data_mixed_word, labels = data_helper.load_mixed_testset("word")


sampling 1277 none and 601 abusive label from davidson
(1277, 40)
(601, 40)
split:test, label:none, data shape:(2554, 40)
split:test, label:abusive, data shape:(3756, 40)


In [3]:
# Load the same mixed test set in its "char" encoding (this rebinds `labels`).
# NOTE(review): the helper logs a "sampling ... from davidson" step on every call. The hybrid
# models consume x_char and x_word together, so this assumes the sampling is deterministic
# and both calls return the same tweets in the same order - TODO confirm in data_helper.
data_mixed_char, labels = data_helper.load_mixed_testset("char")


sampling 1277 none and 601 abusive label from davidson
(1277, 140, 70)
(601, 140, 70)
split:test, label:none, data shape:(2554, 140, 70)
split:test, label:abusive, data shape:(3756, 140, 70)


Abusive binary dataset, including the Davidson data

In [4]:
# Binary (none / abusive) train/test/valid splits in word and char encodings.
# The second positional argument is True here and False in the "without davidson" cell;
# presumably it toggles inclusion of the Davidson data - confirm in data_helper.
# `labels` is rebound by both calls; the value from the "char" call is the one that survives.
data_word_d, labels = data_helper.load_abusive_binary("word", True)
data_char_d, labels = data_helper.load_abusive_binary("char", True)

split:train, label:none, data shape:(13539, 40)
split:train, label:abusive, data shape:(34886, 40)
split:test, label:none, data shape:(1694, 40)
split:test, label:abusive, data shape:(4357, 40)
split:valid, label:none, data shape:(1734, 40)
split:valid, label:abusive, data shape:(4417, 40)
split:train, label:none, data shape:(13539, 140, 70)
split:train, label:abusive, data shape:(34886, 140, 70)
split:test, label:none, data shape:(1694, 140, 70)
split:test, label:abusive, data shape:(4357, 140, 70)
split:valid, label:none, data shape:(1734, 140, 70)
split:valid, label:abusive, data shape:(4417, 140, 70)


Abusive binary dataset, excluding the Davidson data

In [5]:
# Same loader as the previous cell but with the second argument set to False.
# The returned label names are discarded (`_`) so the existing `labels` binding is kept.
data_word_w, _ = data_helper.load_abusive_binary("word", False)
data_char_w, _ = data_helper.load_abusive_binary("char", False)

split:train, label:none, data shape:(10209, 40)
split:train, label:abusive, data shape:(15010, 40)
split:test, label:none, data shape:(1277, 40)
split:test, label:abusive, data shape:(1878, 40)
split:valid, label:none, data shape:(1276, 40)
split:valid, label:abusive, data shape:(1876, 40)
split:train, label:none, data shape:(10209, 140, 70)
split:train, label:abusive, data shape:(15010, 140, 70)
split:test, label:none, data shape:(1277, 140, 70)
split:test, label:abusive, data shape:(1878, 140, 70)
split:valid, label:none, data shape:(1276, 140, 70)
split:valid, label:abusive, data shape:(1876, 140, 70)


## Evaluate Abusive Classifier (first-step)

In [7]:
from keras import backend as K
from keras.models import load_model
from sklearn.metrics import classification_report
import numpy as np

In [8]:
# Word-level test inputs and their targets from the mixed test set
# (the targets are argmax-decoded in the evaluation cells).
x_word, y_word = (data_mixed_word[key] for key in ("x_test", "y_test"))

In [9]:
# Char-level test inputs and their targets from the mixed test set.
x_char, y_char = (data_mixed_char[key] for key in ("x_test", "y_test"))

### Waasem Word

In [10]:
# First step, Waasem word-level checkpoint: score it on the mixed test set.
model = load_model("./logs/waasem/word/weights.03.hdf5")
preds = model.predict(x_word, batch_size=128)
report = classification_report(
    np.argmax(y_word, axis=1),  # true class ids
    np.argmax(preds, axis=1),   # predicted class ids
    target_names=labels,
    digits=3,
)
print(report)
# Drop the Keras session state before the next checkpoint is loaded.
K.clear_session()

             precision    recall  f1-score   support

       none      0.853     0.905     0.878      2554
    abusive      0.768     0.668     0.715      1202

avg / total      0.826     0.829     0.826      3756



### Waasem Hybrid

In [11]:
# First step, Waasem hybrid checkpoint: the model takes the char and word inputs together.
model = load_model("./logs/waasem/hybrid/weights.04.hdf5")
preds = model.predict([x_char, x_word], batch_size=128)
report = classification_report(
    np.argmax(y_word, axis=1),  # true class ids
    np.argmax(preds, axis=1),   # predicted class ids
    target_names=labels,
    digits=3,
)
print(report)
# Drop the Keras session state before the next checkpoint is loaded.
K.clear_session()

             precision    recall  f1-score   support

       none      0.864     0.890     0.877      2554
    abusive      0.750     0.703     0.726      1202

avg / total      0.828     0.830     0.829      3756



### Davidson Word

In [13]:
# First step, Davidson word-level checkpoint: score it on the mixed test set.
model = load_model("./logs/davidson/word/weights.01.hdf5")
preds = model.predict(x_word, batch_size=128)
report = classification_report(
    np.argmax(y_word, axis=1),  # true class ids
    np.argmax(preds, axis=1),   # predicted class ids
    target_names=labels,
    digits=3,
)
print(report)
# Drop the Keras session state before the next checkpoint is loaded.
K.clear_session()

             precision    recall  f1-score   support

       none      0.902     0.904     0.903      2554
    abusive      0.796     0.791     0.793      1202

avg / total      0.868     0.868     0.868      3756



### Davidson Hybrid

In [14]:
# First step, Davidson hybrid checkpoint: the model takes the char and word inputs together.
model = load_model("./logs/davidson/hybrid/weights.02.hdf5")
preds = model.predict([x_char, x_word], batch_size=128)
report = classification_report(
    np.argmax(y_word, axis=1),  # true class ids
    np.argmax(preds, axis=1),   # predicted class ids
    target_names=labels,
    digits=3,
)
print(report)
# Drop the Keras session state once the evaluation is done.
K.clear_session()

             precision    recall  f1-score   support

       none      0.896     0.897     0.897      2554
    abusive      0.781     0.778     0.779      1202

avg / total      0.859     0.859     0.859      3756



## Train Sexism-racism classifier (second-step)

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.pipeline import Pipeline

from model.helper import calculate_metrics
import numpy as np
from data.preprocess import load_from_file

In [16]:
# Load the preprocessed "waasem" tweets for the two abusive sub-classes only.
# The result is indexed below as original_data[split][label].
original_data = load_from_file("waasem", ["sexism", "racism"])

loaded preprocessed tweets for sexism:3152
loaded preprocessed tweets for racism:1649
loaded preprocessed tweets for sexism:394
loaded preprocessed tweets for racism:206
loaded preprocessed tweets for sexism:394
loaded preprocessed tweets for racism:207


In [17]:
# Flatten the per-label splits into parallel x_<split> / y_<split> lists for the
# second-step (sexism vs. racism) classifier.
# Labels are 1 for sexism and 2 for racism (i + 1), leaving 0 free for "none".
data = {}
for split in original_data.keys():
    x = "x_" + split
    y = "y_" + split
    # Accumulate into fresh lists. Binding data[x] directly to the first label's list and
    # then using `+=` would extend that list in place, appending the racism rows to
    # original_data[split]["sexism"] as a hidden side effect.
    data[x] = []
    data[y] = []
    for i, label in enumerate(["sexism", "racism"]):
        _data = original_data[split][label]
        data[x].extend(_data)
        print("split:%s, label:%s" % (split, label))
        data[y].extend([i + 1] * len(_data))

split:train, label:sexism
split:train, label:racism
split:test, label:sexism
split:test, label:racism
split:valid, label:sexism
split:valid, label:racism


In [18]:
# Turn each tokenised training tweet into one space-separated string for the vectorizer.
# Rows that are already strings are kept unchanged, so re-running this cell does not
# re-join an already joined tweet character by character.
data["x_train"] = [
    row if isinstance(row, str) else " ".join(str(v) for v in row)
    for row in data["x_train"]
]
data["x_train"][0]

'kat is actually psychotic . off mkr into a psychiatric ward mkr'

In [19]:
# Turn each tokenised test tweet into one space-separated string for the vectorizer.
# Rows that are already strings are kept unchanged, so re-running this cell does not
# re-join an already joined tweet character by character.
data["x_test"] = [
    row if isinstance(row, str) else " ".join(str(v) for v in row)
    for row in data["x_test"]
]
data["x_test"][0]

": i'm not sexist , but women are inferior . proving that you can still be an idiot regardless of your …"

In [20]:
# Turn each tokenised validation tweet into one space-separated string for the vectorizer.
# Rows that are already strings are kept unchanged, so re-running this cell does not
# re-join an already joined tweet character by character.
data["x_valid"] = [
    row if isinstance(row, str) else " ".join(str(v) for v in row)
    for row in data["x_valid"]
]
data["x_valid"][0]

"thk u for standing up to the feminazi bullies . checked out your music and it's pretty fucking sweet . hope to see you in nj ."

In [21]:
# Sanity check: the test texts and the test labels must have the same length.
len(data["x_test"]), len(data["y_test"])

(601, 601)

In [22]:
# Second-step classifier: character 2- to 5-gram counts -> TF-IDF -> logistic regression.
# The "sag" solver shuffles the data, so random_state is pinned to make the fit (and the
# reports printed below) reproducible across kernel restarts.
text_clf = Pipeline([
    ('vect', CountVectorizer(ngram_range=(2, 5), analyzer="char")),
    ('tfidf', TfidfTransformer()),
    ('clf', LogisticRegression(solver="sag", random_state=0)),
])


In [23]:
# Fit the char n-gram TF-IDF + logistic regression pipeline on the joined training tweets.
text_clf.fit(data["x_train"], data["y_train"])

Pipeline(steps=[('vect', CountVectorizer(analyzer='char', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(2, 5), preprocessor=None, stop_words=None,
        strip... penalty='l2', random_state=None, solver='sag', tol=0.0001,
          verbose=0, warm_start=False))])

In [24]:
# Predict on the training texts themselves: this measures the fit on seen data,
# not held-out performance.
train_preds = text_clf.predict(data["x_train"])

In [25]:
# Per-class precision / recall / F1 on the training split.
train_report = classification_report(
    data["y_train"],
    train_preds,
    target_names=["sexism", "racism"],
    digits=3,
)
print(train_report)


             precision    recall  f1-score   support

     sexism      0.957     0.998     0.977      3152
     racism      0.996     0.914     0.953      1649

avg / total      0.970     0.969     0.969      4801



In [26]:
# Per-class precision / recall / F1 of the second-step classifier on the held-out test split.
test_preds = text_clf.predict(data["x_test"])
test_report = classification_report(
    data["y_test"],
    test_preds,
    target_names=["sexism", "racism"],
    digits=3,
)
print(test_report)


             precision    recall  f1-score   support

     sexism      0.927     0.992     0.958       394
     racism      0.983     0.850     0.912       207

avg / total      0.946     0.943     0.942       601



## Evaluate Two-step classification

In [40]:
from data_helper import load_waasem, load_multiclass
from keras.utils import to_categorical

In [38]:
# NOTE(review): this rebinds data_word_d / data_char_d, which were bound earlier in the
# notebook to the load_abusive_binary(..., True) results. From here on they are indexed
# as [split][label].
data_word_d = load_waasem("./data/word_outputs/")
data_char_d = load_waasem("./data/char_outputs/")

In [29]:
# Load the preprocessed "waasem" tweets for all three classes; used below as
# raw_data["test"][label], where each row is joined with spaces to rebuild the tweet text.
raw_data = load_from_file("waasem", ["none", "sexism", "racism"])

loaded preprocessed tweets for none:10209
loaded preprocessed tweets for sexism:3152
loaded preprocessed tweets for racism:1649
loaded preprocessed tweets for none:1276
loaded preprocessed tweets for sexism:394
loaded preprocessed tweets for racism:206
loaded preprocessed tweets for none:1277
loaded preprocessed tweets for sexism:394
loaded preprocessed tweets for racism:207


In [30]:
# Load the pickled vocabulary; its "id2word" entry is used in the next cell to decode token ids.
# pickle.load executes arbitrary code from the file, so only load artefacts produced by this project.
with open("./data/word_outputs/vocab.pkl", "rb") as vocab_file:
    vocab = pickle.load(vocab_file)

In [31]:
# Decode the last five encoded "sexism" test tweets back to tokens via the vocabulary.
id2word = vocab["id2word"]
[" ".join(id2word[token] for token in row) for row in data_word_d["test"]["sexism"][-5:]]

['how annoying is this kat on mkr PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD',
 "maybe you don't like it that much . id like to consider myself open minded . and not sexist . but ... female ufc ? what in the fuck PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD",
 ': call me sexist all you want , but from stories / seeing this kind of stuff go down , women as a whole need to change how they co … PAD PAD PAD PAD PAD PAD PAD PAD PAD',
 'men\'s " logic " , ladies and UNK i don\'t UNK women over men . i\'m not sexist therefore i\'m not a feminist . PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD',
 'i hate the blameonenotall campaign i know not all men are rapists but all men benefit from male privilege and all men are guilty of sexism PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD PAD']

In [32]:
# The same five tweets from the raw token lists, for comparison with the decoded ids above.
list(map(" ".join, raw_data["test"]["sexism"][-5:]))

['how annoying is this kat on mkr',
 "maybe you don't like it that much . id like to consider myself open minded . and not sexist . but ... female ufc ? what in the fuck",
 ': call me sexist all you want , but from stories / seeing this kind of stuff go down , women as a whole need to change how they co …',
 'men\'s " logic " , ladies and gentlemen i don\'t prioritize women over men . i\'m not sexist therefore i\'m not a feminist .',
 'i hate the blameonenotall campaign i know not all men are rapists but all men benefit from male privilege and all men are guilty of sexism']

In [42]:
# Assemble the 3-class test set in a fixed class order (0 = none, 1 = sexism, 2 = racism),
# keeping word encodings, char encodings, raw token lists and targets row-aligned.
x_word = None
x_char = None
x_text = []
y_test = []
for class_id, label in enumerate(["none", "sexism", "racism"]):
    word_rows = data_word_d["test"][label]
    char_rows = data_char_d["test"][label]
    text_rows = raw_data["test"][label]
    # All three views must contain the same number of tweets for this label.
    assert len(word_rows) == len(char_rows)
    assert len(word_rows) == len(text_rows)

    x_word = word_rows if x_word is None else np.vstack((x_word, word_rows))
    print(x_word.shape)

    x_char = char_rows if x_char is None else np.vstack((x_char, char_rows))
    print(x_char.shape)

    x_text.extend(text_rows)
    y_test.extend(class_id for _ in text_rows)
# Convert the integer class ids with keras.utils.to_categorical
# (decoded again with argmax in the evaluation cells).
y_test = to_categorical(y_test)

(1277, 40)
(1277, 140, 70)
(1671, 40)
(1671, 140, 70)
(1878, 40)
(1878, 140, 70)


### Waasem Word

In [57]:
# Two-step classification with the Waasem word-level model as first step.
model = load_model("./logs/waasem/word/weights.03.hdf5")
# Step 1: binary decision; class 0 is "none", anything else counts as abusive.
preds = np.argmax(model.predict(x_word, batch_size=128), axis=1)
# Step 2: relabel every abusive tweet with the sexism/racism classifier in ONE batched call.
# Calling predict() once per tweet is slow, and writing its 1-element array result into a
# scalar slot relies on size-1-array-to-scalar conversion, deprecated in recent NumPy.
abusive = preds != 0
if abusive.any():  # skip the second step if nothing was flagged as abusive
    preds[abusive] = text_clf.predict(
        [" ".join(tokens) for tokens, flagged in zip(x_text, abusive) if flagged]
    )
print(classification_report(np.argmax(y_test, axis=1), preds,
                            digits=3, target_names=["none", "sexism", "racism"]))
K.clear_session()

             precision    recall  f1-score   support

       none      0.855     0.908     0.880      1277
     sexism      0.745     0.622     0.678       394
     racism      0.767     0.715     0.740       207

avg / total      0.822     0.826     0.822      1878



### Waasem Hybrid

In [59]:
# Two-step classification with the Waasem hybrid (char + word) model as first step.
model = load_model("./logs/waasem/hybrid/weights.04.hdf5")
# Step 1: binary decision; class 0 is "none", anything else counts as abusive.
preds = np.argmax(model.predict([x_char, x_word], batch_size=128), axis=1)
# Step 2: relabel every abusive tweet with the sexism/racism classifier in ONE batched call.
# Calling predict() once per tweet is slow, and writing its 1-element array result into a
# scalar slot relies on size-1-array-to-scalar conversion, deprecated in recent NumPy.
abusive = preds != 0
if abusive.any():  # skip the second step if nothing was flagged as abusive
    preds[abusive] = text_clf.predict(
        [" ".join(tokens) for tokens, flagged in zip(x_text, abusive) if flagged]
    )
print(classification_report(np.argmax(y_test, axis=1), preds,
                            digits=3, target_names=["none", "sexism", "racism"]))
K.clear_session()

             precision    recall  f1-score   support

       none      0.861     0.908     0.884      1277
     sexism      0.732     0.632     0.678       394
     racism      0.786     0.729     0.757       207

avg / total      0.826     0.830     0.827      1878



### Davidson Word

In [60]:
# Two-step classification with the Davidson word-level model as first step.
model = load_model("./logs/davidson/word/weights.01.hdf5")
# Step 1: binary decision; class 0 is "none", anything else counts as abusive.
preds = np.argmax(model.predict(x_word, batch_size=128), axis=1)
# Step 2: relabel every abusive tweet with the sexism/racism classifier in ONE batched call.
# Calling predict() once per tweet is slow, and writing its 1-element array result into a
# scalar slot relies on size-1-array-to-scalar conversion, deprecated in recent NumPy.
abusive = preds != 0
if abusive.any():  # skip the second step if nothing was flagged as abusive
    preds[abusive] = text_clf.predict(
        [" ".join(tokens) for tokens, flagged in zip(x_text, abusive) if flagged]
    )
print(classification_report(np.argmax(y_test, axis=1), preds,
                            digits=3, target_names=["none", "sexism", "racism"]))
K.clear_session()

             precision    recall  f1-score   support

       none      0.839     0.900     0.868      1277
     sexism      0.704     0.574     0.632       394
     racism      0.761     0.691     0.724       207

avg / total      0.802     0.808     0.803      1878



### Davidson Hybrid

In [61]:
# Two-step classification with the Davidson hybrid (char + word) model as first step.
model = load_model("./logs/davidson/hybrid/weights.02.hdf5")
# Step 1: binary decision; class 0 is "none", anything else counts as abusive.
preds = np.argmax(model.predict([x_char, x_word], batch_size=128), axis=1)
# Step 2: relabel every abusive tweet with the sexism/racism classifier in ONE batched call.
# Calling predict() once per tweet is slow, and writing its 1-element array result into a
# scalar slot relies on size-1-array-to-scalar conversion, deprecated in recent NumPy.
abusive = preds != 0
if abusive.any():  # skip the second step if nothing was flagged as abusive
    preds[abusive] = text_clf.predict(
        [" ".join(tokens) for tokens, flagged in zip(x_text, abusive) if flagged]
    )
print(classification_report(np.argmax(y_test, axis=1), preds,
                            digits=3, target_names=["none", "sexism", "racism"]))
K.clear_session()

             precision    recall  f1-score   support

       none      0.831     0.908     0.868      1277
     sexism      0.711     0.569     0.632       394
     racism      0.775     0.633     0.697       207

avg / total      0.800     0.806     0.799      1878

