In [1]:
# IMPORTS 
import contextlib
import os
import random as rn
import sys
import warnings

import numpy as np
import pandas as pd
import torch

from hate_bert_helper import *
warnings.filterwarnings("ignore")

In [2]:
# PARAMETERS SETUP

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
# get_device_name(0) raises when no CUDA device is present, so only query it
# when the CUDA device was actually selected above.
print(torch.cuda.get_device_name(0) if device.type == "cuda" else "CPU")

# One seed shared by every random number generator this notebook touches.
SEED = 501
rn.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Seed every visible GPU, not just the current one; ignored when CUDA is absent.
torch.cuda.manual_seed_all(SEED)


MAX_LEN = 128  # max length of a sentence, fed into the network
hatebert_model_path = "./models/hate_bert"

NVIDIA GeForce GTX 980 Ti


# Experiments setup

In [3]:
# Binary-classification experiments, grouped by abuse category.
# Every entry is a (corpus_id, positive_label, negative_label) tuple; the
# dictionary keys only name the groups and are not read anywhere else.
_DEFINITIONS_BY_CATEGORY = {
    "TOXIC": [
        (1, "toxic", "other"),
        (1, "severe_toxic", "other"),
    ],
    "HATE": [
        (1, "identity_hate", "other"),
        (2, "hate_speech", "neither"),
        (3, "hate", "none"),
        (7, "hateful", "normal"),
        (10, "hateful", "none"),
        (11, "hateful", "none"),
        (12, "hate", "noHate"),
        (16, "hateful", "non-hateful"),
        (18, "hateful", "normal"),
        (21, "hatespeech", "normal"),
        (25, "hate", "nothate"),
    ],
    "ABUSIVE": [
        (7, "abusive", "normal"),
        (18, "abusive", "normal"),
    ],
    "AGGRESSIVE": [
        (17, "covertly-aggressive", "non-aggressive"),
        (17, "overtly-aggressive", "non-aggressive"),
    ],
    "OFFENSIVE": [
        (2, "offensive_language", "neither"),
        (3, "offensive", "none"),
        (15, "offensive", "non-offensive"),
        (21, "offensive", "normal"),
    ],
    "SEXISM": [
        (4, "sexism", "none"),
        (9, "sexist", "none"),
        (29, "sexism", "none"),
        (30, "sexism", "neither"),
    ],
    "CYBERBULLYING": [
        (6, "cyberbullying", "none"),
        (28, "cyberbullying", "none"),
    ],
    "SPAM": [
        (7, "spam", "normal"),
        (18, "spam", "normal"),
    ],
    "HARASSMENT": [
        (19, "harrasment", "non-harrasment"),
    ],
    "OBSCENE": [
        (1, "obscene", "other"),
    ],
    "INSULT": [
        (1, "insult", "other"),
    ],
    "HOMOPHOBIA": [
        (9, "homophobic", "none"),
    ],
    "RACIST": [
        (9, "racist", "none"),
    ],
    "VULGAR": [
        (27, "vulgar", "non-vulgar"),
    ],
    "THREAT": [
        (1, "threat", "other"),
    ],
    "PROFANE": [
        (3, "profane", "none"),
    ],
}

# Other cells iterate and slice the groups by position, so expose them as a
# plain list of lists in declaration order (dicts keep insertion order).
dataset_definitions = list(_DEFINITIONS_BY_CATEGORY.values())

In [4]:
# CSV holding all corpora in one table; the preview shows corpus_id, text and
# label columns plus the saved row index.
DATASETS_PATH = "outputs/full_classification_dataset.csv"

# Read the whole table once; later cells hand this frame to the helper
# functions together with a corpus id and label pair.
datasets = pd.read_csv(filepath_or_buffer=DATASETS_PATH)

# Keep the frame as the cell's last expression so the notebook renders a preview.
datasets

Unnamed: 0,corpus_id,text,label
0,30,Cisco had to deal with a fat cash payout to th...,neither
1,30,"@MadamPlumpette I'm decent at editing, no worr...",neither
2,30,@girlziplocked will read. gotta go afk for a b...,neither
3,30,guys. show me the data. show me your github. t...,neither
4,30,@tpw_rules nothings broken. I was just driving...,neither
...,...,...,...
781481,30,via @weaselzippers: Feminazi Blog Reminds Libe...,sexism
781482,30,I used to have pet bunnies. :) I named them PO...,neither
781483,30,@alex SO GROSS. feeling the urge to shower in ...,neither
781484,30,Purpose of this group is to share the types of...,neither


In [5]:
# Fine-tune one model per (corpus_id, positive label) definition and save it
# under ./models/. A definition whose model path already exists is skipped,
# which keeps re-running this cell cheap.
for dataset_definition_group in dataset_definitions:
    for corpus_id, pos_label, neg_label in dataset_definition_group:
        print(f"Training corpus {corpus_id}-{pos_label} ...", end = "")
        model_path = f"./models/finetuned_model_{corpus_id}_{pos_label}"
        if os.path.exists(model_path):
            print(" ... model already exists!")
            continue

        # Build the dataset only when a model actually has to be trained, so
        # already-trained definitions do not pay for filtering the full frame.
        dataset = extract_dataset(datasets, corpus_id, pos_label, neg_label)

        # Training model
        train_and_save(device,
                       MAX_LEN,
                       hatebert_model_path,
                       model_path,
                       dataset["text"].values,
                       dataset["label"].values,
                       pos_label)

Training corpus 1-toxic ... ... model already exists!
Training corpus 1-severe_toxic ... ... model already exists!
Training corpus 1-identity_hate ... ... model already exists!
Training corpus 2-hate_speech ... ... model already exists!
Training corpus 3-hate ... ... model already exists!
Training corpus 7-hateful ... ... model already exists!
Training corpus 10-hateful ... ... model already exists!
Training corpus 11-hateful ... ... model already exists!
Training corpus 12-hate ... ... model already exists!
Training corpus 16-hateful ... ... model already exists!
Training corpus 18-hateful ... ... model already exists!
Training corpus 21-hatespeech ... ... model already exists!
Training corpus 25-hate ... ... model already exists!
Training corpus 7-abusive ... ... model already exists!
Training corpus 18-abusive ... ... model already exists!
Training corpus 17-covertly-aggressive ... ... model already exists!
Training corpus 17-overtly-aggressive ... ... model already exists!
Training

t_total value of -1 results in schedule not being applied
Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Train loss: 0.03316373263610117


Epoch:  33%|███▎      | 1/3 [24:43<49:27, 1483.77s/it]

Validation Accuracy: 0.9896232876712329
Train loss: 0.02878951472186183


Epoch:  67%|██████▋   | 2/3 [49:38<24:49, 1489.99s/it]

Validation Accuracy: 0.9896232876712329
Train loss: 0.02647990024432802


Epoch: 100%|██████████| 3/3 [1:14:33<00:00, 1491.26s/it]

Validation Accuracy: 0.9896232876712329
Accuracy: 98.97%
F1 micro: 98.97%
F1 macro: 49.74%
Precission: 0.00%
Recall: 0.00%
              precision    recall  f1-score   support

           0       1.00      0.99      0.99     17502
           1       0.00      0.00      0.00         0

    accuracy                           0.99     17502
   macro avg       0.50      0.49      0.50     17502
weighted avg       1.00      0.99      0.99     17502

[[17322   180]
 [    0     0]]





Training corpus 29-sexism ... ... model already exists!
Training corpus 30-sexism ... ... model already exists!
Training corpus 6-cyberbullying ... ... model already exists!
Training corpus 28-cyberbullying ... ... model already exists!
Training corpus 7-spam ... ... model already exists!
Training corpus 18-spam ... ... model already exists!
Training corpus 19-harrasment ... ... model already exists!
Training corpus 1-obscene ... ... model already exists!
Training corpus 1-insult ... ... model already exists!
Training corpus 9-homophobic ...0.0M
0.0M


t_total value of -1 results in schedule not being applied
Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Train loss: 0.0307939641860253


Epoch:  33%|███▎      | 1/3 [25:04<50:09, 1504.65s/it]

Validation Accuracy: 0.9864809782608696
Train loss: 0.02649940532689865


Epoch:  67%|██████▋   | 2/3 [50:12<25:06, 1506.77s/it]

Validation Accuracy: 0.9886209239130435
Train loss: 0.02454336822475902


Epoch: 100%|██████████| 3/3 [1:15:23<00:00, 1507.76s/it]

Validation Accuracy: 0.9880095108695651
Accuracy: 98.78%
F1 micro: 98.78%
F1 macro: 78.26%
Precission: 50.53%
Recall: 65.75%
              precision    recall  f1-score   support

           0       1.00      0.99      0.99     17427
           1       0.51      0.66      0.57       219

    accuracy                           0.99     17646
   macro avg       0.75      0.82      0.78     17646
weighted avg       0.99      0.99      0.99     17646

[[17286   141]
 [   75   144]]





Training corpus 9-racist ...0.0M
0.0M


t_total value of -1 results in schedule not being applied
Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Train loss: 0.09857102306129413


Epoch:  33%|███▎      | 1/3 [25:25<50:51, 1525.62s/it]

Validation Accuracy: 0.9761677665498041
Train loss: 0.09082476336241113


Epoch:  67%|██████▋   | 2/3 [50:54<25:27, 1527.50s/it]

Validation Accuracy: 0.9750850690864096
Train loss: 0.08304651832154601


Epoch: 100%|██████████| 3/3 [1:16:26<00:00, 1528.87s/it]

Validation Accuracy: 0.9742859352443803
Accuracy: 97.52%
F1 micro: 97.52%
F1 macro: 72.78%
Precission: 40.88%
Recall: 54.78%
              precision    recall  f1-score   support

           0       0.99      0.98      0.99     17539
           1       0.41      0.55      0.47       356

    accuracy                           0.98     17895
   macro avg       0.70      0.77      0.73     17895
weighted avg       0.98      0.98      0.98     17895

[[17257   282]
 [  161   195]]





Training corpus 27-vulgar ... ... model already exists!
Training corpus 1-threat ... ... model already exists!
Training corpus 3-profane ... ... model already exists!


In [None]:
# Check within single domains

# Everything printed during the analysis goes to the log file instead of the
# notebook. The context managers close the file and put the original stdout
# back when the block ends, including when the analysis raises, so later cells
# can still print.
with open("outputs/std_output_single_domain.txt", "w") as log_file, \
        contextlib.redirect_stdout(log_file):
    # Only the first eight groups are compared; each group is evaluated
    # against itself (same list passed as both definition arguments).
    for dataset_definitions_list in dataset_definitions[0:8]:
        run_pairwise_analysis(device,
                              MAX_LEN,
                              datasets,
                              dataset_definitions_list,
                              dataset_definitions_list)

In [None]:
# Check across domains

# One representative (corpus_id, positive_label, negative_label) definition
# per category.
dataset_definitions_list_trained = [
    #TOXIC
    (1, "severe_toxic", "other"),
    #HATE
    (2, "hate_speech", "neither"),
    #ABUSIVE
    (7, "abusive", "normal"),
    #AGGRESSIVE
    (17, "covertly-aggressive", "non-aggressive"),
    #OFFENSIVE
    (15, "offensive", "non-offensive"),
    #SEXISM
    (29, "sexism", "none"),
    #CYBERBULLYING
    (6, "cyberbullying", "none"),
    #SPAM
    (18, "spam", "normal"),
    #HARASSMENT
    (19, "harrasment", "non-harrasment"),
    #OBSCENE
    (1, "obscene", "other"),
    #INSULT
    (1, "insult", "other"),
    #HOMOPHOBIA
    (9, "homophobic", "none"),
    #RACIST
    (9, "racist", "none"),
    #VULGAR
    (27, "vulgar", "non-vulgar"),
    #THREAT
    (1, "threat", "other"),
    #PROFANE
    (3, "profane", "none")
]

# The second list holds exactly the same definitions. It is derived from the
# first so the two cannot drift apart, and copied so it stays a distinct list
# object, as it was when both were written out in full.
dataset_definitions_list_to_test = list(dataset_definitions_list_trained)

# Everything printed during the analysis goes to the log file instead of the
# notebook. The context managers close the file and put the original stdout
# back when the block ends, including when the analysis raises, so later cells
# can still print.
with open("outputs/std_output_across_domain.txt", "w") as log_file, \
        contextlib.redirect_stdout(log_file):
    # flush=True writes the header to disk before the long-running analysis starts.
    print("Evaluation: \n", flush=True)

    run_pairwise_analysis(device,
                          MAX_LEN,
                          datasets,
                          dataset_definitions_list_trained,
                          dataset_definitions_list_to_test
                         )