In [1]:
from utils import *
import numpy as np
import torch, os
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed via the helper pulled in by `from utils import *` — presumably it seeds the
# RNGs this notebook relies on (it echoes the value it was given); TODO confirm
# which libraries it covers. The same value, 3, is also passed as `random_state`
# to the MLPClassifier built further down.
set_random_seed(3)

Seed: 3


In [3]:
# def scaling_embeddings(embeddings):
#     for i in range(len(embeddings)):
#         embeddings[i] = StandardScaler().fit_transform(embeddings[i])
#     return embeddings

def reshape_embeddings(embeddings):
    """Drop the second axis of ``embeddings`` when that axis has length 1.

    Args:
        embeddings: Object exposing ``.shape`` and ``.squeeze(axis)`` with at
            least two dimensions.

    Returns:
        ``embeddings.squeeze(1)`` if ``embeddings.shape[1] == 1``; otherwise the
        very same object, untouched.
    """
    has_singleton_axis = embeddings.shape[1] == 1
    return embeddings.squeeze(1) if has_singleton_axis else embeddings

In [4]:
def interleave_2_embeddings(embeddings1, embeddings2):
    """Interleave two embedding sets feature-by-feature for every sample.

    For sample ``i`` the output row is
    ``[e1[i][0], e2[i][0], e1[i][1], e2[i][1], ...]``.

    Args:
        embeddings1: First embedding set; a singleton second axis is removed
            by ``reshape_embeddings`` before interleaving.
        embeddings2: Second embedding set; must have the same shape as
            ``embeddings1`` once the singleton axis is removed.

    Returns:
        np.ndarray of shape ``(n_samples, 2 * n_features, ...)``.

    Raises:
        ValueError: If the inputs have fewer than two dimensions or their
            shapes differ. The previous loop silently ignored surplus
            rows/features of the second input.
    """
    first = np.asarray(reshape_embeddings(embeddings1))
    second = np.asarray(reshape_embeddings(embeddings2))
    if first.ndim < 2 or first.shape != second.shape:
        raise ValueError(
            "embeddings must share one shape with at least 2 dimensions, got "
            f"{first.shape} and {second.shape}"
        )
    # A new axis right after the feature axis puts the j-th features of both
    # sources side by side; flattening that pair axis into the feature axis
    # yields the interleaved layout without a Python-level loop.
    paired = np.stack((first, second), axis=2)
    # Explicit target shape (no -1) so that empty inputs reshape cleanly too.
    return paired.reshape((first.shape[0], 2 * first.shape[1]) + first.shape[2:])
    

def interleave_3_embeddings(embeddings1, embeddings2, embeddings3):
    """Interleave three embedding sets feature-by-feature for every sample.

    For sample ``i`` the output row is
    ``[e1[i][0], e2[i][0], e3[i][0], e1[i][1], e2[i][1], e3[i][1], ...]``.

    Args:
        embeddings1: First embedding set; a singleton second axis is removed
            by ``reshape_embeddings`` before interleaving.
        embeddings2: Second embedding set, same shape as ``embeddings1`` once
            the singleton axis is removed.
        embeddings3: Third embedding set, same shape requirement.

    Returns:
        np.ndarray of shape ``(n_samples, 3 * n_features, ...)``.

    Raises:
        ValueError: If the inputs have fewer than two dimensions or their
            shapes differ. The previous loop silently ignored surplus
            rows/features of the second and third inputs.
    """
    first = np.asarray(reshape_embeddings(embeddings1))
    second = np.asarray(reshape_embeddings(embeddings2))
    third = np.asarray(reshape_embeddings(embeddings3))
    if first.ndim < 2 or not (first.shape == second.shape == third.shape):
        raise ValueError(
            "embeddings must share one shape with at least 2 dimensions, got "
            f"{first.shape}, {second.shape} and {third.shape}"
        )
    # A new axis right after the feature axis groups the j-th features of all
    # three sources; flattening it into the feature axis interleaves them
    # without a Python-level loop.
    grouped = np.stack((first, second, third), axis=2)
    # Explicit target shape (no -1) so that empty inputs reshape cleanly too.
    return grouped.reshape((first.shape[0], 3 * first.shape[1]) + first.shape[2:])

In [5]:
# Build every interleaved feature matrix used below: for each corpus
# (DynaHate, OLID, LatentHatred) and each split (train/dev/test), the three
# pairwise combinations plus the three-way combination of the per-model embeddings.
# NOTE(review): the `*_embeddings` inputs are not defined in any visible cell —
# presumably they are exported by `from utils import *`; confirm.

# --- DynaHate ---
# BERT + HateBERT
bert_hatebert_interleaved_dynahate_train = interleave_2_embeddings(bert_dynahate_train_embeddings, hatebert_dynahate_train_embeddings)
bert_hatebert_interleaved_dynahate_dev = interleave_2_embeddings(bert_dynahate_dev_embeddings, hatebert_dynahate_dev_embeddings)
bert_hatebert_interleaved_dynahate_test = interleave_2_embeddings(bert_dynahate_test_embeddings, hatebert_dynahate_test_embeddings)

# BERT + BERTweet
bert_bertweet_interleaved_dynahate_train = interleave_2_embeddings(bert_dynahate_train_embeddings, bertweet_dynahate_train_embeddings)
bert_bertweet_interleaved_dynahate_dev = interleave_2_embeddings(bert_dynahate_dev_embeddings, bertweet_dynahate_dev_embeddings)
bert_bertweet_interleaved_dynahate_test = interleave_2_embeddings(bert_dynahate_test_embeddings, bertweet_dynahate_test_embeddings)

# HateBERT + BERTweet
hatebert_bertweet_interleaved_dynahate_train = interleave_2_embeddings(hatebert_dynahate_train_embeddings, bertweet_dynahate_train_embeddings)
hatebert_bertweet_interleaved_dynahate_dev = interleave_2_embeddings(hatebert_dynahate_dev_embeddings, bertweet_dynahate_dev_embeddings)
hatebert_bertweet_interleaved_dynahate_test = interleave_2_embeddings(hatebert_dynahate_test_embeddings, bertweet_dynahate_test_embeddings)

# BERT + HateBERT + BERTweet (features are interleaved in this argument order)
bert_hatebert_bertweet_interleaved_dynahate_train = interleave_3_embeddings(bert_dynahate_train_embeddings, hatebert_dynahate_train_embeddings, bertweet_dynahate_train_embeddings)
bert_hatebert_bertweet_interleaved_dynahate_dev = interleave_3_embeddings(bert_dynahate_dev_embeddings, hatebert_dynahate_dev_embeddings, bertweet_dynahate_dev_embeddings)
bert_hatebert_bertweet_interleaved_dynahate_test = interleave_3_embeddings(bert_dynahate_test_embeddings, hatebert_dynahate_test_embeddings, bertweet_dynahate_test_embeddings)

# --- OLID ---
# BERT + HateBERT
bert_hatebert_interleaved_olid_train = interleave_2_embeddings(bert_olid_train_embeddings, hatebert_olid_train_embeddings)
bert_hatebert_interleaved_olid_dev = interleave_2_embeddings(bert_olid_dev_embeddings, hatebert_olid_dev_embeddings)
bert_hatebert_interleaved_olid_test = interleave_2_embeddings(bert_olid_test_embeddings, hatebert_olid_test_embeddings)

# BERT + BERTweet
bert_bertweet_interleaved_olid_train = interleave_2_embeddings(bert_olid_train_embeddings, bertweet_olid_train_embeddings)
bert_bertweet_interleaved_olid_dev = interleave_2_embeddings(bert_olid_dev_embeddings, bertweet_olid_dev_embeddings)
bert_bertweet_interleaved_olid_test = interleave_2_embeddings(bert_olid_test_embeddings, bertweet_olid_test_embeddings)

# HateBERT + BERTweet
hatebert_bertweet_interleaved_olid_train = interleave_2_embeddings(hatebert_olid_train_embeddings, bertweet_olid_train_embeddings)
hatebert_bertweet_interleaved_olid_dev = interleave_2_embeddings(hatebert_olid_dev_embeddings, bertweet_olid_dev_embeddings)
hatebert_bertweet_interleaved_olid_test = interleave_2_embeddings(hatebert_olid_test_embeddings, bertweet_olid_test_embeddings)

# BERT + HateBERT + BERTweet
bert_hatebert_bertweet_interleaved_olid_train = interleave_3_embeddings(bert_olid_train_embeddings, hatebert_olid_train_embeddings, bertweet_olid_train_embeddings)
bert_hatebert_bertweet_interleaved_olid_dev = interleave_3_embeddings(bert_olid_dev_embeddings, hatebert_olid_dev_embeddings, bertweet_olid_dev_embeddings)
bert_hatebert_bertweet_interleaved_olid_test = interleave_3_embeddings(bert_olid_test_embeddings, hatebert_olid_test_embeddings, bertweet_olid_test_embeddings)

# --- LatentHatred ---
# BERT + HateBERT
bert_hatebert_interleaved_latenthatred_train = interleave_2_embeddings(bert_latenthatred_train_embeddings, hatebert_latenthatred_train_embeddings)
bert_hatebert_interleaved_latenthatred_dev = interleave_2_embeddings(bert_latenthatred_dev_embeddings, hatebert_latenthatred_dev_embeddings)
bert_hatebert_interleaved_latenthatred_test = interleave_2_embeddings(bert_latenthatred_test_embeddings, hatebert_latenthatred_test_embeddings)

# BERT + BERTweet
bert_bertweet_interleaved_latenthatred_train = interleave_2_embeddings(bert_latenthatred_train_embeddings, bertweet_latenthatred_train_embeddings)
bert_bertweet_interleaved_latenthatred_dev = interleave_2_embeddings(bert_latenthatred_dev_embeddings, bertweet_latenthatred_dev_embeddings)
bert_bertweet_interleaved_latenthatred_test = interleave_2_embeddings(bert_latenthatred_test_embeddings, bertweet_latenthatred_test_embeddings)

# HateBERT + BERTweet
hatebert_bertweet_interleaved_latenthatred_train = interleave_2_embeddings(hatebert_latenthatred_train_embeddings, bertweet_latenthatred_train_embeddings)
hatebert_bertweet_interleaved_latenthatred_dev = interleave_2_embeddings(hatebert_latenthatred_dev_embeddings, bertweet_latenthatred_dev_embeddings)
hatebert_bertweet_interleaved_latenthatred_test = interleave_2_embeddings(hatebert_latenthatred_test_embeddings, bertweet_latenthatred_test_embeddings)

# BERT + HateBERT + BERTweet
bert_hatebert_bertweet_interleaved_latenthatred_train = interleave_3_embeddings(bert_latenthatred_train_embeddings, hatebert_latenthatred_train_embeddings, bertweet_latenthatred_train_embeddings)
bert_hatebert_bertweet_interleaved_latenthatred_dev = interleave_3_embeddings(bert_latenthatred_dev_embeddings, hatebert_latenthatred_dev_embeddings, bertweet_latenthatred_dev_embeddings)
bert_hatebert_bertweet_interleaved_latenthatred_test = interleave_3_embeddings(bert_latenthatred_test_embeddings, hatebert_latenthatred_test_embeddings, bertweet_latenthatred_test_embeddings)

In [6]:
# Load the labels of every corpus in train / dev / test order.
# DynaHate is the only corpus whose raw labels go through process_labels.
dynahate_labels_train, dynahate_labels_dev, dynahate_labels_test = (
    process_labels(read_labels("dynahate", split)) for split in ("train", "dev", "test")
)

latenthatred_labels_train, latenthatred_labels_dev, latenthatred_labels_test = (
    read_labels("latenthatred", split) for split in ("train", "dev", "test")
)

olid_labels_train, olid_labels_dev, olid_labels_test = (
    read_labels("olid", split) for split in ("train", "dev", "test")
)

In [7]:
# Base estimator handed to the grid search; random_state is fixed so that
# repeated runs start from the same initialisation.
mlp = MLPClassifier(random_state=3)

# Shared search object: every experiment cell below calls gridsearch.fit(...)
# on its own feature matrix, which re-runs the 5-fold search over this grid.
gridsearch = GridSearchCV(
    mlp,
    param_grid={
        # One-element tuple `(128,)`: a bare `(128)` is just the int 128.
        "hidden_layer_sizes": [(128,), (128, 64)],
        "activation": ["relu"],
        "solver": ["adam"],
        "learning_rate_init": [0.001, 0.0001],
        "learning_rate": ["adaptive"],
        "early_stopping": [True],
        "max_iter": [10000]
    },
    verbose=4,
    # Use roughly a third of the cores, but never fewer than one worker:
    # os.cpu_count() may return None, and on machines with fewer than three
    # cores the floor division would yield n_jobs=0, which joblib rejects.
    n_jobs=max(1, (os.cpu_count() or 1) // 3),
    cv=5,
)

In [8]:
# Labels of the pooled train+dev data, one sequence per corpus; these are the
# references passed to computeAllScores in the experiment cells below.
dynahate_labels_train_dev = np.concatenate([dynahate_labels_train, dynahate_labels_dev], axis=0)
latenthatred_labels_train_dev = np.concatenate([latenthatred_labels_train, latenthatred_labels_dev], axis=0)
olid_labels_train_dev = np.concatenate([olid_labels_train, olid_labels_dev], axis=0)

### BERT-BERTweet

#### DynaHate

In [9]:
# Pool the train and dev splits; the grid search cross-validates on this pool.
bert_bertweet_interleaved_dynahate_train_dev = np.concatenate(
    [bert_bertweet_interleaved_dynahate_train, bert_bertweet_interleaved_dynahate_dev],
    axis=0,
)
bert_bertweet_interleaved_dynahate_labels_train_dev = np.concatenate(
    [dynahate_labels_train, dynahate_labels_dev],
    axis=0,
)

# Hyper-parameter search on BERT+BERTweet features for DynaHate.
grid_results = gridsearch.fit(
    bert_bertweet_interleaved_dynahate_train_dev,
    bert_bertweet_interleaved_dynahate_labels_train_dev,
)
best_params, mlp = grid_results.best_params_, grid_results.best_estimator_
print("Best params: ", best_params)

# Predict with the selected estimator on the pooled data and on the test split.
train_dev_preds = mlp.predict(bert_bertweet_interleaved_dynahate_train_dev)
test_preds = mlp.predict(bert_bertweet_interleaved_dynahate_test)

# Report/store the metrics, then persist the fitted model.
computeAllScores(
    train_dev_preds,
    test_preds,
    dynahate_labels_train_dev,
    dynahate_labels_test,
    "Results/bert_bertweet_interleaved_dynahate",
)
save_model(mlp, "Saves/bert_bertweet_interleaved_dynahate")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best params:  {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': (128, 64), 'learning_rate': 'adaptive', 'learning_rate_init': 0.001, 'max_iter': 10000, 'solver': 'adam'}
Accuracy Train Dev:  0.7227203975799481
Accuracy Test:  0.6815533980582524
Weighted F1 Train Dev:  0.7216690978854313
Weighted F1 Test:  0.6805224119331919
Macro F1 Train Dev:  0.7193522525870887
Macro F1 Test:  0.6763984674329502
Micro F1 Train Dev:  0.7227203975799482
Micro F1 Test:  0.6815533980582524
Weighted Recall Train Dev:  0.7227203975799481
Weighted Recall Test:  0.6815533980582524
Macro Recall Train Dev:  0.7185453839585245
Macro Recall Test:  0.675698325086374
Micro Recall Train Dev:  0.7227203975799481
Micro Recall Test:  0.6815533980582524
Confusion Matrix Train Dev: 
[[11351  5766]
 [ 4500 15407]]
Confusion Matrix Test: 
[[1144  708]
 [ 604 1664]]


#### LatentHatred

In [10]:
# Pool the train and dev splits; the grid search cross-validates on this pool.
bert_bertweet_interleaved_latenthatred_train_dev = np.concatenate(
    [bert_bertweet_interleaved_latenthatred_train, bert_bertweet_interleaved_latenthatred_dev],
    axis=0,
)
bert_bertweet_interleaved_latenthatred_labels_train_dev = np.concatenate(
    [latenthatred_labels_train, latenthatred_labels_dev],
    axis=0,
)

# Hyper-parameter search on BERT+BERTweet features for LatentHatred.
grid_results = gridsearch.fit(
    bert_bertweet_interleaved_latenthatred_train_dev,
    bert_bertweet_interleaved_latenthatred_labels_train_dev,
)
best_params, mlp = grid_results.best_params_, grid_results.best_estimator_
print("Best params: ", best_params)

# Predict with the selected estimator on the pooled data and on the test split.
train_dev_preds = mlp.predict(bert_bertweet_interleaved_latenthatred_train_dev)
test_preds = mlp.predict(bert_bertweet_interleaved_latenthatred_test)

# Report/store the metrics, then persist the fitted model.
computeAllScores(
    train_dev_preds,
    test_preds,
    latenthatred_labels_train_dev,
    latenthatred_labels_test,
    "Results/bert_bertweet_interleaved_latenthatred",
)
save_model(mlp, "Saves/bert_bertweet_interleaved_latenthatred")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best params:  {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': 128, 'learning_rate': 'adaptive', 'learning_rate_init': 0.001, 'max_iter': 10000, 'solver': 'adam'}
Accuracy Train Dev:  0.6987585350713842
Accuracy Test:  0.6864059590316574
Weighted F1 Train Dev:  0.6801955007408066
Weighted F1 Test:  0.6684773945150163
Macro F1 Train Dev:  0.4645488094132229
Macro F1 Test:  0.45374594714729916
Micro F1 Train Dev:  0.6987585350713842
Micro F1 Test:  0.6864059590316574
Weighted Recall Train Dev:  0.6987585350713842
Weighted Recall Test:  0.6864059590316574
Macro Recall Train Dev:  0.47111815904853915
Macro Recall Test:  0.4596296313962485
Micro Recall Train Dev:  0.6987585350713842
Micro Recall Test:  0.6864059590316574
Confusion Matrix Train Dev: 
[[8134 1807    4]
 [2233 3112    2]
 [ 320  487   11]]
Confusion Matrix Test: 
[[2711  633    2]
 [ 779  971    3]
 [  97  170    4]]


#### OLID

In [11]:
# Pool the train and dev splits; the grid search cross-validates on this pool.
bert_bertweet_interleaved_olid_train_dev = np.concatenate(
    [bert_bertweet_interleaved_olid_train, bert_bertweet_interleaved_olid_dev],
    axis=0,
)
bert_bertweet_interleaved_olid_labels_train_dev = np.concatenate(
    [olid_labels_train, olid_labels_dev],
    axis=0,
)

# Hyper-parameter search on BERT+BERTweet features for OLID.
grid_results = gridsearch.fit(
    bert_bertweet_interleaved_olid_train_dev,
    bert_bertweet_interleaved_olid_labels_train_dev,
)
best_params, mlp = grid_results.best_params_, grid_results.best_estimator_
print("Best params: ", best_params)

# Predict with the selected estimator on the pooled data and on the test split.
train_dev_preds = mlp.predict(bert_bertweet_interleaved_olid_train_dev)
test_preds = mlp.predict(bert_bertweet_interleaved_olid_test)

# Report/store the metrics, then persist the fitted model.
computeAllScores(
    train_dev_preds,
    test_preds,
    olid_labels_train_dev,
    olid_labels_test,
    "Results/bert_bertweet_interleaved_olid",
)
save_model(mlp, "Saves/bert_bertweet_interleaved_olid")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best params:  {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': 128, 'learning_rate': 'adaptive', 'learning_rate_init': 0.001, 'max_iter': 10000, 'solver': 'adam'}
Accuracy Train Dev:  0.7698640483383686
Accuracy Test:  0.8162790697674419
Weighted F1 Train Dev:  0.7612392373084431
Weighted F1 Test:  0.7979791062887301
Macro F1 Train Dev:  0.7230596152018938
Macro F1 Test:  0.7311138551165146
Micro F1 Train Dev:  0.7698640483383686
Micro F1 Test:  0.8162790697674419
Weighted Recall Train Dev:  0.7698640483383686
Weighted Recall Test:  0.8162790697674419
Macro Recall Train Dev:  0.7120809337721102
Macro Recall Test:  0.7053091397849462
Micro Recall Train Dev:  0.7698640483383686
Micro Recall Test:  0.8162790697674419
Confusion Matrix Train Dev: 
[[7818 1022]
 [2025 2375]]
Confusion Matrix Test: 
[[593  27]
 [131 109]]


### BERT-HateBERT

#### DynaHate

In [12]:
# Pool the train and dev splits; the grid search cross-validates on this pool.
bert_hatebert_interleaved_dynahate_train_dev = np.concatenate(
    [bert_hatebert_interleaved_dynahate_train, bert_hatebert_interleaved_dynahate_dev],
    axis=0,
)
bert_hatebert_interleaved_dynahate_labels_train_dev = np.concatenate(
    [dynahate_labels_train, dynahate_labels_dev],
    axis=0,
)

# Hyper-parameter search on BERT+HateBERT features for DynaHate.
grid_results = gridsearch.fit(
    bert_hatebert_interleaved_dynahate_train_dev,
    bert_hatebert_interleaved_dynahate_labels_train_dev,
)
best_params, mlp = grid_results.best_params_, grid_results.best_estimator_
print("Best params: ", best_params)

# Predict with the selected estimator on the pooled data and on the test split.
train_dev_preds = mlp.predict(bert_hatebert_interleaved_dynahate_train_dev)
test_preds = mlp.predict(bert_hatebert_interleaved_dynahate_test)

# Report/store the metrics, then persist the fitted model.
computeAllScores(
    train_dev_preds,
    test_preds,
    dynahate_labels_train_dev,
    dynahate_labels_test,
    "Results/bert_hatebert_interleaved_dynahate",
)
save_model(mlp, "Saves/bert_hatebert_interleaved_dynahate")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best params:  {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': 128, 'learning_rate': 'adaptive', 'learning_rate_init': 0.001, 'max_iter': 10000, 'solver': 'adam'}
Accuracy Train Dev:  0.7352798184961107
Accuracy Test:  0.6898058252427185
Weighted F1 Train Dev:  0.7353069432597991
Weighted F1 Test:  0.6902098912291883
Macro F1 Train Dev:  0.7338230591286184
Macro F1 Test:  0.6874987388038424
Micro F1 Train Dev:  0.7352798184961107
Micro F1 Test:  0.6898058252427185
Weighted Recall Train Dev:  0.7352798184961107
Weighted Recall Test:  0.6898058252427185
Macro Recall Train Dev:  0.7338723871282764
Macro Recall Test:  0.6881459007088957
Micro Recall Train Dev:  0.7352798184961107
Micro Recall Test:  0.6898058252427185
Confusion Matrix Train Dev: 
[[12242  4875]
 [ 4926 14981]]
Confusion Matrix Test: 
[[1244  608]
 [ 670 1598]]


#### LatentHatred

In [13]:
# Pool the train and dev splits; the grid search cross-validates on this pool.
bert_hatebert_interleaved_latenthatred_train_dev = np.concatenate(
    [bert_hatebert_interleaved_latenthatred_train, bert_hatebert_interleaved_latenthatred_dev],
    axis=0,
)
bert_hatebert_interleaved_latenthatred_labels_train_dev = np.concatenate(
    [latenthatred_labels_train, latenthatred_labels_dev],
    axis=0,
)

# Hyper-parameter search on BERT+HateBERT features for LatentHatred.
grid_results = gridsearch.fit(
    bert_hatebert_interleaved_latenthatred_train_dev,
    bert_hatebert_interleaved_latenthatred_labels_train_dev,
)
best_params, mlp = grid_results.best_params_, grid_results.best_estimator_
print("Best params: ", best_params)

# Predict with the selected estimator on the pooled data and on the test split.
train_dev_preds = mlp.predict(bert_hatebert_interleaved_latenthatred_train_dev)
test_preds = mlp.predict(bert_hatebert_interleaved_latenthatred_test)

# Report/store the metrics, then persist the fitted model.
computeAllScores(
    train_dev_preds,
    test_preds,
    latenthatred_labels_train_dev,
    latenthatred_labels_test,
    "Results/bert_hatebert_interleaved_latenthatred",
)
save_model(mlp, "Saves/bert_hatebert_interleaved_latenthatred")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best params:  {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': (128, 64), 'learning_rate': 'adaptive', 'learning_rate_init': 0.001, 'max_iter': 10000, 'solver': 'adam'}
Accuracy Train Dev:  0.6977032898820609
Accuracy Test:  0.6800744878957169
Weighted F1 Train Dev:  0.6828822705063753
Weighted F1 Test:  0.6660901892813772
Macro F1 Train Dev:  0.47695420496599744
Macro F1 Test:  0.4621395648396915
Micro F1 Train Dev:  0.6977032898820609
Micro F1 Test:  0.6800744878957169
Weighted Recall Train Dev:  0.6977032898820609
Weighted Recall Test:  0.6800744878957169
Macro Recall Train Dev:  0.4802515811671733
Macro Recall Test:  0.46547156249970206
Micro Recall Train Dev:  0.6977032898820609
Micro Recall Test:  0.6800744878957169
Confusion Matrix Train Dev: 
[[7948 1989    8]
 [2076 3267    4]
 [ 248  545   25]]
Confusion Matrix Test: 
[[2621  721    4]
 [ 729 1023    1]
 [  93  170    8]]


#### OLID

In [14]:
# Pool the train and dev splits; the grid search cross-validates on this pool.
bert_hatebert_interleaved_olid_train_dev = np.concatenate(
    [bert_hatebert_interleaved_olid_train, bert_hatebert_interleaved_olid_dev],
    axis=0,
)
bert_hatebert_interleaved_olid_labels_train_dev = np.concatenate(
    [olid_labels_train, olid_labels_dev],
    axis=0,
)

# Hyper-parameter search on BERT+HateBERT features for OLID.
grid_results = gridsearch.fit(
    bert_hatebert_interleaved_olid_train_dev,
    bert_hatebert_interleaved_olid_labels_train_dev,
)
best_params, mlp = grid_results.best_params_, grid_results.best_estimator_
print("Best params: ", best_params)

# Predict with the selected estimator on the pooled data and on the test split.
train_dev_preds = mlp.predict(bert_hatebert_interleaved_olid_train_dev)
test_preds = mlp.predict(bert_hatebert_interleaved_olid_test)

# Report/store the metrics, then persist the fitted model.
computeAllScores(
    train_dev_preds,
    test_preds,
    olid_labels_train_dev,
    olid_labels_test,
    "Results/bert_hatebert_interleaved_olid",
)
save_model(mlp, "Saves/bert_hatebert_interleaved_olid")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best params:  {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': 128, 'learning_rate': 'adaptive', 'learning_rate_init': 0.001, 'max_iter': 10000, 'solver': 'adam'}
Accuracy Train Dev:  0.7705438066465257
Accuracy Test:  0.8046511627906977
Weighted F1 Train Dev:  0.7705175845084978
Weighted F1 Test:  0.7952057245080502
Macro F1 Train Dev:  0.7414110112974179
Macro F1 Test:  0.7353846153846154
Micro F1 Train Dev:  0.7705438066465257
Micro F1 Test:  0.8046511627906977
Weighted Recall Train Dev:  0.7705438066465257
Weighted Recall Test:  0.8046511627906977
Macro Recall Train Dev:  0.7413559234882765
Macro Recall Test:  0.7189516129032258
Micro Recall Train Dev:  0.7705438066465257
Micro Recall Test:  0.8046511627906977
Confusion Matrix Train Dev: 
[[7323 1517]
 [1521 2879]]
Confusion Matrix Test: 
[[566  54]
 [114 126]]


### BERTweet-HateBERT

#### DynaHate

In [15]:
# Pool the train and dev splits; the grid search cross-validates on this pool.
hatebert_bertweet_interleaved_dynahate_train_dev = np.concatenate(
    [hatebert_bertweet_interleaved_dynahate_train, hatebert_bertweet_interleaved_dynahate_dev],
    axis=0,
)
hatebert_bertweet_interleaved_dynahate_labels_train_dev = np.concatenate(
    [dynahate_labels_train, dynahate_labels_dev],
    axis=0,
)

# Hyper-parameter search on HateBERT+BERTweet features for DynaHate.
grid_results = gridsearch.fit(
    hatebert_bertweet_interleaved_dynahate_train_dev,
    hatebert_bertweet_interleaved_dynahate_labels_train_dev,
)
best_params, mlp = grid_results.best_params_, grid_results.best_estimator_
print("Best params: ", best_params)

# Predict with the selected estimator on the pooled data and on the test split.
train_dev_preds = mlp.predict(hatebert_bertweet_interleaved_dynahate_train_dev)
test_preds = mlp.predict(hatebert_bertweet_interleaved_dynahate_test)

# Report/store the metrics, then persist the fitted model.
computeAllScores(
    train_dev_preds,
    test_preds,
    dynahate_labels_train_dev,
    dynahate_labels_test,
    "Results/hatebert_bertweet_interleaved_dynahate",
)
save_model(mlp, "Saves/hatebert_bertweet_interleaved_dynahate")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best params:  {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': 128, 'learning_rate': 'adaptive', 'learning_rate_init': 0.001, 'max_iter': 10000, 'solver': 'adam'}
Accuracy Train Dev:  0.7571305099394987
Accuracy Test:  0.7077669902912621
Weighted F1 Train Dev:  0.7560329167093773
Weighted F1 Test:  0.7067543611484072
Macro F1 Train Dev:  0.7539122206484088
Macro F1 Test:  0.7029247818445197
Micro F1 Train Dev:  0.7571305099394987
Micro F1 Test:  0.7077669902912621
Weighted Recall Train Dev:  0.7571305099394987
Weighted Recall Test:  0.7077669902912621
Macro Recall Train Dev:  0.7527876536862115
Macro Recall Test:  0.7020333611406326
Micro Recall Train Dev:  0.7571305099394987
Micro Recall Test:  0.7077669902912621
Confusion Matrix Train Dev: 
[[11899  5218]
 [ 3774 16133]]
Confusion Matrix Test: 
[[1195  657]
 [ 547 1721]]


#### LatentHatred

In [16]:
# Pool the train and dev splits; the grid search cross-validates on this pool.
hatebert_bertweet_interleaved_latenthatred_train_dev = np.concatenate(
    [hatebert_bertweet_interleaved_latenthatred_train, hatebert_bertweet_interleaved_latenthatred_dev],
    axis=0,
)
hatebert_bertweet_interleaved_latenthatred_labels_train_dev = np.concatenate(
    [latenthatred_labels_train, latenthatred_labels_dev],
    axis=0,
)

# Hyper-parameter search on HateBERT+BERTweet features for LatentHatred.
grid_results = gridsearch.fit(
    hatebert_bertweet_interleaved_latenthatred_train_dev,
    hatebert_bertweet_interleaved_latenthatred_labels_train_dev,
)
best_params, mlp = grid_results.best_params_, grid_results.best_estimator_
print("Best params: ", best_params)

# Predict with the selected estimator on the pooled data and on the test split.
train_dev_preds = mlp.predict(hatebert_bertweet_interleaved_latenthatred_train_dev)
test_preds = mlp.predict(hatebert_bertweet_interleaved_latenthatred_test)

# Report/store the metrics, then persist the fitted model.
computeAllScores(
    train_dev_preds,
    test_preds,
    latenthatred_labels_train_dev,
    latenthatred_labels_test,
    "Results/hatebert_bertweet_interleaved_latenthatred",
)
save_model(mlp, "Saves/hatebert_bertweet_interleaved_latenthatred")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best params:  {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': 128, 'learning_rate': 'adaptive', 'learning_rate_init': 0.001, 'max_iter': 10000, 'solver': 'adam'}
Accuracy Train Dev:  0.7139664804469273
Accuracy Test:  0.7013035381750465
Weighted F1 Train Dev:  0.6905020285045007
Weighted F1 Test:  0.6758550239597128
Macro F1 Train Dev:  0.49343848084889386
Macro F1 Test:  0.4769551071780082
Micro F1 Train Dev:  0.7139664804469273
Micro F1 Test:  0.7013035381750465
Weighted Recall Train Dev:  0.7139664804469273
Weighted Recall Test:  0.7013035381750465
Macro Recall Train Dev:  0.4827313504846357
Macro Recall Test:  0.4672226046765739
Micro Recall Train Dev:  0.7139664804469273
Micro Recall Test:  0.7013035381750465
Confusion Matrix Train Dev: 
[[8740 1181   24]
 [2608 2711   28]
 [ 369  398   51]]
Confusion Matrix Test: 
[[2933  402   11]
 [ 926  817   10]
 [ 124  131   16]]


#### OLID

In [17]:
# Pool the train and dev splits; the grid search cross-validates on this pool.
hatebert_bertweet_interleaved_olid_train_dev = np.concatenate(
    [hatebert_bertweet_interleaved_olid_train, hatebert_bertweet_interleaved_olid_dev],
    axis=0,
)
hatebert_bertweet_interleaved_olid_labels_train_dev = np.concatenate(
    [olid_labels_train, olid_labels_dev],
    axis=0,
)

# Hyper-parameter search on HateBERT+BERTweet features for OLID.
grid_results = gridsearch.fit(
    hatebert_bertweet_interleaved_olid_train_dev,
    hatebert_bertweet_interleaved_olid_labels_train_dev,
)
best_params, mlp = grid_results.best_params_, grid_results.best_estimator_
print("Best params: ", best_params)

# Predict with the selected estimator on the pooled data and on the test split.
train_dev_preds = mlp.predict(hatebert_bertweet_interleaved_olid_train_dev)
test_preds = mlp.predict(hatebert_bertweet_interleaved_olid_test)

# Report/store the metrics, then persist the fitted model.
computeAllScores(
    train_dev_preds,
    test_preds,
    olid_labels_train_dev,
    olid_labels_test,
    "Results/hatebert_bertweet_interleaved_olid",
)
save_model(mlp, "Saves/hatebert_bertweet_interleaved_olid")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best params:  {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': (128, 64), 'learning_rate': 'adaptive', 'learning_rate_init': 0.0001, 'max_iter': 10000, 'solver': 'adam'}
Accuracy Train Dev:  0.765785498489426
Accuracy Test:  0.7930232558139535
Weighted F1 Train Dev:  0.758543817922675
Weighted F1 Test:  0.7795316783392483
Macro F1 Train Dev:  0.7211130126649627
Macro F1 Test:  0.7120737909657228
Micro F1 Train Dev:  0.765785498489426
Micro F1 Test:  0.7930232558139535
Weighted Recall Train Dev:  0.765785498489426
Weighted Recall Test:  0.7930232558139535
Macro Recall Train Dev:  0.7115950226244344
Macro Recall Test:  0.6942876344086022
Micro Recall Train Dev:  0.765785498489426
Micro Recall Test:  0.7930232558139535
Confusion Matrix Train Dev: 
[[7719 1121]
 [1980 2420]]
Confusion Matrix Test: 
[[569  51]
 [127 113]]


### BERT-BERTweet-HateBERT

#### DynaHate

In [18]:
# Pool the train and dev splits; the grid search cross-validates on this pool.
# NOTE: the source arrays are named bert_hatebert_bertweet_* while this cell's
# own names and output paths say bert_bertweet_hatebert_*.
bert_bertweet_hatebert_interleaved_dynahate_train_dev = np.concatenate(
    [bert_hatebert_bertweet_interleaved_dynahate_train, bert_hatebert_bertweet_interleaved_dynahate_dev],
    axis=0,
)
bert_bertweet_hatebert_interleaved_dynahate_labels_train_dev = np.concatenate(
    [dynahate_labels_train, dynahate_labels_dev],
    axis=0,
)

# Hyper-parameter search on the three-model features for DynaHate.
grid_results = gridsearch.fit(
    bert_bertweet_hatebert_interleaved_dynahate_train_dev,
    bert_bertweet_hatebert_interleaved_dynahate_labels_train_dev,
)
best_params, mlp = grid_results.best_params_, grid_results.best_estimator_
print("Best params: ", best_params)

# Predict with the selected estimator on the pooled data and on the test split.
train_dev_preds = mlp.predict(bert_bertweet_hatebert_interleaved_dynahate_train_dev)
test_preds = mlp.predict(bert_hatebert_bertweet_interleaved_dynahate_test)

# Report/store the metrics, then persist the fitted model.
computeAllScores(
    train_dev_preds,
    test_preds,
    dynahate_labels_train_dev,
    dynahate_labels_test,
    "Results/bert_bertweet_hatebert_interleaved_dynahate",
)
save_model(mlp, "Saves/bert_bertweet_hatebert_interleaved_dynahate")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best params:  {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': (128, 64), 'learning_rate': 'adaptive', 'learning_rate_init': 0.001, 'max_iter': 10000, 'solver': 'adam'}
Accuracy Train Dev:  0.7509723422644771
Accuracy Test:  0.7114077669902913
Weighted F1 Train Dev:  0.7478923525922802
Weighted F1 Test:  0.7080490943528953
Macro F1 Train Dev:  0.7449353081375554
Macro F1 Test:  0.7030050228108133
Micro F1 Train Dev:  0.7509723422644771
Micro F1 Test:  0.7114077669902914
Weighted Recall Train Dev:  0.7509723422644771
Weighted Recall Test:  0.7114077669902913
Macro Recall Train Dev:  0.7436016514004586
Macro Recall Test:  0.7015767310043768
Micro Recall Train Dev:  0.7509723422644771
Micro Recall Test:  0.7114077669902913
Confusion Matrix Train Dev: 
[[11054  6063]
 [ 3157 16750]]
Confusion Matrix Test: 
[[1119  733]
 [ 456 1812]]


#### LatentHatred

In [19]:
# Pool the train and dev splits; the grid search cross-validates on this pool.
# NOTE: the source arrays are named bert_hatebert_bertweet_* while this cell's
# own names and output paths say bert_bertweet_hatebert_*.
bert_bertweet_hatebert_interleaved_latenthatred_train_dev = np.concatenate(
    [bert_hatebert_bertweet_interleaved_latenthatred_train, bert_hatebert_bertweet_interleaved_latenthatred_dev],
    axis=0,
)
bert_bertweet_hatebert_interleaved_latenthatred_labels_train_dev = np.concatenate(
    [latenthatred_labels_train, latenthatred_labels_dev],
    axis=0,
)

# Hyper-parameter search on the three-model features for LatentHatred.
grid_results = gridsearch.fit(
    bert_bertweet_hatebert_interleaved_latenthatred_train_dev,
    bert_bertweet_hatebert_interleaved_latenthatred_labels_train_dev,
)
best_params, mlp = grid_results.best_params_, grid_results.best_estimator_
print("Best params: ", best_params)

# Predict with the selected estimator on the pooled data and on the test split.
train_dev_preds = mlp.predict(bert_bertweet_hatebert_interleaved_latenthatred_train_dev)
test_preds = mlp.predict(bert_hatebert_bertweet_interleaved_latenthatred_test)

# Report/store the metrics, then persist the fitted model.
computeAllScores(
    train_dev_preds,
    test_preds,
    latenthatred_labels_train_dev,
    latenthatred_labels_test,
    "Results/bert_bertweet_hatebert_interleaved_latenthatred",
)
save_model(mlp, "Saves/bert_bertweet_hatebert_interleaved_latenthatred")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best params:  {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': (128, 64), 'learning_rate': 'adaptive', 'learning_rate_init': 0.001, 'max_iter': 10000, 'solver': 'adam'}
Accuracy Train Dev:  0.7130974549968964
Accuracy Test:  0.6916201117318436
Weighted F1 Train Dev:  0.6937655655309716
Weighted F1 Test:  0.6701437256879693
Macro F1 Train Dev:  0.5050562930914425
Macro F1 Test:  0.47411488618264847
Micro F1 Train Dev:  0.7130974549968964
Micro F1 Test:  0.6916201117318436
Weighted Recall Train Dev:  0.7130974549968964
Weighted Recall Test:  0.6916201117318436
Macro Recall Train Dev:  0.49193650307247444
Macro Recall Test:  0.465482383518117
Micro Recall Train Dev:  0.7130974549968964
Micro Recall Test:  0.6916201117318436
Confusion Matrix Train Dev: 
[[8558 1354   33]
 [2450 2865   32]
 [ 340  413   65]]
Confusion Matrix Test: 
[[2843  496    7]
 [ 885  855   13]
 [ 116  139   16]]


#### OLID

In [20]:
# Pool the train and dev splits; the grid search cross-validates on this pool.
# NOTE: the source arrays are named bert_hatebert_bertweet_* while this cell's
# own names and output paths say bert_bertweet_hatebert_*.
bert_bertweet_hatebert_interleaved_olid_train_dev = np.concatenate(
    [bert_hatebert_bertweet_interleaved_olid_train, bert_hatebert_bertweet_interleaved_olid_dev],
    axis=0,
)
bert_bertweet_hatebert_interleaved_olid_labels_train_dev = np.concatenate(
    [olid_labels_train, olid_labels_dev],
    axis=0,
)

# Hyper-parameter search on the three-model features for OLID.
grid_results = gridsearch.fit(
    bert_bertweet_hatebert_interleaved_olid_train_dev,
    bert_bertweet_hatebert_interleaved_olid_labels_train_dev,
)
best_params, mlp = grid_results.best_params_, grid_results.best_estimator_
print("Best params: ", best_params)

# Predict with the selected estimator on the pooled data and on the test split.
train_dev_preds = mlp.predict(bert_bertweet_hatebert_interleaved_olid_train_dev)
test_preds = mlp.predict(bert_hatebert_bertweet_interleaved_olid_test)

# Report/store the metrics, then persist the fitted model.
computeAllScores(
    train_dev_preds,
    test_preds,
    olid_labels_train_dev,
    olid_labels_test,
    "Results/bert_bertweet_hatebert_interleaved_olid",
)
save_model(mlp, "Saves/bert_bertweet_hatebert_interleaved_olid")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best params:  {'activation': 'relu', 'early_stopping': True, 'hidden_layer_sizes': 128, 'learning_rate': 'adaptive', 'learning_rate_init': 0.0001, 'max_iter': 10000, 'solver': 'adam'}
Accuracy Train Dev:  0.7824773413897281
Accuracy Test:  0.8093023255813954
Weighted F1 Train Dev:  0.7731388194857468
Weighted F1 Test:  0.7903074267807074
Macro F1 Train Dev:  0.7359855602978649
Macro F1 Test:  0.7209029888551166
Micro F1 Train Dev:  0.7824773413897281
Micro F1 Test:  0.8093023255813954
Weighted Recall Train Dev:  0.7824773413897281
Weighted Recall Test:  0.8093023255813954
Macro Recall Train Dev:  0.7230676676264911
Macro Recall Test:  0.6966397849462366
Micro Recall Train Dev:  0.7824773413897281
Micro Recall Test:  0.8093023255813954
Confusion Matrix Train Dev: 
[[7958  882]
 [1998 2402]]
Confusion Matrix Test: 
[[590  30]
 [134 106]]
