In [19]:
from utils import *
import torch, os
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

In [20]:
def scaling_embeddings(embeddings, scaler=None):
    """Standardise every embedding feature to zero mean and unit variance.

    The previous implementation fitted a fresh ``StandardScaler`` on each
    ``embeddings[i]`` separately. For input shaped ``(n_samples, 1, dim)`` that
    is a one-row matrix per call, so every value was centred on itself and the
    result was all zeros; it also overwrote the caller's array in place and
    needed one scaler fit per sample. Statistics are now computed per feature
    across all samples, in a single pass, and the input is left untouched.

    Args:
        embeddings: array-like or tensor whose first axis indexes samples,
            e.g. ``(n_samples, dim)`` or ``(n_samples, 1, dim)``.
        scaler: optional, already fitted object exposing ``transform``
            (for example a ``StandardScaler`` fitted on the training split).
            When omitted, a new ``StandardScaler`` is fitted on ``embeddings``.

    Returns:
        The scaled embeddings, with the same shape as the input. Inputs that
        expose ``detach``/``new_tensor`` (i.e. tensors) are returned as a new
        tensor created through ``embeddings.new_tensor``; anything else is
        returned as a NumPy array.
    """
    import numpy as np  # local import: numpy is not imported explicitly by the notebook

    is_tensor = hasattr(embeddings, "detach") and hasattr(embeddings, "new_tensor")
    values = embeddings.detach().cpu().numpy() if is_tensor else np.asarray(embeddings)

    # One row per sample; any trailing axes are flattened into the feature axis.
    rows = values.reshape(values.shape[0], -1)
    if scaler is None:
        scaled_rows = StandardScaler().fit_transform(rows)
    else:
        scaled_rows = scaler.transform(rows)

    scaled = np.asarray(scaled_rows).reshape(values.shape)
    return embeddings.new_tensor(scaled) if is_tensor else scaled

In [21]:
def reshape_embeddings(embeddings):
    """Drop axis 1 of ``embeddings`` when that axis has length one.

    Args:
        embeddings: object exposing ``shape`` and ``squeeze`` with at least two
            axes, e.g. an array shaped ``(n_samples, 1, dim)``.

    Returns:
        ``embeddings.squeeze(1)`` if ``embeddings.shape[1] == 1``; otherwise the
        very same object, unchanged.
    """
    has_singleton_axis = embeddings.shape[1] == 1
    return embeddings.squeeze(1) if has_singleton_axis else embeddings

In [22]:
bert_dynahate_train_embeddings.shape

(32924, 1, 768)

In [23]:
# Standardise the BERT embeddings of every dataset.
#
# The scaler is fitted on the training split only and then applied unchanged to
# dev and test, so all three splits live in the same feature space and no
# dev/test statistics leak into preprocessing. Scaling is done on the squeezed
# 2-D view (one row per sample): running StandardScaler on one (1, dim) row at
# a time centres every value on itself, which yields zeros, and is very slow.
# The raw bert_* arrays are not modified, so the reshaped_* variables built
# later still hold unscaled values.

def _standardize_splits(train, dev, test):
    """Return (train, dev, test) scaled with statistics taken from `train` only."""
    scaler = StandardScaler().fit(reshape_embeddings(train))
    return tuple(scaler.transform(reshape_embeddings(split)) for split in (train, dev, test))

(
    scaled_bert_dynahate_train_embeddings,
    scaled_bert_dynahate_dev_embeddings,
    scaled_bert_dynahate_test_embeddings,
) = _standardize_splits(
    bert_dynahate_train_embeddings,
    bert_dynahate_dev_embeddings,
    bert_dynahate_test_embeddings,
)

(
    scaled_bert_latenthatred_train_embeddings,
    scaled_bert_latenthatred_dev_embeddings,
    scaled_bert_latenthatred_test_embeddings,
) = _standardize_splits(
    bert_latenthatred_train_embeddings,
    bert_latenthatred_dev_embeddings,
    bert_latenthatred_test_embeddings,
)

(
    scaled_bert_olid_train_embeddings,
    scaled_bert_olid_dev_embeddings,
    scaled_bert_olid_test_embeddings,
) = _standardize_splits(
    bert_olid_train_embeddings,
    bert_olid_dev_embeddings,
    bert_olid_test_embeddings,
)

KeyboardInterrupt: 

In [None]:
scaled_bert_dynahate_train_embeddings.shape

(32924, 768)

In [None]:
# Squeeze the singleton axis out of the BERT embeddings of every dataset split
# (see reshape_embeddings); one group of three splits per dataset.

(
    reshaped_bert_dynahate_train_embeddings,
    reshaped_bert_dynahate_test_embeddings,
    reshaped_bert_dynahate_dev_embeddings,
) = map(
    reshape_embeddings,
    (
        bert_dynahate_train_embeddings,
        bert_dynahate_test_embeddings,
        bert_dynahate_dev_embeddings,
    ),
)

(
    reshaped_bert_latenthatred_train_embeddings,
    reshaped_bert_latenthatred_test_embeddings,
    reshaped_bert_latenthatred_dev_embeddings,
) = map(
    reshape_embeddings,
    (
        bert_latenthatred_train_embeddings,
        bert_latenthatred_test_embeddings,
        bert_latenthatred_dev_embeddings,
    ),
)

(
    reshaped_bert_olid_train_embeddings,
    reshaped_bert_olid_test_embeddings,
    reshaped_bert_olid_dev_embeddings,
) = map(
    reshape_embeddings,
    (
        bert_olid_train_embeddings,
        bert_olid_test_embeddings,
        bert_olid_dev_embeddings,
    ),
)

In [None]:
reshaped_bert_dynahate_train_embeddings.shape

(32924, 768)

In [None]:
# Load the train/dev/test labels of each dataset.
# DynaHate labels are additionally passed through process_labels; the
# LatentHatred and OLID labels are used exactly as read_labels returns them.
dynahate_labels_train, dynahate_labels_dev, dynahate_labels_test = (
    process_labels(read_labels("dynahate", split)) for split in ("train", "dev", "test")
)

latenthatred_labels_train, latenthatred_labels_dev, latenthatred_labels_test = (
    read_labels("latenthatred", split) for split in ("train", "dev", "test")
)

olid_labels_train, olid_labels_dev, olid_labels_test = (
    read_labels("olid", split) for split in ("train", "dev", "test")
)

In [None]:
# Base estimator for the hyper-parameter search. A fixed random_state makes
# weight initialisation and mini-batch shuffling repeatable between runs.
mlp = MLPClassifier(random_state=42)

# os.cpu_count() may return None, and the integer division by 3 yields 0 on
# hosts with fewer than three cores; n_jobs=0 is rejected by joblib, so the
# worker count is clamped to at least one.
n_search_jobs = max(1, (os.cpu_count() or 1) // 3)

# Exhaustive search over 2 * 2 * 2 * 2 * 2 = 32 parameter combinations.
gridsearch = GridSearchCV(
    mlp,
    param_grid={
        "hidden_layer_sizes": [(256, 128), (256, 128, 64)],
        "activation": ["relu", "tanh"],
        "solver": ["adam", "sgd"],
        "learning_rate_init": [0.001, 0.0001],
        "learning_rate": ["constant", "adaptive"],
        # "early_stopping": [True]
    },
    verbose=4,
    n_jobs=n_search_jobs,
)

### DynaHate

##### Embeddings without normalization

In [None]:
grid_results = gridsearch.fit(reshaped_bert_dynahate_train_embeddings, dynahate_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

{'activation': 'relu',
 'hidden_layer_sizes': (256, 128),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'solver': 'adam'}

In [None]:
# Refit the selected estimator on the training embeddings and persist it.
# NOTE(review): the grid search for this section was run on
# reshaped_bert_dynahate_train_embeddings, whereas this refit, the saved file
# name and the predictions that follow use the prescaling_bert_bertweet_*
# tensors, which are not created in any visible cell — confirm which
# embeddings this section is meant to evaluate.
mlp = mlp.fit(prescaling_bert_bertweet_dynahate_train_embeddings.cpu(), dynahate_labels_train)
save_model(mlp, "dynahate_bert_bertweet_without_scaling.pickle")

In [None]:
train_preds = mlp.predict(prescaling_bert_bertweet_dynahate_train_embeddings.cpu())
dev_preds = mlp.predict(prescaling_bert_bertweet_dynahate_dev_embeddings.cpu())
test_preds = mlp.predict(prescaling_bert_bertweet_dynahate_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, dynahate_labels_train, dynahate_labels_dev, dynahate_labels_test)

##### Concatenated normalized embeddings

In [None]:
grid_results = gridsearch.fit(scaled_prescaling_bert_bertweet_dynahate_train_embeddings.cpu(), dynahate_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
# The hyper-parameters were selected on the *scaled* training embeddings and
# the predictions below are made on scaled embeddings, so the final fit has to
# use the scaled training set too (it previously used the unscaled tensor).
mlp = mlp.fit(scaled_prescaling_bert_bertweet_dynahate_train_embeddings.cpu(), dynahate_labels_train)
save_model(mlp, "dynahate_bert_bertweet_with_pre_scaling.pickle")

In [None]:
train_preds = mlp.predict(scaled_prescaling_bert_bertweet_dynahate_train_embeddings.cpu())
dev_preds = mlp.predict(scaled_prescaling_bert_bertweet_dynahate_dev_embeddings.cpu())
test_preds = mlp.predict(scaled_prescaling_bert_bertweet_dynahate_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, dynahate_labels_train, dynahate_labels_dev, dynahate_labels_test)

##### Normalized concatenated embeddings

In [None]:
grid_results = gridsearch.fit(postscaling_bert_bertweet_dynahate_train_embeddings.cpu(), dynahate_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(postscaling_bert_bertweet_dynahate_train_embeddings.cpu(), dynahate_labels_train)
save_model(mlp, "dynahate_bert_bertweet_with_post_scaling.pickle")

In [None]:
train_preds = mlp.predict(postscaling_bert_bertweet_dynahate_train_embeddings.cpu())
dev_preds = mlp.predict(postscaling_bert_bertweet_dynahate_dev_embeddings.cpu())
test_preds = mlp.predict(postscaling_bert_bertweet_dynahate_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, dynahate_labels_train, dynahate_labels_dev, dynahate_labels_test)

#### LatentHatred

In [None]:
# Standardise the concatenated embeddings with statistics estimated on the
# training split only, then apply that same transform to dev and test.
# New tensors are created, so the unscaled prescaling_* tensors used by the
# "without normalization" experiment keep their original values.
# Assumes the prescaling_* tensors are 2-D (samples x features), as they are
# passed directly to gridsearch.fit elsewhere — TODO confirm.
bert_bertweet_latenthatred_scaler = StandardScaler().fit(
    prescaling_bert_bertweet_latenthatred_train_embeddings.detach().cpu().numpy()
)
(
    scaled_prescaling_bert_bertweet_latenthatred_train_embeddings,
    scaled_prescaling_bert_bertweet_latenthatred_dev_embeddings,
    scaled_prescaling_bert_bertweet_latenthatred_test_embeddings,
) = (
    torch.from_numpy(bert_bertweet_latenthatred_scaler.transform(split.detach().cpu().numpy()))
    for split in (
        prescaling_bert_bertweet_latenthatred_train_embeddings,
        prescaling_bert_bertweet_latenthatred_dev_embeddings,
        prescaling_bert_bertweet_latenthatred_test_embeddings,
    )
)

##### Concatenated embeddings without normalization

In [None]:
grid_results = gridsearch.fit(prescaling_bert_bertweet_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(prescaling_bert_bertweet_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)
save_model(mlp, "latenthatred_bert_bertweet_without_scaling.pickle")

In [None]:
train_preds = mlp.predict(prescaling_bert_bertweet_latenthatred_train_embeddings.cpu())
dev_preds = mlp.predict(prescaling_bert_bertweet_latenthatred_dev_embeddings.cpu())
test_preds = mlp.predict(prescaling_bert_bertweet_latenthatred_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, latenthatred_labels_train, latenthatred_labels_dev, latenthatred_labels_test)

##### Concatenated normalized embeddings

In [None]:
grid_results = gridsearch.fit(scaled_prescaling_bert_bertweet_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
# The hyper-parameters were selected on the *scaled* training embeddings and
# the predictions below are made on scaled embeddings, so the final fit has to
# use the scaled training set too (it previously used the unscaled tensor).
mlp = mlp.fit(scaled_prescaling_bert_bertweet_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)
save_model(mlp, "latenthatred_bert_bertweet_with_pre_scaling.pickle")

In [None]:
train_preds = mlp.predict(scaled_prescaling_bert_bertweet_latenthatred_train_embeddings.cpu())
dev_preds = mlp.predict(scaled_prescaling_bert_bertweet_latenthatred_dev_embeddings.cpu())
test_preds = mlp.predict(scaled_prescaling_bert_bertweet_latenthatred_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, latenthatred_labels_train, latenthatred_labels_dev, latenthatred_labels_test)

##### Normalized concatenated embeddings

In [None]:
grid_results = gridsearch.fit(postscaling_bert_bertweet_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(postscaling_bert_bertweet_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)
save_model(mlp, "latenthatred_bert_bertweet_with_post_scaling.pickle")

In [None]:
train_preds = mlp.predict(postscaling_bert_bertweet_latenthatred_train_embeddings.cpu())
dev_preds = mlp.predict(postscaling_bert_bertweet_latenthatred_dev_embeddings.cpu())
test_preds = mlp.predict(postscaling_bert_bertweet_latenthatred_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, latenthatred_labels_train, latenthatred_labels_dev, latenthatred_labels_test)

#### OLID

In [None]:
# Standardise the concatenated embeddings with statistics estimated on the
# training split only, then apply that same transform to dev and test.
# New tensors are created, so the unscaled prescaling_* tensors used by the
# "without normalization" experiment keep their original values.
# Assumes the prescaling_* tensors are 2-D (samples x features), as they are
# passed directly to gridsearch.fit elsewhere — TODO confirm.
bert_bertweet_olid_scaler = StandardScaler().fit(
    prescaling_bert_bertweet_olid_train_embeddings.detach().cpu().numpy()
)
(
    scaled_prescaling_bert_bertweet_olid_train_embeddings,
    scaled_prescaling_bert_bertweet_olid_dev_embeddings,
    scaled_prescaling_bert_bertweet_olid_test_embeddings,
) = (
    torch.from_numpy(bert_bertweet_olid_scaler.transform(split.detach().cpu().numpy()))
    for split in (
        prescaling_bert_bertweet_olid_train_embeddings,
        prescaling_bert_bertweet_olid_dev_embeddings,
        prescaling_bert_bertweet_olid_test_embeddings,
    )
)

##### Concatenated embeddings without normalization

In [None]:
grid_results = gridsearch.fit(prescaling_bert_bertweet_olid_train_embeddings.cpu(), olid_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(prescaling_bert_bertweet_olid_train_embeddings.cpu(), olid_labels_train)
save_model(mlp, "olid_bert_bertweet_without_scaling.pickle")

In [None]:
train_preds = mlp.predict(prescaling_bert_bertweet_olid_train_embeddings.cpu())
dev_preds = mlp.predict(prescaling_bert_bertweet_olid_dev_embeddings.cpu())
test_preds = mlp.predict(prescaling_bert_bertweet_olid_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, olid_labels_train, olid_labels_dev, olid_labels_test)

##### Concatenated normalized embeddings

In [None]:
grid_results = gridsearch.fit(scaled_prescaling_bert_bertweet_olid_train_embeddings.cpu(), olid_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
# The hyper-parameters were selected on the *scaled* training embeddings and
# the predictions below are made on scaled embeddings, so the final fit has to
# use the scaled training set too (it previously used the unscaled tensor).
mlp = mlp.fit(scaled_prescaling_bert_bertweet_olid_train_embeddings.cpu(), olid_labels_train)
# This is the OLID model: the file name used to start with "latenthatred_",
# which overwrote the LatentHatred pre-scaling model saved earlier.
save_model(mlp, "olid_bert_bertweet_with_pre_scaling.pickle")

In [None]:
train_preds = mlp.predict(scaled_prescaling_bert_bertweet_olid_train_embeddings.cpu())
dev_preds = mlp.predict(scaled_prescaling_bert_bertweet_olid_dev_embeddings.cpu())
test_preds = mlp.predict(scaled_prescaling_bert_bertweet_olid_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, olid_labels_train, olid_labels_dev, olid_labels_test)

##### Normalized concatenated embeddings

In [None]:
grid_results = gridsearch.fit(postscaling_bert_bertweet_olid_train_embeddings.cpu(), olid_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(postscaling_bert_bertweet_olid_train_embeddings.cpu(), olid_labels_train)
save_model(mlp, "olid_bert_bertweet_with_post_scaling.pickle")

In [None]:
train_preds = mlp.predict(postscaling_bert_bertweet_olid_train_embeddings.cpu())
dev_preds = mlp.predict(postscaling_bert_bertweet_olid_dev_embeddings.cpu())
test_preds = mlp.predict(postscaling_bert_bertweet_olid_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, olid_labels_train, olid_labels_dev, olid_labels_test)

### BERT-HateBERT

#### DynaHate

In [None]:
# Standardise the concatenated embeddings with statistics estimated on the
# training split only, then apply that same transform to dev and test.
# New tensors are created, so the unscaled prescaling_* tensors used by the
# "without normalization" experiment keep their original values.
# Assumes the prescaling_* tensors are 2-D (samples x features), as they are
# passed directly to gridsearch.fit elsewhere — TODO confirm.
bert_hatebert_dynahate_scaler = StandardScaler().fit(
    prescaling_bert_hatebert_dynahate_train_embeddings.detach().cpu().numpy()
)
(
    scaled_prescaling_bert_hatebert_dynahate_train_embeddings,
    scaled_prescaling_bert_hatebert_dynahate_dev_embeddings,
    scaled_prescaling_bert_hatebert_dynahate_test_embeddings,
) = (
    torch.from_numpy(bert_hatebert_dynahate_scaler.transform(split.detach().cpu().numpy()))
    for split in (
        prescaling_bert_hatebert_dynahate_train_embeddings,
        prescaling_bert_hatebert_dynahate_dev_embeddings,
        prescaling_bert_hatebert_dynahate_test_embeddings,
    )
)

##### Concatenated embeddings without normalization

In [None]:
grid_results = gridsearch.fit(prescaling_bert_hatebert_dynahate_train_embeddings.cpu(), dynahate_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(prescaling_bert_hatebert_dynahate_train_embeddings.cpu(), dynahate_labels_train)
save_model(mlp, "dynahate_bert_hatebert_without_scaling.pickle")

In [None]:
train_preds = mlp.predict(prescaling_bert_hatebert_dynahate_train_embeddings.cpu())
dev_preds = mlp.predict(prescaling_bert_hatebert_dynahate_dev_embeddings.cpu())
test_preds = mlp.predict(prescaling_bert_hatebert_dynahate_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, dynahate_labels_train, dynahate_labels_dev, dynahate_labels_test)

##### Concatenated normalized embeddings

In [None]:
grid_results = gridsearch.fit(scaled_prescaling_bert_hatebert_dynahate_train_embeddings.cpu(), dynahate_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
# The hyper-parameters were selected on the *scaled* training embeddings and
# the predictions below are made on scaled embeddings, so the final fit has to
# use the scaled training set too (it previously used the unscaled tensor).
mlp = mlp.fit(scaled_prescaling_bert_hatebert_dynahate_train_embeddings.cpu(), dynahate_labels_train)
save_model(mlp, "dynahate_bert_hatebert_with_pre_scaling.pickle")

In [None]:
train_preds = mlp.predict(scaled_prescaling_bert_hatebert_dynahate_train_embeddings.cpu())
dev_preds = mlp.predict(scaled_prescaling_bert_hatebert_dynahate_dev_embeddings.cpu())
test_preds = mlp.predict(scaled_prescaling_bert_hatebert_dynahate_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, dynahate_labels_train, dynahate_labels_dev, dynahate_labels_test)

##### Normalized concatenated embeddings

In [None]:
grid_results = gridsearch.fit(postscaling_bert_hatebert_dynahate_train_embeddings.cpu(), dynahate_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(postscaling_bert_hatebert_dynahate_train_embeddings.cpu(), dynahate_labels_train)
save_model(mlp, "dynahate_bert_hatebert_with_post_scaling.pickle")

In [None]:
train_preds = mlp.predict(postscaling_bert_hatebert_dynahate_train_embeddings.cpu())
dev_preds = mlp.predict(postscaling_bert_hatebert_dynahate_dev_embeddings.cpu())
test_preds = mlp.predict(postscaling_bert_hatebert_dynahate_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, dynahate_labels_train, dynahate_labels_dev, dynahate_labels_test)

#### LatentHatred

In [None]:
# Standardise the concatenated embeddings with statistics estimated on the
# training split only, then apply that same transform to dev and test.
# New tensors are created, so the unscaled prescaling_* tensors used by the
# "without normalization" experiment keep their original values.
# Assumes the prescaling_* tensors are 2-D (samples x features), as they are
# passed directly to gridsearch.fit elsewhere — TODO confirm.
bert_hatebert_latenthatred_scaler = StandardScaler().fit(
    prescaling_bert_hatebert_latenthatred_train_embeddings.detach().cpu().numpy()
)
(
    scaled_prescaling_bert_hatebert_latenthatred_train_embeddings,
    scaled_prescaling_bert_hatebert_latenthatred_dev_embeddings,
    scaled_prescaling_bert_hatebert_latenthatred_test_embeddings,
) = (
    torch.from_numpy(bert_hatebert_latenthatred_scaler.transform(split.detach().cpu().numpy()))
    for split in (
        prescaling_bert_hatebert_latenthatred_train_embeddings,
        prescaling_bert_hatebert_latenthatred_dev_embeddings,
        prescaling_bert_hatebert_latenthatred_test_embeddings,
    )
)

##### Concatenated embeddings without normalization

In [None]:
grid_results = gridsearch.fit(prescaling_bert_hatebert_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(prescaling_bert_hatebert_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)
save_model(mlp, "latenthatred_bert_hatebert_without_scaling.pickle")

In [None]:
train_preds = mlp.predict(prescaling_bert_hatebert_latenthatred_train_embeddings.cpu())
dev_preds = mlp.predict(prescaling_bert_hatebert_latenthatred_dev_embeddings.cpu())
test_preds = mlp.predict(prescaling_bert_hatebert_latenthatred_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, latenthatred_labels_train, latenthatred_labels_dev, latenthatred_labels_test)

##### Concatenated normalized embeddings

In [None]:
grid_results = gridsearch.fit(scaled_prescaling_bert_hatebert_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
# The hyper-parameters were selected on the *scaled* training embeddings and
# the predictions below are made on scaled embeddings, so the final fit has to
# use the scaled training set too (it previously used the unscaled tensor).
mlp = mlp.fit(scaled_prescaling_bert_hatebert_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)
save_model(mlp, "latenthatred_bert_hatebert_with_pre_scaling.pickle")

In [None]:
train_preds = mlp.predict(scaled_prescaling_bert_hatebert_latenthatred_train_embeddings.cpu())
dev_preds = mlp.predict(scaled_prescaling_bert_hatebert_latenthatred_dev_embeddings.cpu())
test_preds = mlp.predict(scaled_prescaling_bert_hatebert_latenthatred_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, latenthatred_labels_train, latenthatred_labels_dev, latenthatred_labels_test)

##### Normalized concatenated embeddings

In [None]:
grid_results = gridsearch.fit(postscaling_bert_hatebert_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(postscaling_bert_hatebert_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)
save_model(mlp, "latenthatred_bert_hatebert_with_post_scaling.pickle")

In [None]:
train_preds = mlp.predict(postscaling_bert_hatebert_latenthatred_train_embeddings.cpu())
dev_preds = mlp.predict(postscaling_bert_hatebert_latenthatred_dev_embeddings.cpu())
test_preds = mlp.predict(postscaling_bert_hatebert_latenthatred_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, latenthatred_labels_train, latenthatred_labels_dev, latenthatred_labels_test)

#### OLID

In [None]:
# Standardise the concatenated embeddings with statistics estimated on the
# training split only, then apply that same transform to dev and test.
# New tensors are created, so the unscaled prescaling_* tensors used by the
# "without normalization" experiment keep their original values.
# Assumes the prescaling_* tensors are 2-D (samples x features), as they are
# passed directly to gridsearch.fit elsewhere — TODO confirm.
bert_hatebert_olid_scaler = StandardScaler().fit(
    prescaling_bert_hatebert_olid_train_embeddings.detach().cpu().numpy()
)
(
    scaled_prescaling_bert_hatebert_olid_train_embeddings,
    scaled_prescaling_bert_hatebert_olid_dev_embeddings,
    scaled_prescaling_bert_hatebert_olid_test_embeddings,
) = (
    torch.from_numpy(bert_hatebert_olid_scaler.transform(split.detach().cpu().numpy()))
    for split in (
        prescaling_bert_hatebert_olid_train_embeddings,
        prescaling_bert_hatebert_olid_dev_embeddings,
        prescaling_bert_hatebert_olid_test_embeddings,
    )
)

##### Concatenated embeddings without normalization

In [None]:
grid_results = gridsearch.fit(prescaling_bert_hatebert_olid_train_embeddings.cpu(), olid_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(prescaling_bert_hatebert_olid_train_embeddings.cpu(), olid_labels_train)
save_model(mlp, "olid_bert_hatebert_without_scaling.pickle")

In [None]:
train_preds = mlp.predict(prescaling_bert_hatebert_olid_train_embeddings.cpu())
dev_preds = mlp.predict(prescaling_bert_hatebert_olid_dev_embeddings.cpu())
test_preds = mlp.predict(prescaling_bert_hatebert_olid_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, olid_labels_train, olid_labels_dev, olid_labels_test)

##### Concatenated normalized embeddings

In [None]:
grid_results = gridsearch.fit(scaled_prescaling_bert_hatebert_olid_train_embeddings.cpu(), olid_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
# The hyper-parameters were selected on the *scaled* training embeddings and
# the predictions below are made on scaled embeddings, so the final fit has to
# use the scaled training set too (it previously used the unscaled tensor).
mlp = mlp.fit(scaled_prescaling_bert_hatebert_olid_train_embeddings.cpu(), olid_labels_train)
# This is the OLID model: the file name used to start with "latenthatred_",
# which overwrote the LatentHatred pre-scaling model saved earlier.
save_model(mlp, "olid_bert_hatebert_with_pre_scaling.pickle")

In [None]:
train_preds = mlp.predict(scaled_prescaling_bert_hatebert_olid_train_embeddings.cpu())
dev_preds = mlp.predict(scaled_prescaling_bert_hatebert_olid_dev_embeddings.cpu())
test_preds = mlp.predict(scaled_prescaling_bert_hatebert_olid_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, olid_labels_train, olid_labels_dev, olid_labels_test)

##### Normalized concatenated embeddings

In [None]:
grid_results = gridsearch.fit(postscaling_bert_hatebert_olid_train_embeddings.cpu(), olid_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(postscaling_bert_hatebert_olid_train_embeddings.cpu(), olid_labels_train)
save_model(mlp, "olid_bert_hatebert_with_post_scaling.pickle")

In [None]:
train_preds = mlp.predict(postscaling_bert_hatebert_olid_train_embeddings.cpu())
dev_preds = mlp.predict(postscaling_bert_hatebert_olid_dev_embeddings.cpu())
test_preds = mlp.predict(postscaling_bert_hatebert_olid_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, olid_labels_train, olid_labels_dev, olid_labels_test)

### BERTweet-HateBERT

#### DynaHate

In [None]:
# Standardise the concatenated embeddings with statistics estimated on the
# training split only, then apply that same transform to dev and test.
# New tensors are created, so the unscaled prescaling_* tensors used by the
# "without normalization" experiment keep their original values.
# Assumes the prescaling_* tensors are 2-D (samples x features), as they are
# passed directly to gridsearch.fit elsewhere — TODO confirm.
bertweet_hatebert_dynahate_scaler = StandardScaler().fit(
    prescaling_bertweet_hatebert_dynahate_train_embeddings.detach().cpu().numpy()
)
(
    scaled_prescaling_bertweet_hatebert_dynahate_train_embeddings,
    scaled_prescaling_bertweet_hatebert_dynahate_dev_embeddings,
    scaled_prescaling_bertweet_hatebert_dynahate_test_embeddings,
) = (
    torch.from_numpy(bertweet_hatebert_dynahate_scaler.transform(split.detach().cpu().numpy()))
    for split in (
        prescaling_bertweet_hatebert_dynahate_train_embeddings,
        prescaling_bertweet_hatebert_dynahate_dev_embeddings,
        prescaling_bertweet_hatebert_dynahate_test_embeddings,
    )
)

##### Concatenated embeddings without normalization

In [None]:
grid_results = gridsearch.fit(prescaling_bertweet_hatebert_dynahate_train_embeddings.cpu(), dynahate_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(prescaling_bertweet_hatebert_dynahate_train_embeddings.cpu(), dynahate_labels_train)
save_model(mlp, "dynahate_bertweet_hatebert_without_scaling.pickle")

In [None]:
train_preds = mlp.predict(prescaling_bertweet_hatebert_dynahate_train_embeddings.cpu())
dev_preds = mlp.predict(prescaling_bertweet_hatebert_dynahate_dev_embeddings.cpu())
test_preds = mlp.predict(prescaling_bertweet_hatebert_dynahate_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, dynahate_labels_train, dynahate_labels_dev, dynahate_labels_test)

##### Concatenated normalized embeddings

In [None]:
grid_results = gridsearch.fit(scaled_prescaling_bertweet_hatebert_dynahate_train_embeddings.cpu(), dynahate_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
# The hyper-parameters were selected on the *scaled* training embeddings and
# the predictions below are made on scaled embeddings, so the final fit has to
# use the scaled training set too (it previously used the unscaled tensor).
mlp = mlp.fit(scaled_prescaling_bertweet_hatebert_dynahate_train_embeddings.cpu(), dynahate_labels_train)
save_model(mlp, "dynahate_bertweet_hatebert_with_pre_scaling.pickle")

In [None]:
train_preds = mlp.predict(scaled_prescaling_bertweet_hatebert_dynahate_train_embeddings.cpu())
dev_preds = mlp.predict(scaled_prescaling_bertweet_hatebert_dynahate_dev_embeddings.cpu())
test_preds = mlp.predict(scaled_prescaling_bertweet_hatebert_dynahate_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, dynahate_labels_train, dynahate_labels_dev, dynahate_labels_test)

##### Normalized concatenated embeddings

In [None]:
grid_results = gridsearch.fit(postscaling_bertweet_hatebert_dynahate_train_embeddings.cpu(), dynahate_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(postscaling_bertweet_hatebert_dynahate_train_embeddings.cpu(), dynahate_labels_train)
save_model(mlp, "dynahate_bertweet_hatebert_with_post_scaling.pickle")

In [None]:
train_preds = mlp.predict(postscaling_bertweet_hatebert_dynahate_train_embeddings.cpu())
dev_preds = mlp.predict(postscaling_bertweet_hatebert_dynahate_dev_embeddings.cpu())
test_preds = mlp.predict(postscaling_bertweet_hatebert_dynahate_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, dynahate_labels_train, dynahate_labels_dev, dynahate_labels_test)

#### LatentHatred

In [None]:
# Standardise the concatenated embeddings with statistics estimated on the
# training split only, then apply that same transform to dev and test.
# New tensors are created, so the unscaled prescaling_* tensors used by the
# "without normalization" experiment keep their original values.
# Assumes the prescaling_* tensors are 2-D (samples x features), as they are
# passed directly to gridsearch.fit elsewhere — TODO confirm.
bertweet_hatebert_latenthatred_scaler = StandardScaler().fit(
    prescaling_bertweet_hatebert_latenthatred_train_embeddings.detach().cpu().numpy()
)
(
    scaled_prescaling_bertweet_hatebert_latenthatred_train_embeddings,
    scaled_prescaling_bertweet_hatebert_latenthatred_dev_embeddings,
    scaled_prescaling_bertweet_hatebert_latenthatred_test_embeddings,
) = (
    torch.from_numpy(bertweet_hatebert_latenthatred_scaler.transform(split.detach().cpu().numpy()))
    for split in (
        prescaling_bertweet_hatebert_latenthatred_train_embeddings,
        prescaling_bertweet_hatebert_latenthatred_dev_embeddings,
        prescaling_bertweet_hatebert_latenthatred_test_embeddings,
    )
)

##### Concatenated embeddings without normalization

In [None]:
grid_results = gridsearch.fit(prescaling_bertweet_hatebert_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
# Refit the selected estimator on the unscaled training embeddings and persist it.
mlp = mlp.fit(prescaling_bertweet_hatebert_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)
# File name corrected from "..._hertbert_..." to "..._hatebert_..." so it
# follows the same naming scheme as every other saved model.
save_model(mlp, "latenthatred_bertweet_hatebert_without_scaling.pickle")

NameError: name 'mlp' is not defined

In [None]:
train_preds = mlp.predict(prescaling_bertweet_hatebert_latenthatred_train_embeddings.cpu())
dev_preds = mlp.predict(prescaling_bertweet_hatebert_latenthatred_dev_embeddings.cpu())
test_preds = mlp.predict(prescaling_bertweet_hatebert_latenthatred_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, latenthatred_labels_train, latenthatred_labels_dev, latenthatred_labels_test)

##### Concatenated normalized embeddings

In [None]:
grid_results = gridsearch.fit(scaled_prescaling_bertweet_hatebert_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
# The hyper-parameters were selected on the *scaled* training embeddings and
# the predictions below are made on scaled embeddings, so the final fit has to
# use the scaled training set too (it previously used the unscaled tensor).
mlp = mlp.fit(scaled_prescaling_bertweet_hatebert_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)
save_model(mlp, "latenthatred_bertweet_hatebert_with_pre_scaling.pickle")

In [None]:
train_preds = mlp.predict(scaled_prescaling_bertweet_hatebert_latenthatred_train_embeddings.cpu())
dev_preds = mlp.predict(scaled_prescaling_bertweet_hatebert_latenthatred_dev_embeddings.cpu())
test_preds = mlp.predict(scaled_prescaling_bertweet_hatebert_latenthatred_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, latenthatred_labels_train, latenthatred_labels_dev, latenthatred_labels_test)

##### Normalized concatenated embeddings

In [None]:
grid_results = gridsearch.fit(postscaling_bertweet_hatebert_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(postscaling_bertweet_hatebert_latenthatred_train_embeddings.cpu(), latenthatred_labels_train)
save_model(mlp, "latenthatred_bertweet_hatebert_with_post_scaling.pickle")

In [None]:
train_preds = mlp.predict(postscaling_bertweet_hatebert_latenthatred_train_embeddings.cpu())
dev_preds = mlp.predict(postscaling_bertweet_hatebert_latenthatred_dev_embeddings.cpu())
test_preds = mlp.predict(postscaling_bertweet_hatebert_latenthatred_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, latenthatred_labels_train, latenthatred_labels_dev, latenthatred_labels_test)

#### OLID

In [None]:
# Standardise the concatenated embeddings with statistics estimated on the
# training split only, then apply that same transform to dev and test.
# New tensors are created, so the unscaled prescaling_* tensors used by the
# "without normalization" experiment keep their original values.
# Assumes the prescaling_* tensors are 2-D (samples x features), as they are
# passed directly to gridsearch.fit elsewhere — TODO confirm.
bertweet_hatebert_olid_scaler = StandardScaler().fit(
    prescaling_bertweet_hatebert_olid_train_embeddings.detach().cpu().numpy()
)
(
    scaled_prescaling_bertweet_hatebert_olid_train_embeddings,
    scaled_prescaling_bertweet_hatebert_olid_dev_embeddings,
    scaled_prescaling_bertweet_hatebert_olid_test_embeddings,
) = (
    torch.from_numpy(bertweet_hatebert_olid_scaler.transform(split.detach().cpu().numpy()))
    for split in (
        prescaling_bertweet_hatebert_olid_train_embeddings,
        prescaling_bertweet_hatebert_olid_dev_embeddings,
        prescaling_bertweet_hatebert_olid_test_embeddings,
    )
)

##### Concatenated embeddings without normalization

In [None]:
grid_results = gridsearch.fit(prescaling_bertweet_hatebert_olid_train_embeddings.cpu(), olid_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(prescaling_bertweet_hatebert_olid_train_embeddings.cpu(), olid_labels_train)
save_model(mlp, "olid_bertweet_hatebert_without_scaling.pickle")

In [None]:
train_preds = mlp.predict(prescaling_bertweet_hatebert_olid_train_embeddings.cpu())
dev_preds = mlp.predict(prescaling_bertweet_hatebert_olid_dev_embeddings.cpu())
test_preds = mlp.predict(prescaling_bertweet_hatebert_olid_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, olid_labels_train, olid_labels_dev, olid_labels_test)

##### Concatenated normalized embeddings

In [None]:
grid_results = gridsearch.fit(scaled_prescaling_bertweet_hatebert_olid_train_embeddings.cpu(), olid_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
# The hyper-parameters were selected on the *scaled* training embeddings and
# the predictions below are made on scaled embeddings, so the final fit has to
# use the scaled training set too (it previously used the unscaled tensor).
mlp = mlp.fit(scaled_prescaling_bertweet_hatebert_olid_train_embeddings.cpu(), olid_labels_train)
# This is the OLID model: the file name used to start with "latenthatred_",
# which overwrote the LatentHatred pre-scaling model saved earlier.
save_model(mlp, "olid_bertweet_hatebert_with_pre_scaling.pickle")

In [None]:
train_preds = mlp.predict(scaled_prescaling_bertweet_hatebert_olid_train_embeddings.cpu())
dev_preds = mlp.predict(scaled_prescaling_bertweet_hatebert_olid_dev_embeddings.cpu())
test_preds = mlp.predict(scaled_prescaling_bertweet_hatebert_olid_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, olid_labels_train, olid_labels_dev, olid_labels_test)

##### Normalized concatenated embeddings

In [None]:
grid_results = gridsearch.fit(postscaling_bertweet_hatebert_olid_train_embeddings.cpu(), olid_labels_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
best_params = grid_results.best_params_
mlp = grid_results.best_estimator_

In [None]:
best_params

In [None]:
mlp = mlp.fit(postscaling_bertweet_hatebert_olid_train_embeddings.cpu(), olid_labels_train)
save_model(mlp, "olid_bertweet_hatebert_with_post_scaling.pickle")

In [None]:
train_preds = mlp.predict(postscaling_bertweet_hatebert_olid_train_embeddings.cpu())
dev_preds = mlp.predict(postscaling_bertweet_hatebert_olid_dev_embeddings.cpu())
test_preds = mlp.predict(postscaling_bertweet_hatebert_olid_test_embeddings.cpu())

In [None]:
computeAllScores(train_preds, dev_preds, test_preds, olid_labels_train, olid_labels_dev, olid_labels_test)