In [14]:
from embeddings_loader import *
from sklearn.cluster import KMeans
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
from utils import *

In [15]:
# Load the label sequences for the train, dev and test splits.
# NOTE(review): the execution counters show this cell was last run after the
# label-encoding cells further down, so the recorded session ended with freshly
# loaded (presumably un-encoded) labels — verify the order with Restart & Run All.
train_labels, dev_labels, test_labels = load_labels()

In [3]:
# Lookup table from label string to integer class id.
# The ids are the positions of the names in this tuple: 0, 1, 2.
label_replacement = {
    name: class_id
    for class_id, name in enumerate(('Hope_speech', 'Non_hope_speech', 'not-English'))
}

In [4]:
# Replace label strings with their integer class ids from `label_replacement`.
# Values that already are class ids are kept unchanged, so re-running this cell
# no longer fails with a KeyError; unknown label strings still raise KeyError.
train_labels, dev_labels, test_labels = (
    [
        label if label in label_replacement.values() else label_replacement[label]
        for label in split
    ]
    for split in (train_labels, dev_labels, test_labels)
)

### GloVe Twitter 25

In [16]:
# Load the GloVe Twitter 25 features for the train, dev and test splits.
# The loader is provided by one of the star imports (presumably embeddings_loader).
gt25_train, gt25_dev, gt25_test = load_glove_twitter_25()

In [17]:
# Clean non-finite entries in every split. With default arguments
# np.nan_to_num returns a copy in which NaN becomes 0.0 and +/-inf become the
# largest/smallest finite values of the dtype.
# NOTE(review): `np` is not imported explicitly in this notebook; it reaches
# the namespace through one of the star imports.
gt25_train, gt25_dev, gt25_test = (
    np.nan_to_num(split) for split in (gt25_train, gt25_dev, gt25_test)
)

In [18]:
# Fit a 3-cluster KMeans on the GloVe Twitter 25 training features and persist it.
# KMeans is unsupervised: scikit-learn's fit() ignores its `y` argument, so the
# labels are no longer passed (the fitted model is unchanged by that).
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn releases — consider pinning it for reproducibility.
k_means = KMeans(n_clusters=3, random_state=0).fit(gt25_train)
save_model(k_means, "k_means_gt25.joblib")

In [19]:
# Assign every sample of each split to its nearest fitted cluster centre.
# NOTE(review): predict() returns cluster indices (0..2); that numbering is
# arbitrary and is not tied to the class ids — confirm the scoring step aligns
# clusters with classes before trusting the metrics.
train_preds, dev_preds, test_preds = map(
    k_means.predict, (gt25_train, gt25_dev, gt25_test)
)

In [20]:
# Print accuracy, F1, recall and confusion matrices for the three splits.
# computeAllScores comes from a star import and receives no labels here, so it
# presumably obtains the ground truth itself — TODO confirm its label encoding.
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.18153062121078992
Accuracy Dev:  0.17622230038691522
Accuracy Test:  0.18657765284609978
Weighted F1 Train:  0.24751635808632247
Weighted F1 Dev:  0.24277870398836413
Weighted F1 Test:  0.25577147685913026
Macro F1 Train:  0.147388082222974
Macro F1 Dev:  0.14283631492082952
Macro F1 Test:  0.15016522555352096
Micro F1 Train:  0.1815306212107899
Micro F1 Dev:  0.17622230038691522
Micro F1 Test:  0.18657765284609978
Weighted Recall Train:  0.18153062121078992
Weighted Recall Dev:  0.17622230038691525
Weighted Recall Test:  0.18657765284609978
Macro Recall Train:  0.32618430603949866
Macro Recall Dev:  0.2107389958708279
Macro Recall Test:  0.23452063247204014
Micro Recall Train:  0.18153062121078992
Micro Recall Dev:  0.17622230038691522
Micro Recall Test:  0.18657765284609978
Confusion Matrix Train: 
[[1099   70  793]
 [8657 3027 9094]
 [   4   12    6]]
Confusion Matrix Dev: 
[[ 133    8  131]
 [1083  368 1118]
 [   0    2    0]]
Confusion Matrix Test: 
[[ 138    5 

### FastText 300 

In [21]:
# Load the FastText 300 features for the train, dev and test splits.
# The loader is provided by one of the star imports (presumably embeddings_loader).
ft300_train, ft300_dev, ft300_test = load_fasttext_300()

In [22]:
# Clean non-finite entries in every split. With default arguments
# np.nan_to_num returns a copy in which NaN becomes 0.0 and +/-inf become the
# largest/smallest finite values of the dtype.
ft300_train, ft300_dev, ft300_test = (
    np.nan_to_num(split) for split in (ft300_train, ft300_dev, ft300_test)
)

In [24]:
# Fit a 3-cluster KMeans on the FastText 300 training features and persist it.
# KMeans is unsupervised: scikit-learn's fit() ignores its `y` argument, so the
# labels are no longer passed (the fitted model is unchanged by that).
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn releases — consider pinning it for reproducibility.
k_means = KMeans(n_clusters=3, random_state=0).fit(ft300_train)
save_model(k_means, "k_means_ft300.joblib")

In [25]:
# Assign every sample of each split to its nearest fitted cluster centre.
# NOTE(review): predict() returns cluster indices (0..2); that numbering is
# arbitrary and is not tied to the class ids — confirm the scoring step aligns
# clusters with classes before trusting the metrics.
train_preds, dev_preds, test_preds = map(
    k_means.predict, (ft300_train, ft300_dev, ft300_test)
)

In [26]:
# Print accuracy, F1, recall and confusion matrices for the FastText 300 clusters.
# computeAllScores comes from a star import and receives no labels here, so it
# presumably obtains the ground truth itself — TODO confirm its label encoding.
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.09138037079342765
Accuracy Dev:  0.09672880759760816
Accuracy Test:  0.08995080815179199
Weighted F1 Train:  0.13087248253966885
Weighted F1 Dev:  0.13325569716627297
Weighted F1 Test:  0.13044425113779154
Macro F1 Train:  0.09425327100502011
Macro F1 Dev:  0.10600388101230933
Macro F1 Test:  0.09230425202211685
Micro F1 Train:  0.09138037079342765
Micro F1 Dev:  0.09672880759760816
Micro F1 Test:  0.08995080815179197
Weighted Recall Train:  0.09138037079342765
Weighted Recall Dev:  0.09672880759760816
Weighted Recall Test:  0.08995080815179199
Macro Recall Train:  0.3564043982385188
Macro Recall Dev:  0.30960108839176
Macro Recall Test:  0.34402810986844923
Micro Recall Train:  0.09138037079342765
Micro Recall Dev:  0.09672880759760816
Micro Recall Test:  0.08995080815179199
Confusion Matrix Train: 
[[  624   115  1223]
 [ 5633  1441 13704]
 [    7     0    15]]
Confusion Matrix Dev: 
[[  98   18  156]
 [ 663  176 1730]
 [   1    0    1]]
Confusion Matrix Test: 
[[ 

### Word2Vec 300

In [27]:
# Load the Word2Vec 300 features for the train, dev and test splits.
# The loader is provided by one of the star imports (presumably embeddings_loader).
w2v300_train, w2v300_dev, w2v300_test = load_word2vec_300()

In [28]:
# Clean non-finite entries in every split. With default arguments
# np.nan_to_num returns a copy in which NaN becomes 0.0 and +/-inf become the
# largest/smallest finite values of the dtype.
w2v300_train, w2v300_dev, w2v300_test = (
    np.nan_to_num(split) for split in (w2v300_train, w2v300_dev, w2v300_test)
)

In [30]:
# Fit a 3-cluster KMeans on the Word2Vec 300 training features and persist it.
# KMeans is unsupervised: scikit-learn's fit() ignores its `y` argument, so the
# labels are no longer passed (the fitted model is unchanged by that).
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn releases — consider pinning it for reproducibility.
k_means = KMeans(n_clusters=3, random_state=0).fit(w2v300_train)
save_model(k_means, "k_means_w2v300.joblib")

In [31]:
# Assign every sample of each split to its nearest fitted cluster centre.
# NOTE(review): predict() returns cluster indices (0..2); that numbering is
# arbitrary and is not tied to the class ids — confirm the scoring step aligns
# clusters with classes before trusting the metrics.
train_preds, dev_preds, test_preds = map(
    k_means.predict, (w2v300_train, w2v300_dev, w2v300_test)
)

In [32]:
# Print accuracy, F1, recall and confusion matrices for the Word2Vec 300 clusters.
# computeAllScores comes from a star import and receives no labels here, so it
# presumably obtains the ground truth itself — TODO confirm its label encoding.
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.601704595378262
Accuracy Dev:  0.5993668659866338
Accuracy Test:  0.5948699929725931
Weighted F1 Train:  0.6959033719130684
Weighted F1 Dev:  0.689509514326368
Weighted F1 Test:  0.6882557941972192
Macro F1 Train:  0.259209304347983
Macro F1 Dev:  0.2591562570724056
Macro F1 Test:  0.25488634436540464
Micro F1 Train:  0.601704595378262
Micro F1 Dev:  0.5993668659866338
Micro F1 Test:  0.5948699929725931
Weighted Recall Train:  0.601704595378262
Weighted Recall Dev:  0.5993668659866338
Weighted Recall Test:  0.5948699929725932
Macro Recall Train:  0.3596312774201172
Macro Recall Dev:  0.3909218319480381
Macro Recall Test:  0.4408071303080945
Micro Recall Train:  0.601704595378262
Micro Recall Dev:  0.5993668659866338
Micro Recall Test:  0.5948699929725931
Confusion Matrix Train: 
[[   24  1452   486]
 [ 1540 13663  5575]
 [    1    12     9]]
Confusion Matrix Dev: 
[[   3  195   74]
 [ 178 1700  691]
 [   1    0    1]]
Confusion Matrix Test: 
[[   1  193   56]
 [ 194 

### TF-IDF PCA (1000 Dims)

In [33]:
# Load the TF-IDF + PCA features for the train, dev and test splits.
# The loader is provided by one of the star imports (presumably embeddings_loader).
# Unlike the word-embedding sections, no NaN clean-up is applied to these features.
tfidf_pca_train, tfidf_pca_dev, tfidf_pca_test = load_tfidf_pca()

In [34]:
# Fit a 3-cluster KMeans on the TF-IDF + PCA training features and persist it.
# KMeans is unsupervised: scikit-learn's fit() ignores its `y` argument, so the
# labels are no longer passed (the fitted model is unchanged by that).
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn releases — consider pinning it for reproducibility.
k_means = KMeans(n_clusters=3, random_state=0).fit(tfidf_pca_train)
save_model(k_means, "k_means_tfidf_pca.joblib")

In [35]:
# Assign every sample of each split to its nearest fitted cluster centre.
# NOTE(review): predict() returns cluster indices (0..2); that numbering is
# arbitrary and is not tied to the class ids — confirm the scoring step aligns
# clusters with classes before trusting the metrics.
train_preds, dev_preds, test_preds = map(
    k_means.predict, (tfidf_pca_train, tfidf_pca_dev, tfidf_pca_test)
)

In [36]:
# Print accuracy, F1, recall and confusion matrices for the TF-IDF + PCA clusters.
# computeAllScores comes from a star import and receives no labels here, so it
# presumably obtains the ground truth itself — TODO confirm its label encoding.
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.13904753536596082
Accuracy Dev:  0.13823425958494548
Accuracy Test:  0.15038650737877723
Weighted F1 Train:  0.22831088910939898
Weighted F1 Dev:  0.22530830620676157
Weighted F1 Test:  0.24220668970195194
Macro F1 Train:  0.1378300930462873
Macro F1 Dev:  0.14548665436659838
Macro F1 Test:  0.15596801860050574
Micro F1 Train:  0.13904753536596082
Micro F1 Dev:  0.13823425958494548
Micro F1 Test:  0.15038650737877723
Weighted Recall Train:  0.13904753536596082
Weighted Recall Dev:  0.13823425958494548
Weighted Recall Test:  0.15038650737877723
Macro Recall Train:  0.36377233556457167
Macro Recall Dev:  0.2810823239377495
Macro Recall Test:  0.45904974932510606
Micro Recall Train:  0.13904753536596082
Micro Recall Dev:  0.13823425958494548
Micro Recall Test:  0.15038650737877723
Confusion Matrix Train: 
[[  362   318  1282]
 [ 1736  2786 16256]
 [    1     4    17]]
Confusion Matrix Dev: 
[[  58   36  178]
 [ 226  334 2009]
 [   1    0    1]]
Confusion Matrix Test: 
[

### Sentence Transformer Faster No PCA

In [37]:
# Load the "faster" sentence-transformer features (no PCA) for the three splits.
# NOTE(review): the generic names train/dev/test are rebound by every
# sentence-transformer section, so running cells out of order silently mixes
# feature sets.
train, dev, test = load_sent_trans_fast_no_pca()

In [38]:
# Fit a 3-cluster KMeans on the currently loaded sentence-transformer training
# features ("faster", no PCA) and persist it.
# KMeans is unsupervised: scikit-learn's fit() ignores its `y` argument, so the
# labels are no longer passed (the fitted model is unchanged by that).
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn releases — consider pinning it for reproducibility.
k_means = KMeans(n_clusters=3, random_state=0).fit(train)
save_model(k_means, "k_means_sent_trans_fast_no_pca.joblib")

In [39]:
# Assign every sample of each split to its nearest fitted cluster centre.
# NOTE(review): predict() returns cluster indices (0..2); that numbering is
# arbitrary and is not tied to the class ids — confirm the scoring step aligns
# clusters with classes before trusting the metrics.
train_preds, dev_preds, test_preds = map(k_means.predict, (train, dev, test))

In [40]:
# Print accuracy, F1, recall and confusion matrices for the "faster" (no PCA) clusters.
# computeAllScores comes from a star import and receives no labels here, so it
# presumably obtains the ground truth itself — TODO confirm its label encoding.
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.18153062121078992
Accuracy Dev:  0.1790362293352093
Accuracy Test:  0.18587491215741392
Weighted F1 Train:  0.2747734281083643
Weighted F1 Dev:  0.26935044308319933
Weighted F1 Test:  0.27766552587042764
Macro F1 Train:  0.14011556969253425
Macro F1 Dev:  0.14181942622918112
Macro F1 Test:  0.1460971673361054
Micro F1 Train:  0.1815306212107899
Micro F1 Dev:  0.1790362293352093
Micro F1 Test:  0.18587491215741392
Weighted Recall Train:  0.18153062121078992
Weighted Recall Dev:  0.17903622933520927
Weighted Recall Test:  0.18587491215741392
Macro Recall Train:  0.27789482147876393
Macro Recall Dev:  0.30709095627351757
Macro Recall Test:  0.4852860264815529
Micro Recall Train:  0.18153062121078992
Micro Recall Dev:  0.1790362293352093
Micro Recall Test:  0.18587491215741392
Confusion Matrix Train: 
[[  490   747   725]
 [ 5069  3633 12076]
 [    4     9     9]]
Confusion Matrix Dev: 
[[  68   99  105]
 [ 623  440 1506]
 [   1    0    1]]
Confusion Matrix Test: 
[[  70

### Sentence Transformer Faster PCA

In [41]:
# Load the "faster" sentence-transformer features (PCA variant) for the three splits.
# NOTE(review): the generic names train/dev/test are rebound by every
# sentence-transformer section, so running cells out of order silently mixes
# feature sets.
train, dev, test = load_sent_trans_fast_pca()

In [42]:
# Fit a 3-cluster KMeans on the currently loaded sentence-transformer training
# features ("faster", PCA) and persist it.
# KMeans is unsupervised: scikit-learn's fit() ignores its `y` argument, so the
# labels are no longer passed (the fitted model is unchanged by that).
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn releases — consider pinning it for reproducibility.
k_means = KMeans(n_clusters=3, random_state=0).fit(train)
save_model(k_means, "k_means_sent_trans_fast_pca.joblib")

In [43]:
# Assign every sample of each split to its nearest fitted cluster centre.
# NOTE(review): predict() returns cluster indices (0..2) whose numbering is
# arbitrary. The recorded train confusion matrix for this section is almost
# exactly a column permutation of the no-PCA section's, yet accuracy differs
# (0.25 vs 0.18) — the scores depend on cluster numbering, so confirm how
# clusters are aligned with class ids before trusting them.
train_preds, dev_preds, test_preds = map(k_means.predict, (train, dev, test))

In [44]:
# Print accuracy, F1, recall and confusion matrices for the "faster" (PCA) clusters.
# computeAllScores comes from a star import and receives no labels here, so it
# presumably obtains the ground truth itself — TODO confirm its label encoding.
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.25498638081012215
Accuracy Dev:  0.25606753429475904
Accuracy Test:  0.26317638791286013
Weighted F1 Train:  0.3598936000704762
Weighted F1 Dev:  0.35593078941038625
Weighted F1 Test:  0.3687425678969312
Macro F1 Train:  0.16236791928746389
Macro F1 Dev:  0.16451878372140416
Macro F1 Test:  0.16584946491566122
Micro F1 Train:  0.25498638081012215
Micro F1 Dev:  0.25606753429475904
Micro F1 Test:  0.26317638791286013
Weighted Recall Train:  0.25498638081012215
Weighted Recall Dev:  0.25606753429475904
Weighted Recall Test:  0.26317638791286013
Macro Recall Train:  0.340719445147117
Macro Recall Dev:  0.20951207458460222
Macro Recall Test:  0.20953438745340017
Micro Recall Train:  0.25498638081012215
Micro Recall Dev:  0.25606753429475904
Micro Recall Test:  0.26317638791286013
Confusion Matrix Train: 
[[  724   490   748]
 [12077  5071  3630]
 [    9     4     9]]
Confusion Matrix Dev: 
[[ 105   68   99]
 [1506  623  440]
 [   1    1    0]]
Confusion Matrix Test: 
[[ 

### Sentence Transformer Better No PCA

In [45]:
# Load the "better" sentence-transformer features (no PCA) for the three splits.
# NOTE(review): the generic names train/dev/test are rebound by every
# sentence-transformer section, so running cells out of order silently mixes
# feature sets.
train, dev, test = load_sent_trans_better_no_pca()

In [46]:
# Fit a 3-cluster KMeans on the currently loaded sentence-transformer training
# features ("better", no PCA) and persist it.
# KMeans is unsupervised: scikit-learn's fit() ignores its `y` argument, so the
# labels are no longer passed (the fitted model is unchanged by that).
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn releases — consider pinning it for reproducibility.
k_means = KMeans(n_clusters=3, random_state=0).fit(train)
# NOTE(review): the file name says "best" while the loader says "better"; the
# name is kept as-is because other code may load this exact path.
save_model(k_means, "k_means_sent_trans_best_no_pca.joblib")

In [47]:
# Assign every sample of each split to its nearest fitted cluster centre.
# NOTE(review): predict() returns cluster indices (0..2); that numbering is
# arbitrary and is not tied to the class ids — confirm the scoring step aligns
# clusters with classes before trusting the metrics.
train_preds, dev_preds, test_preds = map(k_means.predict, (train, dev, test))

In [48]:
# Print accuracy, F1, recall and confusion matrices for the "better" (no PCA) clusters.
# computeAllScores comes from a star import and receives no labels here, so it
# presumably obtains the ground truth itself — TODO confirm its label encoding.
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.26276249890167824
Accuracy Dev:  0.2655645444952515
Accuracy Test:  0.26879831342234717
Weighted F1 Train:  0.3747948990278369
Weighted F1 Dev:  0.37513415836791636
Weighted F1 Test:  0.38114795393263773
Macro F1 Train:  0.16384026720836667
Macro F1 Dev:  0.16543060063977458
Macro F1 Test:  0.16483106838829087
Micro F1 Train:  0.26276249890167824
Micro F1 Dev:  0.2655645444952515
Micro F1 Test:  0.26879831342234717
Weighted Recall Train:  0.26276249890167824
Weighted Recall Dev:  0.2655645444952515
Weighted Recall Test:  0.26879831342234717
Macro Recall Train:  0.3390013963269374
Macro Recall Dev:  0.18671767071951395
Macro Recall Test:  0.1874955649826456
Micro Recall Train:  0.26276249890167824
Micro Recall Dev:  0.2655645444952515
Micro Recall Test:  0.26879831342234717
Confusion Matrix Train: 
[[  596   546   820]
 [11501  5375  3902]
 [    8     4    10]]
Confusion Matrix Dev: 
[[  81   80  111]
 [1431  674  464]
 [   1    1    0]]
Confusion Matrix Test: 
[[  74

### Sentence Transformer Better PCA

In [49]:
# Load the "better" sentence-transformer features (PCA variant) for the three splits.
# NOTE(review): the generic names train/dev/test are rebound by every
# sentence-transformer section, so running cells out of order silently mixes
# feature sets.
train, dev, test = load_sent_trans_better_pca()

In [50]:
# Fit a 3-cluster KMeans on the currently loaded sentence-transformer training
# features ("better", PCA) and persist it.
# KMeans is unsupervised: scikit-learn's fit() ignores its `y` argument, so the
# labels are no longer passed (the fitted model is unchanged by that).
# NOTE(review): n_init is left at the library default, which differs between
# scikit-learn releases — consider pinning it for reproducibility.
k_means = KMeans(n_clusters=3, random_state=0).fit(train)
# NOTE(review): the file name says "best" while the loader says "better"; the
# name is kept as-is because other code may load this exact path.
save_model(k_means, "k_means_sent_trans_best_pca.joblib")

In [51]:
# Assign every sample of each split to its nearest fitted cluster centre.
# NOTE(review): predict() returns cluster indices (0..2); that numbering is
# arbitrary and is not tied to the class ids — confirm the scoring step aligns
# clusters with classes before trusting the metrics.
train_preds, dev_preds, test_preds = map(k_means.predict, (train, dev, test))

In [52]:
# Print accuracy, F1, recall and confusion matrices for the "better" (PCA) clusters.
# computeAllScores comes from a star import and receives no labels here, so it
# presumably obtains the ground truth itself — TODO confirm its label encoding.
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.2627185660311045
Accuracy Dev:  0.2655645444952515
Accuracy Test:  0.26879831342234717
Weighted F1 Train:  0.3748332708818302
Weighted F1 Dev:  0.37513415836791636
Weighted F1 Test:  0.381156395881623
Macro F1 Train:  0.1637930676291687
Macro F1 Dev:  0.16543060063977458
Macro F1 Test:  0.16486310277070693
Micro F1 Train:  0.2627185660311045
Micro F1 Dev:  0.2655645444952515
Micro F1 Test:  0.26879831342234717
Weighted Recall Train:  0.2627185660311045
Weighted Recall Dev:  0.2655645444952515
Weighted Recall Test:  0.26879831342234717
Macro Recall Train:  0.33867764960549235
Macro Recall Dev:  0.18671767071951395
Macro Recall Test:  0.1874955649826456
Micro Recall Train:  0.2627185660311045
Micro Recall Dev:  0.2655645444952515
Micro Recall Test:  0.26879831342234717
Confusion Matrix Train: 
[[  594   546   822]
 [11487  5376  3915]
 [    8     4    10]]
Confusion Matrix Dev: 
[[  81   80  111]
 [1431  674  464]
 [   1    1    0]]
Confusion Matrix Test: 
[[  74   85 