In [1]:

from embeddings_loader import *
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
from utils import *

In [2]:

train_labels, dev_labels, test_labels = load_labels()

In [3]:
label_replacement = {
    'Hope_speech': 0,
    'Non_hope_speech': 1,
    'not-English': 2,
}

In [4]:
# Replace labels with numbers
train_labels = [label_replacement[label] for label in train_labels]
dev_labels = [label_replacement[label] for label in dev_labels]
test_labels = [label_replacement[label] for label in test_labels]

In [5]:
logistic_reg = LogisticRegression()
grid = {"penalty" : ['l1', 'l2'], "C" : np.logspace(-3,3,7), "solver" : ['lbfgs', 'liblinear'], "max_iter":[1000]}
gridsearch = GridSearchCV(logistic_reg, param_grid = grid, scoring = "f1_micro", n_jobs=os.cpu_count()//2)

### Glove Twitter 25

In [7]:
gt25_train, gt25_dev, gt25_test = load_glove_twitter_25()

In [8]:
# Set all NaN values to 0
gt25_train = np.nan_to_num(gt25_train)
gt25_dev = np.nan_to_num(gt25_dev)
gt25_test = np.nan_to_num(gt25_test)

In [9]:
grid_results = gridsearch.fit(gt25_train, train_labels)
best_params = grid_results.best_params_
logistic_reg = grid_results.best_estimator_


35 fits failed out of a total of 140.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
35 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 447, in _check_solver
    raise ValueError(
ValueError: Solver lbf

In [10]:
print(best_params)

{'C': 0.01, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'liblinear'}


In [11]:

logistic_reg = logistic_reg.fit(gt25_train, train_labels)
save_model(logistic_reg, "logistic_reg_gt25.joblib")

In [12]:
train_preds = logistic_reg.predict(gt25_train)
dev_preds = logistic_reg.predict(gt25_dev)
test_preds = logistic_reg.predict(gt25_test)

In [13]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9128811176522273
Accuracy Dev:  0.9036229335209286
Accuracy Test:  0.9111033028812369
Weighted F1 Train:  0.8713494955888584
Weighted F1 Dev:  0.85787410059692
Weighted F1 Test:  0.8687225094212346
Macro F1 Train:  0.3184911585290693
Macro F1 Dev:  0.31645725548164577
Macro F1 Test:  0.31782803211374644
Micro F1 Train:  0.9128811176522273
Micro F1 Dev:  0.9036229335209286
Micro F1 Test:  0.9111033028812369
Weighted Recall Train:  0.9128811176522273
Weighted Recall Dev:  0.9036229335209286
Weighted Recall Test:  0.9111033028812369
Macro Recall Train:  0.3335032279986408
Macro Recall Dev:  0.3333333333333333
Macro Recall Test:  0.3333333333333333
Micro Recall Train:  0.9128811176522273
Micro Recall Dev:  0.9036229335209286
Micro Recall Test:  0.9111033028812369
Confusion Matrix Train: 
[[    1  1961     0]
 [    0 20778     0]
 [    0    22     0]]
Confusion Matrix Dev: 
[[   0  272    0]
 [   0 2569    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[   0  250    0]
 [

### FastText 300

In [14]:
ft300_train, ft300_dev, ft300_test = load_fasttext_300()

In [15]:
# Set all NaN values to 0
ft300_train = np.nan_to_num(ft300_train)
ft300_dev = np.nan_to_num(ft300_dev)
ft300_test = np.nan_to_num(ft300_test)

In [16]:
grid_results = gridsearch.fit(ft300_train, train_labels)
best_params = grid_results.best_params_
logistic_reg = grid_results.best_estimator_

35 fits failed out of a total of 140.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
35 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 447, in _check_solver
    raise ValueError(
ValueError: Solver lbf

In [17]:
print(best_params)

{'C': 1.0, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'lbfgs'}


In [18]:
logistic_reg = logistic_reg.fit(ft300_train, train_labels)
save_model(logistic_reg, "logistic_reg_ft300.joblib")

In [19]:
train_preds = logistic_reg.predict(ft300_train)
dev_preds = logistic_reg.predict(ft300_dev)
test_preds = logistic_reg.predict(ft300_test)

In [20]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9142869695105879
Accuracy Dev:  0.9064368624692226
Accuracy Test:  0.9125087842586086
Weighted F1 Train:  0.8788590497232962
Weighted F1 Dev:  0.8693550919504244
Weighted F1 Test:  0.8752741881019258
Macro F1 Train:  0.34563763231027994
Macro F1 Dev:  0.3529130466769665
Macro F1 Test:  0.3407610668900991
Micro F1 Train:  0.9142869695105879
Micro F1 Dev:  0.9064368624692226
Micro F1 Test:  0.9125087842586086
Weighted Recall Train:  0.9142869695105879
Weighted Recall Dev:  0.9064368624692226
Weighted Recall Test:  0.9125087842586086
Macro Recall Train:  0.3470940162637715
Macro Recall Dev:  0.35190315908379705
Macro Recall Test:  0.34469057719501217
Micro Recall Train:  0.9142869695105879
Micro Recall Dev:  0.9064368624692226
Micro Recall Test:  0.9125087842586086
Confusion Matrix Train: 
[[   86  1876     0]
 [   53 20725     0]
 [    0    22     0]]
Confusion Matrix Dev: 
[[  16  256    0]
 [   8 2561    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[   9  241    0]

### Word2Vec 300

In [21]:
w2v300_train, w2v300_dev, w2v300_test = load_word2vec_300()

In [22]:
# Set all NaN values to 0
w2v300_train = np.nan_to_num(w2v300_train)
w2v300_dev = np.nan_to_num(w2v300_dev)
w2v300_test = np.nan_to_num(w2v300_test)

In [23]:
grid_results = gridsearch.fit(w2v300_train, train_labels)
best_params = grid_results.best_params_
logistic_reg = grid_results.best_estimator_

35 fits failed out of a total of 140.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
35 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 447, in _check_solver
    raise ValueError(
ValueError: Solver lbf

In [24]:
logistic_reg = logistic_reg.fit(w2v300_train, train_labels)
save_model(logistic_reg, "logistic_reg_w2v300.joblib")

In [25]:
print(best_params)

{'C': 0.1, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'lbfgs'}


In [26]:
train_preds = logistic_reg.predict(w2v300_train)
dev_preds = logistic_reg.predict(w2v300_dev)
test_preds = logistic_reg.predict(w2v300_test)

In [27]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.914243036640014
Accuracy Dev:  0.9064368624692226
Accuracy Test:  0.9104005621925509
Weighted F1 Train:  0.8791989825142942
Weighted F1 Dev:  0.8699112143150298
Weighted F1 Test:  0.8740860037544342
Macro F1 Train:  0.3470623060519927
Macro F1 Dev:  0.35490221977435915
Macro F1 Test:  0.3398700047159173
Micro F1 Train:  0.914243036640014
Micro F1 Dev:  0.9064368624692226
Micro F1 Test:  0.9104005621925509
Weighted Recall Train:  0.914243036640014
Weighted Recall Dev:  0.9064368624692226
Weighted Recall Test:  0.9104005621925509
Macro Recall Train:  0.3478472339352893
Macro Recall Dev:  0.35299889710652654
Macro Recall Test:  0.3439192698290268
Micro Recall Train:  0.914243036640014
Micro Recall Dev:  0.9064368624692226
Micro Recall Test:  0.9104005621925509
Confusion Matrix Train: 
[[   91  1871     0]
 [   59 20719     0]
 [    0    22     0]]
Confusion Matrix Dev: 
[[  17  255    0]
 [   9 2560    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[   9  241    0]
 [  

### TF-IDF PCA (1000 Dims)

In [28]:
tfidf_pca_train, tfidf_pca_dev, tfidf_pca_test = load_tfidf_pca()

In [29]:
grid_results = gridsearch.fit(tfidf_pca_train, train_labels)
best_params = grid_results.best_params_
logistic_reg = grid_results.best_estimator_

35 fits failed out of a total of 140.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
35 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 447, in _check_solver
    raise ValueError(
ValueError: Solver lbf

In [30]:
print(best_params)

{'C': 10.0, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'liblinear'}


In [31]:

logistic_reg = logistic_reg.fit(tfidf_pca_train, train_labels)
save_model(logistic_reg, "logistic_reg_tfidf_pca.joblib")

In [32]:
train_preds = logistic_reg.predict(tfidf_pca_train)
dev_preds = logistic_reg.predict(tfidf_pca_dev)
test_preds = logistic_reg.predict(tfidf_pca_test)

In [33]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9358140760917318
Accuracy Dev:  0.9162856137882518
Accuracy Test:  0.9226985242445538
Weighted F1 Train:  0.9260862642946405
Weighted F1 Dev:  0.9051578788710987
Weighted F1 Test:  0.9116437521939569
Macro F1 Train:  0.4944128742208025
Macro F1 Dev:  0.466049692708707
Macro F1 Test:  0.46496183555007087
Micro F1 Train:  0.9358140760917318
Micro F1 Dev:  0.9162856137882518
Micro F1 Test:  0.9226985242445538
Weighted Recall Train:  0.9358140760917318
Weighted Recall Dev:  0.9162856137882518
Weighted Recall Test:  0.9226985242445538
Macro Recall Train:  0.4604974052674116
Macro Recall Dev:  0.44100378571046944
Macro Recall Test:  0.4387772207224579
Micro Recall Train:  0.9358140760917318
Micro Recall Dev:  0.9162856137882518
Micro Recall Test:  0.9226985242445538
Confusion Matrix Train: 
[[  772  1190     0]
 [  249 20529     0]
 [    0    22     0]]
Confusion Matrix Dev: 
[[  94  178    0]
 [  58 2511    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  84  166    0]
 

### Seentence Transformer Faster No PCA

In [34]:
train, dev, test = load_sent_trans_fast_no_pca()

In [35]:
grid_results = gridsearch.fit(train, train_labels)
best_params = grid_results.best_params_
logistic_reg = grid_results.best_estimator_

35 fits failed out of a total of 140.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
35 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 447, in _check_solver
    raise ValueError(
ValueError: Solver lbf

In [36]:
print(best_params)

{'C': 1.0, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'liblinear'}


In [37]:

logistic_reg = logistic_reg.fit(train, train_labels)
save_model(logistic_reg, "logistic_reg_sent_trans_fast_no_pca.joblib")

In [38]:
train_preds = logistic_reg.predict(train)
dev_preds = logistic_reg.predict(dev)
test_preds = logistic_reg.predict(test)

In [39]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9288726825410772
Accuracy Dev:  0.9176925782623989
Accuracy Test:  0.9248067463106114
Weighted F1 Train:  0.9153117608065434
Weighted F1 Dev:  0.9013103392248377
Weighted F1 Test:  0.9112775168450953
Macro F1 Train:  0.4641122403976934
Macro F1 Dev:  0.44951195604016525
Macro F1 Test:  0.45951751150945497
Micro F1 Train:  0.9288726825410772
Micro F1 Dev:  0.9176925782623989
Micro F1 Test:  0.9248067463106114
Weighted Recall Train:  0.9288726825410772
Weighted Recall Dev:  0.9176925782623989
Weighted Recall Test:  0.9248067463106114
Macro Recall Train:  0.4319616755313121
Macro Recall Dev:  0.4207037719720041
Macro Recall Test:  0.4299102712430904
Micro Recall Train:  0.9288726825410772
Micro Recall Dev:  0.9176925782623989
Micro Recall Test:  0.9248067463106114
Confusion Matrix Train: 
[[  603  1359     0]
 [  238 20540     0]
 [    0    22     0]]
Confusion Matrix Dev: 
[[  75  197    0]
 [  35 2534    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  76  174    0]


### Sentence Transformer Faster PCA

In [40]:
train, dev, test = load_sent_trans_fast_pca()

In [41]:
grid_results = gridsearch.fit(train, train_labels)
best_params = grid_results.best_params_
logistic_reg = grid_results.best_estimator_

35 fits failed out of a total of 140.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
35 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 447, in _check_solver
    raise ValueError(
ValueError: Solver lbf

In [42]:
print(best_params)

{'C': 1.0, 'max_iter': 1000, 'penalty': 'l1', 'solver': 'liblinear'}


In [43]:

logistic_reg = logistic_reg.fit(train, train_labels)
save_model(logistic_reg, "logistic_reg_sent_trans_fast_pca.joblib")

In [44]:
train_preds = logistic_reg.predict(train)
dev_preds =logistic_reg.predict(dev)
test_preds = logistic_reg.predict(test)

In [45]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9272032334592742
Accuracy Dev:  0.9162856137882518
Accuracy Test:  0.9251581166549543
Weighted F1 Train:  0.9139051598905698
Weighted F1 Dev:  0.9013350066111487
Weighted F1 Test:  0.9123467976863688
Macro F1 Train:  0.46161651290490496
Macro F1 Dev:  0.4520082267407457
Macro F1 Test:  0.463105918850385
Micro F1 Train:  0.9272032334592742
Micro F1 Dev:  0.9162856137882518
Micro F1 Test:  0.9251581166549543
Weighted Recall Train:  0.9272032334592742
Weighted Recall Dev:  0.9162856137882518
Weighted Recall Test:  0.9251581166549543
Macro Recall Train:  0.43104435227057863
Macro Recall Dev:  0.4245677153695266
Macro Recall Test:  0.4336531687877619
Micro Recall Train:  0.9272032334592742
Micro Recall Dev:  0.9162856137882518
Micro Recall Test:  0.9251581166549543
Confusion Matrix Train: 
[[  601  1361     0]
 [  274 20504     0]
 [    0    22     0]]
Confusion Matrix Dev: 
[[  79  193    0]
 [  43 2526    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  79  171    0]
 

### Sentence Transformer Better No PCA

In [46]:
train, dev, test = load_sent_trans_better_no_pca()

In [47]:
grid_results = gridsearch.fit(train, train_labels)
best_params = grid_results.best_params_
logistic_reg = grid_results.best_estimator_

35 fits failed out of a total of 140.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
35 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 447, in _check_solver
    raise ValueError(
ValueError: Solver lbf

In [48]:
print(best_params)

{'C': 1.0, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'lbfgs'}


In [49]:

logistic_reg = logistic_reg.fit(train, train_labels)
save_model(logistic_reg, "logistic_reg_sent_trans_best_no_pca.joblib")

In [50]:
train_preds = logistic_reg.predict(train)
dev_preds =logistic_reg.predict(dev)
test_preds = logistic_reg.predict(test)

In [51]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9352868816448466
Accuracy Dev:  0.9240239183960605
Accuracy Test:  0.9311314125087843
Weighted F1 Train:  0.9261101311818531
Weighted F1 Dev:  0.9136867360603871
Weighted F1 Test:  0.9216696889708637
Macro F1 Train:  0.49553253878259124
Macro F1 Dev:  0.4839473214912968
Macro F1 Test:  0.48895830301854454
Micro F1 Train:  0.9352868816448465
Micro F1 Dev:  0.9240239183960605
Micro F1 Test:  0.9311314125087843
Weighted Recall Train:  0.9352868816448466
Weighted Recall Dev:  0.9240239183960605
Weighted Recall Test:  0.9311314125087843
Macro Recall Train:  0.46307423096784817
Macro Recall Dev:  0.4548157137514406
Macro Recall Test:  0.4587293996657668
Micro Recall Train:  0.9352868816448466
Micro Recall Dev:  0.9240239183960605
Micro Recall Test:  0.9311314125087843
Confusion Matrix Train: 
[[  790  1172     0]
 [  279 20499     0]
 [    0    22     0]]
Confusion Matrix Dev: 
[[ 104  168    0]
 [  46 2523    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  98  152    0]

### Sentence Transformer Better PCA

In [52]:
train, dev, test = load_sent_trans_better_pca()

In [53]:
grid_results = gridsearch.fit(train, train_labels)
best_params = grid_results.best_params_
logistic_reg = grid_results.best_estimator_

35 fits failed out of a total of 140.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
35 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\diksh\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 447, in _check_solver
    raise ValueError(
ValueError: Solver lbf

In [54]:
print(best_params)

{'C': 1.0, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'lbfgs'}


In [55]:

logistic_reg = logistic_reg.fit(train, train_labels)
save_model(logistic_reg, "logistic_reg_sent_trans_best_pca.joblib")

In [56]:
train_preds = logistic_reg.predict(train)
dev_preds =logistic_reg.predict(dev)
test_preds = logistic_reg.predict(test)

In [57]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9336174325630436
Accuracy Dev:  0.9236721772775237
Accuracy Test:  0.9311314125087843
Weighted F1 Train:  0.9240535983557454
Weighted F1 Dev:  0.912960528756923
Weighted F1 Test:  0.9212422930437894
Macro F1 Train:  0.4904077199598218
Macro F1 Dev:  0.48188597793477134
Macro F1 Test:  0.4872493519552343
Micro F1 Train:  0.9336174325630436
Micro F1 Dev:  0.9236721772775237
Micro F1 Test:  0.9311314125087843
Weighted Recall Train:  0.9336174325630436
Weighted Recall Dev:  0.9236721772775237
Weighted Recall Test:  0.9311314125087843
Macro Recall Train:  0.45861831041595097
Macro Recall Dev:  0.45249448553263266
Macro Recall Test:  0.4563198354544286
Micro Recall Train:  0.9336174325630436
Micro Recall Dev:  0.9236721772775237
Micro Recall Test:  0.9311314125087843
Confusion Matrix Train: 
[[  765  1197     0]
 [  292 20486     0]
 [    0    22     0]]
Confusion Matrix Dev: 
[[ 102  170    0]
 [  45 2524    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  96  154    0]
