In [1]:
from embeddings_loader import *
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score


In [2]:
train_labels, dev_labels, test_labels = load_labels()

In [3]:
def computeAllScores(y_pred_train, y_pred_dev, y_pred_test):
    print("Accuracy Train: ", accuracy_score(train_labels, y_pred_train))
    print("Accuracy Dev: ", accuracy_score(dev_labels, y_pred_dev))
    print("Accuracy Test: ", accuracy_score(test_labels, y_pred_test))
    print("F1 Train: ", f1_score(train_labels, y_pred_train, average='macro'))
    print("F1 Dev: ", f1_score(dev_labels, y_pred_dev, average='macro'))
    print("F1 Test: ", f1_score(test_labels, y_pred_test, average='macro'))
    print("Precision Train: ", precision_score(train_labels, y_pred_train, average='macro'))
    print("Precision Dev: ", precision_score(dev_labels, y_pred_dev, average='macro'))
    print("Precision Test: ", precision_score(test_labels, y_pred_test, average='macro'))
    print("Recall Train: ", recall_score(train_labels, y_pred_train, average='macro'))
    print("Recall Dev: ", recall_score(dev_labels, y_pred_dev, average='macro'))
    print("Recall Test: ", recall_score(test_labels, y_pred_test, average='macro'))
    # Confusion Matrix
    print("Confusion Matrix Train: ")
    print(confusion_matrix(train_labels, y_pred_train))
    print("Confusion Matrix Dev: ")
    print(confusion_matrix(dev_labels, y_pred_dev))
    print("Confusion Matrix Test: ")
    print(confusion_matrix(test_labels, y_pred_test))

In [4]:
label_replacement = {
    'Hope_speech': 0,
    'Non_hope_speech': 1,
    'not-English': 2,
}

In [5]:
# Replace labels with numbers
train_labels = [label_replacement[label] for label in train_labels]
dev_labels = [label_replacement[label] for label in dev_labels]
test_labels = [label_replacement[label] for label in test_labels]

In [6]:
extratrees_classifier = ExtraTreesClassifier(verbose=3,n_jobs=-1)
gridsearch = GridSearchCV(extratrees_classifier, param_grid = {
    "n_estimators": [100, 125, 150],
    "max_depth": [5, 10, 15, 20],
    "min_samples_split": [2, 5, 10],
    "bootstrap": [True, False],
}, scoring = "f1_micro")

### Glove Twitter 25

In [7]:
gt25_train, gt25_dev, gt25_test = load_glove_twitter_25()

In [8]:
# Set all NaN values to 0
gt25_train = np.nan_to_num(gt25_train)
gt25_dev = np.nan_to_num(gt25_dev)
gt25_test = np.nan_to_num(gt25_test)

In [9]:
grid_results = gridsearch.fit(gt25_train, train_labels)
best_params = grid_results.best_params_
extratrees_classifier = grid_results.best_estimator_
print(best_params)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done  77 out of 100 | elapsed:    4.2s remaining:    1.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.3s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.8s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    1.1s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent w

building tree 1 of 150
building tree 2 of 150
building tree 3 of 150
building tree 4 of 150
building tree 5 of 150
building tree 6 of 150
building tree 7 of 150building tree 8 of 150

building tree 9 of 150
building tree 10 of 150
building tree 11 of 150building tree 12 of 150

building tree 13 of 150
building tree 14 of 150building tree 15 of 150
building tree 16 of 150
building tree 17 of 150
building tree 18 of 150

building tree 19 of 150
building tree 20 of 150building tree 21 of 150

building tree 22 of 150
building tree 23 of 150
building tree 24 of 150
building tree 25 of 150building tree 26 of 150

building tree 27 of 150
building tree 28 of 150
building tree 29 of 150
building tree 30 of 150building tree 31 of 150

building tree 32 of 150
building tree 33 of 150
building tree 34 of 150
building tree 35 of 150
building tree 36 of 150
building tree 37 of 150
building tree 38 of 150
building tree 39 of 150building tree 40 of 150

building tree 41 of 150
building tree 42 of 150
b

[Parallel(n_jobs=-1)]: Done 104 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:    0.5s finished


In [10]:
extratrees_classifier = extratrees_classifier.fit(gt25_train, train_labels)

building tree 1 of 150
building tree 2 of 150
building tree 3 of 150
building tree 4 of 150
building tree 5 of 150
building tree 6 of 150
building tree 7 of 150
building tree 8 of 150
building tree 9 of 150
building tree 10 of 150
building tree 11 of 150building tree 12 of 150

building tree 13 of 150
building tree 14 of 150
building tree 15 of 150building tree 16 of 150

building tree 17 of 150
building tree 18 of 150
building tree 19 of 150
building tree 20 of 150
building tree 21 of 150
building tree 22 of 150building tree 23 of 150

building tree 24 of 150
building tree 25 of 150


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.0s


building tree 26 of 150building tree 27 of 150

building tree 28 of 150
building tree 29 of 150building tree 30 of 150building tree 31 of 150

building tree 32 of 150

building tree 33 of 150
building tree 34 of 150
building tree 35 of 150
building tree 36 of 150
building tree 37 of 150building tree 38 of 150

building tree 39 of 150
building tree 40 of 150building tree 41 of 150

building tree 42 of 150building tree 43 of 150

building tree 44 of 150building tree 45 of 150

building tree 46 of 150
building tree 47 of 150
building tree 48 of 150
building tree 49 of 150
building tree 50 of 150
building tree 51 of 150
building tree 52 of 150building tree 53 of 150

building tree 54 of 150
building tree 55 of 150building tree 56 of 150building tree 57 of 150building tree 58 of 150



building tree 59 of 150
building tree 60 of 150
building tree 61 of 150
building tree 62 of 150
building tree 63 of 150
building tree 64 of 150
building tree 65 of 150building tree 66 of 150building tree 67 o

[Parallel(n_jobs=-1)]: Done 104 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:    0.5s finished


In [11]:
train_preds = extratrees_classifier.predict(gt25_train)
dev_preds = extratrees_classifier.predict(gt25_dev)
test_preds = extratrees_classifier.predict(gt25_test)

[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 104 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 150 out of 150 | elapsed:    0.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 104 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 150 out of 150 | elapsed:    0.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 104 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 150 out of 150 | elapsed:    0.0s finished


In [12]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9480713469818118
Accuracy Dev:  0.9078438269433696
Accuracy Test:  0.9139142656359803
F1 Train:  0.7508871182549127
F1 Dev:  0.3516252626374985
F1 Test:  0.3529593085780835
Precision Train:  0.9816509586866632
Precision Dev:  0.58055063913471
Precision Test:  0.5507567574278939
Recall Train:  0.6495221528005851
Recall Dev:  0.3513264297544631
Recall Test:  0.3512286926340147
Confusion Matrix Train: 
[[  791  1171     0]
 [    1 20777     0]
 [    0    10    12]]
Confusion Matrix Dev: 
[[  15  257    0]
 [   3 2566    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  14  236    0]
 [   5 2587    1]
 [   0    3    0]]


  _warn_prf(average, modifier, msg_start, len(result))


### FastText 300 

In [13]:
ft300_train, ft300_dev, ft300_test = load_fasttext_300()

In [14]:
# Set all NaN values to 0
ft300_train = np.nan_to_num(ft300_train)
ft300_dev = np.nan_to_num(ft300_dev)
ft300_test = np.nan_to_num(ft300_test)

In [15]:
grid_results = gridsearch.fit(ft300_train, train_labels)
best_params = grid_results.best_params_
extratrees_classifier = grid_results.best_estimator_
print(best_params)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  77 out of 100 | elapsed:    0.5s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.6s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.5s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent w

building tree 1 of 125
building tree 2 of 125
building tree 3 of 125
building tree 4 of 125building tree 5 of 125
building tree 6 of 125
building tree 7 of 125

building tree 8 of 125
building tree 9 of 125
building tree 10 of 125
building tree 11 of 125
building tree 12 of 125
building tree 13 of 125building tree 14 of 125

building tree 15 of 125
building tree 16 of 125
building tree 17 of 125
building tree 18 of 125
building tree 19 of 125
building tree 20 of 125
building tree 21 of 125
building tree 22 of 125
building tree 23 of 125
building tree 24 of 125
building tree 25 of 125
building tree 26 of 125building tree 27 of 125

building tree 28 of 125
building tree 29 of 125


[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.9s


building tree 30 of 125
building tree 31 of 125
building tree 32 of 125
building tree 33 of 125
building tree 34 of 125
building tree 35 of 125
building tree 36 of 125
building tree 37 of 125
building tree 38 of 125
building tree 39 of 125
building tree 40 of 125
building tree 41 of 125
building tree 42 of 125
building tree 43 of 125
building tree 44 of 125
building tree 45 of 125
building tree 46 of 125building tree 47 of 125

building tree 48 of 125
building tree 49 of 125
building tree 50 of 125
building tree 51 of 125
building tree 52 of 125
building tree 53 of 125
building tree 54 of 125
building tree 55 of 125
building tree 56 of 125
building tree 57 of 125
building tree 58 of 125
building tree 59 of 125
building tree 60 of 125
building tree 61 of 125
building tree 62 of 125
building tree 63 of 125
building tree 64 of 125
building tree 65 of 125
building tree 66 of 125
building tree 67 of 125
building tree 68 of 125
building tree 69 of 125
building tree 70 of 125
building tree 71

[Parallel(n_jobs=-1)]: Done 125 out of 125 | elapsed:    4.2s finished


In [16]:
extratrees_classifier = extratrees_classifier.fit(ft300_train, train_labels)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 12 concurrent workers.


building tree 1 of 125
building tree 2 of 125building tree 3 of 125

building tree 4 of 125building tree 5 of 125
building tree 6 of 125building tree 7 of 125
building tree 8 of 125
building tree 9 of 125

building tree 10 of 125

building tree 11 of 125
building tree 12 of 125
building tree 13 of 125
building tree 14 of 125
building tree 15 of 125
building tree 16 of 125
building tree 17 of 125
building tree 18 of 125
building tree 19 of 125
building tree 20 of 125
building tree 21 of 125

[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.9s



building tree 22 of 125
building tree 23 of 125
building tree 24 of 125
building tree 25 of 125
building tree 26 of 125
building tree 27 of 125
building tree 28 of 125
building tree 29 of 125
building tree 30 of 125
building tree 31 of 125
building tree 32 of 125
building tree 33 of 125
building tree 34 of 125
building tree 35 of 125building tree 36 of 125building tree 37 of 125
building tree 38 of 125
building tree 39 of 125
building tree 40 of 125


building tree 41 of 125building tree 42 of 125
building tree 43 of 125
building tree 44 of 125

building tree 45 of 125
building tree 46 of 125
building tree 47 of 125
building tree 48 of 125
building tree 49 of 125
building tree 50 of 125
building tree 51 of 125building tree 52 of 125
building tree 53 of 125
building tree 54 of 125
building tree 55 of 125

building tree 56 of 125
building tree 57 of 125building tree 58 of 125
building tree 59 of 125

building tree 60 of 125
building tree 61 of 125
building tree 62 of 125
building tree 6

[Parallel(n_jobs=-1)]: Done 125 out of 125 | elapsed:    3.8s finished


In [17]:
train_preds = extratrees_classifier.predict(ft300_train)
dev_preds = extratrees_classifier.predict(ft300_dev)
test_preds = extratrees_classifier.predict(ft300_test)

[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 125 out of 125 | elapsed:    0.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 125 out of 125 | elapsed:    0.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 125 out of 125 | elapsed:    0.0s finished


In [18]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9800105438889377
Accuracy Dev:  0.9113612381287373
Accuracy Test:  0.9142656359803233
F1 Train:  0.8904145501827498
F1 Dev:  0.37512089599621995
F1 Test:  0.3617832621013957
Precision Train:  0.9924479140645706
Precision Dev:  0.592834854050638
Precision Test:  0.536314932554734
Recall Train:  0.8181342879939049
Recall Dev:  0.3646770697379769
Recall Test:  0.35617637228435534
Confusion Matrix Train: 
[[ 1516   446     0]
 [    2 20776     0]
 [    0     7    15]]
Confusion Matrix Dev: 
[[  26  246    0]
 [   4 2565    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  18  232    0]
 [   8 2584    1]
 [   0    3    0]]


  _warn_prf(average, modifier, msg_start, len(result))


### Word2Vec 300

In [19]:
w2v300_train, w2v300_dev, w2v300_test = load_word2vec_300()

In [20]:
# Set all NaN values to 0
w2v300_train = np.nan_to_num(w2v300_train)
w2v300_dev = np.nan_to_num(w2v300_dev)
w2v300_test = np.nan_to_num(w2v300_test)

In [21]:
grid_results = gridsearch.fit(w2v300_train, train_labels)
best_params = grid_results.best_params_
extratrees_classifier = grid_results.best_estimator_
print(best_params)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  77 out of 100 | elapsed:    0.7s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.9s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.8s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    1.2s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.9s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent w

building tree 1 of 125
building tree 2 of 125
building tree 3 of 125building tree 4 of 125
building tree 5 of 125
building tree 6 of 125
building tree 7 of 125

building tree 8 of 125
building tree 9 of 125
building tree 10 of 125
building tree 11 of 125
building tree 12 of 125
building tree 13 of 125
building tree 14 of 125
building tree 15 of 125
building tree 16 of 125
building tree 17 of 125building tree 18 of 125
building tree 19 of 125

building tree 20 of 125
building tree 21 of 125
building tree 22 of 125building tree 23 of 125

building tree 24 of 125


[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.2s


building tree 25 of 125
building tree 26 of 125
building tree 27 of 125
building tree 28 of 125
building tree 29 of 125
building tree 30 of 125
building tree 31 of 125
building tree 32 of 125
building tree 33 of 125
building tree 34 of 125
building tree 35 of 125
building tree 36 of 125
building tree 37 of 125
building tree 38 of 125
building tree 39 of 125
building tree 40 of 125
building tree 41 of 125
building tree 42 of 125
building tree 43 of 125
building tree 44 of 125
building tree 45 of 125
building tree 46 of 125
building tree 47 of 125
building tree 48 of 125
building tree 49 of 125
building tree 50 of 125
building tree 51 of 125
building tree 52 of 125
building tree 53 of 125
building tree 54 of 125
building tree 55 of 125
building tree 56 of 125building tree 57 of 125

building tree 58 of 125
building tree 59 of 125
building tree 60 of 125building tree 61 of 125

building tree 62 of 125
building tree 63 of 125
building tree 64 of 125building tree 65 of 125

building tree 66

[Parallel(n_jobs=-1)]: Done 125 out of 125 | elapsed:    2.9s finished


In [22]:
extratrees_classifier = extratrees_classifier.fit(w2v300_train, train_labels)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 12 concurrent workers.


building tree 1 of 125
building tree 2 of 125
building tree 3 of 125
building tree 4 of 125
building tree 5 of 125
building tree 6 of 125
building tree 7 of 125
building tree 8 of 125
building tree 9 of 125
building tree 10 of 125
building tree 11 of 125
building tree 12 of 125
building tree 13 of 125
building tree 14 of 125
building tree 15 of 125
building tree 16 of 125
building tree 17 of 125
building tree 18 of 125
building tree 19 of 125
building tree 20 of 125
building tree 21 of 125
building tree 22 of 125
building tree 23 of 125
building tree 24 of 125


[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.9s


building tree 25 of 125
building tree 26 of 125
building tree 27 of 125
building tree 28 of 125
building tree 29 of 125
building tree 30 of 125
building tree 31 of 125
building tree 32 of 125
building tree 33 of 125building tree 34 of 125
building tree 35 of 125
building tree 36 of 125

building tree 37 of 125building tree 38 of 125

building tree 39 of 125
building tree 40 of 125
building tree 41 of 125
building tree 42 of 125
building tree 43 of 125
building tree 44 of 125
building tree 45 of 125
building tree 46 of 125
building tree 47 of 125building tree 48 of 125

building tree 49 of 125
building tree 50 of 125
building tree 51 of 125
building tree 52 of 125
building tree 53 of 125
building tree 54 of 125
building tree 55 of 125
building tree 56 of 125
building tree 57 of 125
building tree 58 of 125
building tree 59 of 125
building tree 60 of 125
building tree 61 of 125
building tree 62 of 125
building tree 63 of 125
building tree 64 of 125
building tree 65 of 125
building tree 66

[Parallel(n_jobs=-1)]: Done 125 out of 125 | elapsed:    4.0s finished


In [23]:
train_preds = extratrees_classifier.predict(w2v300_train)
dev_preds = extratrees_classifier.predict(w2v300_dev)
test_preds = extratrees_classifier.predict(w2v300_test)

[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.5s
[Parallel(n_jobs=12)]: Done 125 out of 125 | elapsed:    0.7s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=12)]: Done 125 out of 125 | elapsed:    0.4s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.5s
[Parallel(n_jobs=12)]: Done 125 out of 125 | elapsed:    0.5s finished


In [24]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9768913100782005
Accuracy Dev:  0.9110094970102005
Accuracy Test:  0.9149683766690091
F1 Train:  0.8711531577969766
F1 Dev:  0.3730377141935493
F1 Test:  0.3663843047558257
Precision Train:  0.9911249127056342
Precision Dev:  0.5911942749307649
Precision Test:  0.543857869736464
Recall Train:  0.7912439983270015
Recall Dev:  0.3634515795418985
Recall Test:  0.358843038951022
Confusion Matrix Train: 
[[ 1447   515     0]
 [    3 20775     0]
 [    0     8    14]]
Confusion Matrix Dev: 
[[  25  247    0]
 [   4 2565    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  20  230    0]
 [   8 2584    1]
 [   0    3    0]]


  _warn_prf(average, modifier, msg_start, len(result))


### TF-IDF PCA (1000 Dims)

In [25]:
tfidf_pca_train, tfidf_pca_dev, tfidf_pca_test = load_tfidf_pca()

In [26]:
grid_results = gridsearch.fit(tfidf_pca_train, train_labels)
best_params = grid_results.best_params_
extratrees_classifier = grid_results.best_estimator_
print(best_params)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    1.9s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    2.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.6s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.3s
[Parallel(n

building tree 1 of 150building tree 2 of 150building tree 3 of 150
building tree 4 of 150


building tree 5 of 150
building tree 6 of 150
building tree 7 of 150
building tree 8 of 150
building tree 9 of 150
building tree 10 of 150
building tree 11 of 150
building tree 12 of 150
building tree 13 of 150
building tree 14 of 150
building tree 15 of 150building tree 16 of 150

building tree 17 of 150
building tree 18 of 150
building tree 19 of 150

[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    1.9s


building tree 20 of 150
building tree 21 of 150
building tree 22 of 150
building tree 23 of 150
building tree 24 of 150

building tree 25 of 150
building tree 26 of 150
building tree 27 of 150
building tree 28 of 150
building tree 29 of 150
building tree 30 of 150
building tree 31 of 150
building tree 32 of 150
building tree 33 of 150
building tree 34 of 150
building tree 35 of 150
building tree 36 of 150
building tree 37 of 150
building tree 38 of 150
building tree 39 of 150
building tree 40 of 150
building tree 41 of 150
building tree 42 of 150
building tree 43 of 150
building tree 44 of 150
building tree 45 of 150
building tree 46 of 150
building tree 47 of 150
building tree 48 of 150
building tree 49 of 150
building tree 50 of 150
building tree 51 of 150
building tree 52 of 150
building tree 53 of 150
building tree 54 of 150
building tree 55 of 150
building tree 56 of 150
building tree 57 of 150
building tree 58 of 150
building tree 59 of 150
building tree 60 of 150
building tree 6

[Parallel(n_jobs=-1)]: Done 104 tasks      | elapsed:    8.8s


building tree 118 of 150
building tree 119 of 150
building tree 120 of 150
building tree 121 of 150
building tree 122 of 150
building tree 123 of 150
building tree 124 of 150
building tree 125 of 150
building tree 126 of 150
building tree 127 of 150
building tree 128 of 150
building tree 129 of 150
building tree 130 of 150
building tree 131 of 150
building tree 132 of 150
building tree 133 of 150
building tree 134 of 150
building tree 135 of 150
building tree 136 of 150
building tree 137 of 150
building tree 138 of 150
building tree 139 of 150
building tree 140 of 150
building tree 141 of 150
building tree 142 of 150
building tree 143 of 150
building tree 144 of 150
building tree 145 of 150
building tree 146 of 150
building tree 147 of 150
building tree 148 of 150
building tree 149 of 150
building tree 150 of 150
{'bootstrap': False, 'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 150}


[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   12.2s finished


In [27]:
extratrees_classifier = extratrees_classifier.fit(tfidf_pca_train, train_labels)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 12 concurrent workers.


building tree 1 of 150
building tree 2 of 150
building tree 3 of 150
building tree 4 of 150
building tree 5 of 150
building tree 6 of 150
building tree 7 of 150
building tree 8 of 150
building tree 9 of 150
building tree 10 of 150
building tree 11 of 150
building tree 12 of 150
building tree 13 of 150
building tree 14 of 150
building tree 15 of 150
building tree 16 of 150
building tree 17 of 150
building tree 18 of 150
building tree 19 of 150
building tree 20 of 150
building tree 21 of 150
building tree 22 of 150
building tree 23 of 150
building tree 24 of 150
building tree 25 of 150


[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    1.9s


building tree 26 of 150
building tree 27 of 150
building tree 28 of 150
building tree 29 of 150
building tree 30 of 150building tree 31 of 150

building tree 32 of 150
building tree 33 of 150
building tree 34 of 150
building tree 35 of 150
building tree 36 of 150
building tree 37 of 150
building tree 38 of 150building tree 39 of 150

building tree 40 of 150
building tree 41 of 150
building tree 42 of 150
building tree 43 of 150
building tree 44 of 150
building tree 45 of 150
building tree 46 of 150
building tree 47 of 150
building tree 48 of 150
building tree 49 of 150
building tree 50 of 150
building tree 51 of 150
building tree 52 of 150
building tree 53 of 150
building tree 54 of 150
building tree 55 of 150
building tree 56 of 150
building tree 57 of 150
building tree 58 of 150
building tree 59 of 150
building tree 60 of 150
building tree 61 of 150
building tree 62 of 150
building tree 63 of 150
building tree 64 of 150
building tree 65 of 150
building tree 66 of 150
building tree 67

[Parallel(n_jobs=-1)]: Done 104 tasks      | elapsed:    8.9s


building tree 119 of 150
building tree 120 of 150
building tree 121 of 150
building tree 122 of 150
building tree 123 of 150
building tree 124 of 150
building tree 125 of 150
building tree 126 of 150
building tree 127 of 150
building tree 128 of 150
building tree 129 of 150
building tree 130 of 150
building tree 131 of 150
building tree 132 of 150
building tree 133 of 150
building tree 134 of 150
building tree 135 of 150
building tree 136 of 150
building tree 137 of 150
building tree 138 of 150
building tree 139 of 150
building tree 140 of 150
building tree 141 of 150
building tree 142 of 150
building tree 143 of 150
building tree 144 of 150
building tree 145 of 150
building tree 146 of 150
building tree 147 of 150
building tree 148 of 150
building tree 149 of 150
building tree 150 of 150


[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   12.5s finished


In [28]:
train_preds = extratrees_classifier.predict(tfidf_pca_train)
dev_preds = extratrees_classifier.predict(tfidf_pca_dev)
test_preds = extratrees_classifier.predict(tfidf_pca_test)

[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.8s
[Parallel(n_jobs=12)]: Done 104 tasks      | elapsed:    1.0s
[Parallel(n_jobs=12)]: Done 150 out of 150 | elapsed:    1.1s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.1s
[Parallel(n_jobs=12)]: Done 104 tasks      | elapsed:    0.1s
[Parallel(n_jobs=12)]: Done 150 out of 150 | elapsed:    0.2s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 104 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 150 out of 150 | elapsed:    0.0s finished


In [29]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9670942799402513
Accuracy Dev:  0.9099542736545903
Accuracy Test:  0.9125087842586086
F1 Train:  0.6138695484135739
F1 Dev:  0.3628530199958771
F1 Test:  0.3476464839077867
Precision Train:  0.9881468827170935
Precision Dev:  0.6063506386087031
Precision Test:  0.5049068176456711
Recall Train:  0.5582887175304526
Recall Dev:  0.3575836329082041
Recall Test:  0.3483049235120195
Confusion Matrix Train: 
[[ 1235   727     0]
 [    1 20777     0]
 [    0    21     1]]
Confusion Matrix Dev: 
[[  20  252    0]
 [   2 2567    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  12  238    0]
 [   8 2585    0]
 [   0    3    0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Sentence Transformer Faster No PCA

In [7]:
train, dev, test = load_sent_trans_fast_no_pca()

In [8]:
grid_results = gridsearch.fit(train, train_labels)
best_params = grid_results.best_params_
extratrees_classifier = grid_results.best_estimator_
print(best_params)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    4.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.2s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    1.2s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.2s
[Parallel(n

building tree 1 of 100building tree 2 of 100
building tree 3 of 100
building tree 4 of 100building tree 5 of 100


building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100building tree 10 of 100
building tree 11 of 100building tree 12 of 100


building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100building tree 21 of 100
building tree 22 of 100
building tree 23 of 100

building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100


[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.6s


building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
building tree 43 of 100
building tree 44 of 100
building tree 45 of 100
building tree 46 of 100
building tree 47 of 100
building tree 48 of 100
building tree 49 of 100
building tree 50 of 100
building tree 51 of 100
building tree 52 of 100building tree 53 of 100

building tree 54 of 100
building tree 55 of 100
building tree 56 of 100
building tree 57 of 100
building tree 58 of 100
building tree 59 of 100
building tree 60 of 100
building tree 61 of 100
building tree 62 of 100
building tree 63 of 100
building tree 64 of 100
building tree 65 of 100
building tree 66 of 100
building tree 67 of 100
building tree 68 of 100
building tree 69

[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    4.0s finished


In [9]:
extratrees_classifier = extratrees_classifier.fit(train, train_labels)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 12 concurrent workers.


building tree 1 of 100
building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100

building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100


[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.3s


building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
building tree 43 of 100
building tree 44 of 100
building tree 45 of 100
building tree 46 of 100
building tree 47 of 100
building tree 48 of 100
building tree 49 of 100
building tree 50 of 100
building tree 51 of 100
building tree 52 of 100
building tree 53 of 100
building tree 54 of 100
building tree 55 of 100
building tree 56 of 100
building tree 57 of 100
building tree 58 of 100
building tree 59 of 100
building tree 60 of 100
building tree 61 of 100
building tree 62 of 100
building tree 63 of 100
building tree 64 of 100
building tree 65 of 100
building tree 66

[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    2.6s finished


In [10]:
train_preds = extratrees_classifier.predict(train)
dev_preds = extratrees_classifier.predict(dev)
test_preds = extratrees_classifier.predict(test)

[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.2s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.1s finished


In [11]:
computeAllScores(train_preds, dev_preds, test_preds)

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy Train:  0.9972761620244267
Accuracy Dev:  0.9166373549067885
Accuracy Test:  0.9191848208011244
F1 Train:  0.9518625478475662
F1 Dev:  0.4123681286614806
F1 Test:  0.3996463747182826
Precision Train:  0.9985327439694615
Precision Dev:  0.5848615277596626
Precision Test:  0.5574722917411513
Recall Train:  0.915019984488309
Recall Dev:  0.3885381127928011
Recall Test:  0.3796621673736984
Confusion Matrix Train: 
[[ 1908    54     0]
 [    3 20775     0]
 [    0     5    17]]
Confusion Matrix Dev: 
[[  46  226    0]
 [   9 2560    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  36  214    0]
 [  12 2580    1]
 [   0    3    0]]


### Sentence Transformer Faster PCA

In [12]:
train, dev, test = load_sent_trans_fast_pca()

In [13]:
grid_results = gridsearch.fit(train, train_labels)
best_params = grid_results.best_params_
extratrees_classifier = grid_results.best_estimator_
print(best_params)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.9s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.8s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.1s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.2s
[Parallel(n

building tree 1 of 125
building tree 2 of 125
building tree 3 of 125
building tree 4 of 125
building tree 5 of 125
building tree 6 of 125
building tree 7 of 125building tree 8 of 125

building tree 9 of 125building tree 10 of 125

building tree 11 of 125
building tree 12 of 125
building tree 13 of 125
building tree 14 of 125
building tree 15 of 125
building tree 16 of 125
building tree 17 of 125
building tree 18 of 125
building tree 19 of 125
building tree 20 of 125building tree 21 of 125

building tree 22 of 125
building tree 23 of 125
building tree 24 of 125


[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.1s


building tree 25 of 125
building tree 26 of 125
building tree 27 of 125
building tree 28 of 125
building tree 29 of 125
building tree 30 of 125
building tree 31 of 125
building tree 32 of 125
building tree 33 of 125
building tree 34 of 125
building tree 35 of 125
building tree 36 of 125
building tree 37 of 125
building tree 38 of 125
building tree 39 of 125
building tree 40 of 125
building tree 41 of 125
building tree 42 of 125
building tree 43 of 125
building tree 44 of 125
building tree 45 of 125
building tree 46 of 125
building tree 47 of 125
building tree 48 of 125
building tree 49 of 125
building tree 50 of 125
building tree 51 of 125
building tree 52 of 125
building tree 53 of 125
building tree 54 of 125
building tree 55 of 125building tree 56 of 125

building tree 57 of 125
building tree 58 of 125
building tree 59 of 125
building tree 60 of 125
building tree 61 of 125
building tree 62 of 125
building tree 63 of 125
building tree 64 of 125
building tree 65 of 125
building tree 66

[Parallel(n_jobs=-1)]: Done 125 out of 125 | elapsed:    1.5s finished


In [14]:
extratrees_classifier = extratrees_classifier.fit(train, train_labels)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.1s


building tree 1 of 125building tree 2 of 125

building tree 3 of 125
building tree 4 of 125
building tree 5 of 125
building tree 6 of 125building tree 7 of 125

building tree 8 of 125
building tree 9 of 125
building tree 10 of 125
building tree 11 of 125
building tree 12 of 125
building tree 13 of 125
building tree 14 of 125
building tree 15 of 125
building tree 16 of 125
building tree 17 of 125
building tree 18 of 125
building tree 19 of 125
building tree 20 of 125
building tree 21 of 125building tree 22 of 125

building tree 23 of 125
building tree 24 of 125
building tree 25 of 125
building tree 26 of 125
building tree 27 of 125
building tree 28 of 125
building tree 29 of 125
building tree 30 of 125building tree 31 of 125

building tree 32 of 125
building tree 33 of 125
building tree 34 of 125
building tree 35 of 125
building tree 36 of 125
building tree 37 of 125
building tree 38 of 125
building tree 39 of 125
building tree 40 of 125
building tree 41 of 125
building tree 42 of 125
b

[Parallel(n_jobs=-1)]: Done 125 out of 125 | elapsed:    1.5s finished


In [15]:
train_preds = extratrees_classifier.predict(train)
dev_preds = extratrees_classifier.predict(dev)
test_preds = extratrees_classifier.predict(test)

[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 125 out of 125 | elapsed:    0.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 125 out of 125 | elapsed:    0.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 125 out of 125 | elapsed:    0.0s finished


In [16]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9824707846410684
Accuracy Dev:  0.9096025325360535
Accuracy Test:  0.9146170063246662
F1 Train:  0.8353992476773123
F1 Dev:  0.36647734271568594
F1 Test:  0.359737414993496
Precision Train:  0.9933276934556271
Precision Dev:  0.5751087261503928
Precision Test:  0.5415682337191905
Recall Train:  0.7527402868200871
Recall Dev:  0.35964535678031434
Recall Test:  0.35510014140635043
Confusion Matrix Train: 
[[ 1577   385     0]
 [    2 20776     0]
 [    0    12    10]]
Confusion Matrix Dev: 
[[  22  250    0]
 [   5 2564    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  17  233    0]
 [   7 2586    0]
 [   0    3    0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Sentence Transformer Better No PCA

In [17]:
train, dev, test = load_sent_trans_better_no_pca()

In [18]:
grid_results = gridsearch.fit(train, train_labels)
best_params = grid_results.best_params_
extratrees_classifier = grid_results.best_estimator_
print(best_params)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    1.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    1.1s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.1s
[Parallel(n

building tree 1 of 125
building tree 2 of 125
building tree 3 of 125building tree 4 of 125

building tree 5 of 125
building tree 6 of 125
building tree 7 of 125
building tree 8 of 125
building tree 9 of 125
building tree 10 of 125
building tree 11 of 125
building tree 12 of 125
building tree 13 of 125
building tree 14 of 125
building tree 15 of 125
building tree 16 of 125
building tree 17 of 125
building tree 18 of 125
building tree 19 of 125
building tree 20 of 125
building tree 21 of 125
building tree 22 of 125
building tree 23 of 125


[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.4s


building tree 24 of 125
building tree 25 of 125
building tree 26 of 125
building tree 27 of 125
building tree 28 of 125
building tree 29 of 125
building tree 30 of 125
building tree 31 of 125
building tree 32 of 125
building tree 33 of 125
building tree 34 of 125
building tree 35 of 125
building tree 36 of 125
building tree 37 of 125
building tree 38 of 125
building tree 39 of 125
building tree 40 of 125
building tree 41 of 125
building tree 42 of 125
building tree 43 of 125
building tree 44 of 125
building tree 45 of 125
building tree 46 of 125
building tree 47 of 125
building tree 48 of 125
building tree 49 of 125
building tree 50 of 125
building tree 51 of 125
building tree 52 of 125
building tree 53 of 125building tree 54 of 125

building tree 55 of 125
building tree 56 of 125
building tree 57 of 125
building tree 58 of 125
building tree 59 of 125
building tree 60 of 125
building tree 61 of 125
building tree 62 of 125
building tree 63 of 125
building tree 64 of 125
building tree 65

[Parallel(n_jobs=-1)]: Done 125 out of 125 | elapsed:    4.3s finished


In [19]:
extratrees_classifier = extratrees_classifier.fit(train, train_labels)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 12 concurrent workers.


building tree 1 of 125
building tree 2 of 125
building tree 3 of 125
building tree 4 of 125
building tree 5 of 125
building tree 6 of 125
building tree 7 of 125
building tree 8 of 125
building tree 9 of 125
building tree 10 of 125
building tree 11 of 125
building tree 12 of 125
building tree 13 of 125
building tree 14 of 125
building tree 15 of 125
building tree 16 of 125
building tree 17 of 125
building tree 18 of 125
building tree 19 of 125
building tree 20 of 125
building tree 21 of 125
building tree 22 of 125
building tree 23 of 125building tree 24 of 125



[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.3s


building tree 25 of 125
building tree 26 of 125
building tree 27 of 125
building tree 28 of 125
building tree 29 of 125
building tree 30 of 125
building tree 31 of 125
building tree 32 of 125
building tree 33 of 125
building tree 34 of 125
building tree 35 of 125building tree 36 of 125

building tree 37 of 125
building tree 38 of 125
building tree 39 of 125
building tree 40 of 125
building tree 41 of 125
building tree 42 of 125
building tree 43 of 125
building tree 44 of 125
building tree 45 of 125
building tree 46 of 125
building tree 47 of 125
building tree 48 of 125
building tree 49 of 125
building tree 50 of 125
building tree 51 of 125
building tree 52 of 125
building tree 53 of 125
building tree 54 of 125
building tree 55 of 125
building tree 56 of 125
building tree 57 of 125
building tree 58 of 125
building tree 59 of 125
building tree 60 of 125
building tree 61 of 125
building tree 62 of 125
building tree 63 of 125
building tree 64 of 125
building tree 65 of 125
building tree 66

[Parallel(n_jobs=-1)]: Done 125 out of 125 | elapsed:    3.9s finished


In [20]:
train_preds = extratrees_classifier.predict(train)
dev_preds = extratrees_classifier.predict(dev)
test_preds = extratrees_classifier.predict(test)

[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 125 out of 125 | elapsed:    0.1s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 125 out of 125 | elapsed:    0.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 125 out of 125 | elapsed:    0.0s finished


In [21]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9973200948950004
Accuracy Dev:  0.9162856137882518
Accuracy Test:  0.9258608573436402
F1 Train:  0.9519529233144347
F1 Dev:  0.41489575641879933
F1 Test:  0.4345289055506811
Precision Train:  0.9988654659253765
Precision Dev:  0.5729308899269374
Precision Test:  0.5863954989854271
Recall Train:  0.9148821750413413
Recall Dev:  0.3905998366649112
Recall Test:  0.4037907186013627
Confusion Matrix Train: 
[[ 1907    55     0]
 [    1 20777     0]
 [    0     5    17]]
Confusion Matrix Dev: 
[[  48  224    0]
 [  12 2557    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  54  196    0]
 [  11 2581    1]
 [   0    3    0]]


  _warn_prf(average, modifier, msg_start, len(result))


### Sentence Transformer Better PCA

In [22]:
train, dev, test = load_sent_trans_better_pca()

In [23]:
grid_results = gridsearch.fit(train, train_labels)
best_params = grid_results.best_params_
extratrees_classifier = grid_results.best_estimator_
print(best_params)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.6s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done  77 out of 100 | elapsed:    0.5s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.6s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent w

building tree 1 of 100
building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100


[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.1s


building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100building tree 42 of 100

building tree 43 of 100
building tree 44 of 100
building tree 45 of 100
building tree 46 of 100
building tree 47 of 100
building tree 48 of 100
building tree 49 of 100
building tree 50 of 100
building tree 51 of 100
building tree 52 of 100
building tree 53 of 100
building tree 54 of 100
building tree 55 of 100
building tree 56 of 100
building tree 57 of 100
building tree 58 of 100
building tree 59 of 100
building tree 60 of 100
building tree 61 of 100
building tree 62 of 100
building tree 63 of 100
building tree 64 of 100
building tree 65 of 100
building tree 66 of 100
building tree 67 of 100
building tree 68 of 100
building tree 69 of 100
building tree 70 of 100
building tree 71 of 100
building tree 72 of 100
building tree 73

[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    1.3s finished


In [24]:
extratrees_classifier = extratrees_classifier.fit(train, train_labels)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 12 concurrent workers.


building tree 1 of 100
building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100building tree 7 of 100

building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100building tree 16 of 100

building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100building tree 21 of 100

building tree 22 of 100
building tree 23 of 100building tree 24 of 100

building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100building tree 29 of 100

building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100building tree 35 of 100

building tree 36 of 100


[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.1s


building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
building tree 43 of 100building tree 44 of 100building tree 45 of 100building tree 46 of 100



building tree 47 of 100
building tree 48 of 100
building tree 49 of 100
building tree 50 of 100
building tree 51 of 100
building tree 52 of 100
building tree 53 of 100
building tree 54 of 100
building tree 55 of 100
building tree 56 of 100
building tree 57 of 100
building tree 58 of 100
building tree 59 of 100
building tree 60 of 100
building tree 61 of 100
building tree 62 of 100
building tree 63 of 100
building tree 64 of 100
building tree 65 of 100
building tree 66 of 100
building tree 67 of 100
building tree 68 of 100
building tree 69 of 100
building tree 70 of 100
building tree 71 of 100
building tree 72 of 100
building tree 73 of 100
building tree 74 of 100
building tree 75 of 100
building tree 76 of 100
building tree 77 of 100
building tree 78

[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    1.3s finished


In [25]:
train_preds = extratrees_classifier.predict(train)
dev_preds = extratrees_classifier.predict(dev)
test_preds = extratrees_classifier.predict(test)

[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   8 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:    0.0s finished


In [26]:
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.9817239258413145
Accuracy Dev:  0.910306014773127
Accuracy Test:  0.9170765987350668
F1 Train:  0.8334602018937569
F1 Dev:  0.3688307978001572
F1 Test:  0.37572648402788017
Precision Train:  0.9932587043006068
Precision Dev:  0.5875727763748597
Precision Test:  0.5642812123990145
Recall Train:  0.7496982254537219
Recall Dev:  0.36100059914974164
Recall Test:  0.3644334747396838
Confusion Matrix Train: 
[[ 1559   403     0]
 [    1 20777     0]
 [    0    12    10]]
Confusion Matrix Dev: 
[[  23  249    0]
 [   4 2565    0]
 [   0    2    0]]
Confusion Matrix Test: 
[[  24  226    0]
 [   7 2586    0]
 [   0    3    0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
