In [None]:
# imports
# NOTE(review): the star import hides which helpers this notebook relies on;
# it is kept first so the explicit imports below still win on any name clash.
from embeddings_loader import *
# MLPClassifier lives in sklearn.neural_network; it is not importable from the
# top-level sklearn package.
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import (
    f1_score,
    accuracy_score,
    confusion_matrix,
    classification_report,
    recall_score,
    precision_score,
    roc_auc_score,
)
import numpy as np

In [None]:
# Load the labels for the train and dev sets.
# NOTE(review): load_labels is not defined in this notebook — presumably it is
# provided by the embeddings_loader star import; confirm.
train_labels, dev_labels = load_labels()

In [None]:
# Lookup table that maps each string label to its integer class id.
label_replacement = dict(NOT=0, OFF=1)

In [None]:
# Encode the string labels as integers through label_replacement.
# This cell is not idempotent: once the labels are integers, running it again
# raises KeyError because 0/1 are not keys of label_replacement.
train_labels, dev_labels = (
    [label_replacement[label] for label in labels]
    for labels in (train_labels, dev_labels)
)

In [None]:
# Base estimator handed to the grid search; each candidate trains for at most
# 100 iterations.
# NOTE(review): no random_state is set, so results can differ between runs.
mlp_classifier = MLPClassifier(max_iter=100)

# Exhaustive search over 2 * 2 * 2 * 2 * 2 = 32 hyper-parameter combinations,
# ranked by macro-averaged F1. The grid must be a dict literal ("key: value");
# the previous "key = value" form was a syntax error.
gridsearch = GridSearchCV(mlp_classifier, param_grid={
    'hidden_layer_sizes': [(100, 50,), (50, 20,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant', 'adaptive'],
}, scoring="f1_macro")

### GloVe Twitter 25

In [None]:
# Load the GloVe Twitter 25 embeddings as train / dev / test splits.
# NOTE(review): load_glove_twitter_25 is not defined in this notebook —
# presumably it comes from the embeddings_loader star import; confirm.
gt25_train, gt25_dev, gt25_test = load_glove_twitter_25()

In [None]:
# Replace NaN entries with 0 in every GloVe split.
# With its default arguments np.nan_to_num also maps +/-inf to large finite values.
gt25_train, gt25_dev, gt25_test = (
    np.nan_to_num(split) for split in (gt25_train, gt25_dev, gt25_test)
)

In [None]:
# Run the grid search on the GloVe training split, then keep the best
# estimator and the parameters that produced it.
grid_results = gridsearch.fit(gt25_train, train_labels)
mlp_classifier, best_params = (
    grid_results.best_estimator_,
    grid_results.best_params_,
)

In [None]:
# Display the hyper-parameters selected by the grid search on the GloVe split.
best_params

In [None]:
# mlp_classifier is the grid search's best_estimator_, which GridSearchCV
# (default refit=True) has already refit on the full GloVe training split, so
# it is saved as-is instead of being trained a second time on the same data.
# NOTE(review): save_model is not defined in this notebook — presumably it
# comes from the embeddings_loader star import; confirm.
save_model(mlp_classifier, "mlp_classifier_gt25.joblib")

In [None]:
# Predict labels for the GloVe train, dev and test splits with the fitted model.
train_preds, dev_preds, test_preds = (
    mlp_classifier.predict(split) for split in (gt25_train, gt25_dev, gt25_test)
)

In [None]:
# Pass the GloVe train and dev predictions to computeAllScores.
# NOTE(review): computeAllScores is not defined in this notebook (presumably it
# comes from the embeddings_loader star import) and no labels are passed here —
# confirm where it gets the ground truth from.
computeAllScores(train_preds, dev_preds)

### FastText 300 

In [None]:
# Load the FastText 300 embeddings as train / dev / test splits.
# NOTE(review): load_fasttext_300 is not defined in this notebook —
# presumably it comes from the embeddings_loader star import; confirm.
ft300_train, ft300_dev, ft300_test = load_fasttext_300()

In [None]:
# Replace NaN entries with 0 in every FastText split.
# With its default arguments np.nan_to_num also maps +/-inf to large finite values.
ft300_train, ft300_dev, ft300_test = (
    np.nan_to_num(split) for split in (ft300_train, ft300_dev, ft300_test)
)

In [None]:
# Run the grid search on the FastText training split, then keep the best
# estimator and the parameters that produced it.
grid_results = gridsearch.fit(ft300_train, train_labels)
mlp_classifier, best_params = (
    grid_results.best_estimator_,
    grid_results.best_params_,
)

In [None]:
# Display the hyper-parameters selected by the grid search on the FastText split.
best_params

In [None]:
# mlp_classifier is the grid search's best_estimator_, which GridSearchCV
# (default refit=True) has already refit on the full FastText training split,
# so it is saved as-is instead of being trained a second time on the same data.
# NOTE(review): save_model is not defined in this notebook — presumably it
# comes from the embeddings_loader star import; confirm.
save_model(mlp_classifier, "mlp_classifier_ft300.joblib")

In [None]:
# Predict labels for the FastText train, dev and test splits with the fitted model.
train_preds, dev_preds, test_preds = (
    mlp_classifier.predict(split) for split in (ft300_train, ft300_dev, ft300_test)
)

In [None]:
# Pass the FastText train and dev predictions to computeAllScores.
# NOTE(review): computeAllScores is not defined in this notebook (presumably it
# comes from the embeddings_loader star import) and no labels are passed here —
# confirm where it gets the ground truth from.
computeAllScores(train_preds, dev_preds)

### Word2Vec 300

In [None]:
# Load the Word2Vec 300 embeddings as train / dev / test splits.
# NOTE(review): load_word2vec_300 is not defined in this notebook —
# presumably it comes from the embeddings_loader star import; confirm.
w2v300_train, w2v300_dev, w2v300_test = load_word2vec_300()

In [None]:
# Replace NaN entries with 0 in every Word2Vec split.
# With its default arguments np.nan_to_num also maps +/-inf to large finite values.
w2v300_train, w2v300_dev, w2v300_test = (
    np.nan_to_num(split) for split in (w2v300_train, w2v300_dev, w2v300_test)
)

In [None]:
# Run the grid search on the Word2Vec training split, then keep the best
# estimator and the parameters that produced it.
grid_results = gridsearch.fit(w2v300_train, train_labels)
mlp_classifier, best_params = (
    grid_results.best_estimator_,
    grid_results.best_params_,
)

In [None]:
# Display the hyper-parameters selected by the grid search on the Word2Vec split.
best_params

In [None]:
# mlp_classifier is the grid search's best_estimator_, which GridSearchCV
# (default refit=True) has already refit on the full Word2Vec training split,
# so it is saved as-is instead of being trained a second time on the same data.
# NOTE(review): save_model is not defined in this notebook — presumably it
# comes from the embeddings_loader star import; confirm.
save_model(mlp_classifier, "mlp_classifier_w2v300.joblib")

In [None]:
# Predict labels for the Word2Vec train, dev and test splits with the fitted model.
train_preds, dev_preds, test_preds = (
    mlp_classifier.predict(split) for split in (w2v300_train, w2v300_dev, w2v300_test)
)

In [None]:
# Pass the Word2Vec train and dev predictions to computeAllScores.
# NOTE(review): computeAllScores is not defined in this notebook (presumably it
# comes from the embeddings_loader star import) and no labels are passed here —
# confirm where it gets the ground truth from.
computeAllScores(train_preds, dev_preds)

### Sentence Transformer

In [None]:
# Load the sentence-transformer embeddings as train / dev / test splits.
# NOTE(review): load_sent_trans is not defined in this notebook — presumably it
# comes from the embeddings_loader star import; confirm. Unlike the other
# embeddings, no NaN replacement is applied to these splits before fitting.
train, dev, test = load_sent_trans()

In [None]:
# Run the grid search on the sentence-transformer training split, then keep
# the best estimator and the parameters that produced it.
grid_results = gridsearch.fit(train, train_labels)
mlp_classifier, best_params = (
    grid_results.best_estimator_,
    grid_results.best_params_,
)

In [None]:
# Display the hyper-parameters selected by the grid search on the
# sentence-transformer split.
best_params

In [None]:
# mlp_classifier is the grid search's best_estimator_, which GridSearchCV
# (default refit=True) has already refit on the full sentence-transformer
# training split, so it is saved as-is instead of being trained a second time
# on the same data.
# NOTE(review): save_model is not defined in this notebook — presumably it
# comes from the embeddings_loader star import; confirm.
save_model(mlp_classifier, "mlp_classifier_better_no_pca.joblib")

In [None]:
# Predict labels for the sentence-transformer train, dev and test splits with
# the fitted model.
train_preds, dev_preds, test_preds = (
    mlp_classifier.predict(split) for split in (train, dev, test)
)

In [None]:
# Pass the sentence-transformer train and dev predictions to computeAllScores.
# NOTE(review): computeAllScores is not defined in this notebook (presumably it
# comes from the embeddings_loader star import) and no labels are passed here —
# confirm where it gets the ground truth from.
computeAllScores(train_preds, dev_preds)