In [None]:
# imports
# NOTE: the two star imports are deliberately kept in their original positions;
# moving them could change which module wins if they export overlapping names.
from embeddings_loader import *
from sklearn.linear_model import Perceptron
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
import numpy as np
import pandas as pd  # used explicitly when building the submission DataFrame
from utils import *

In [None]:
# Fetch the label sequences for the train and dev splits.
# `load_labels` is a project helper brought in by one of the star imports
# (presumably `utils` or `embeddings_loader` -- TODO confirm which).
train_labels, dev_labels = load_labels()

In [None]:
# Integer encoding of the two string class labels used by the dataset
# (presumably "offensive" / "not offensive" -- confirm against the data card).
label_replacement = dict(OFF=0, NOT=1)

In [None]:
# Replace labels with numbers
def encode_labels(labels):
    """Map string labels to their integer codes using `label_replacement`.

    Values that are already one of the integer codes are passed through
    unchanged, so re-running this cell on already-encoded labels is a no-op
    instead of raising a KeyError. Any other unknown label still raises
    KeyError, exactly as a plain dictionary lookup would.
    """
    already_encoded = set(label_replacement.values())
    return [
        label if label in already_encoded else label_replacement[label]
        for label in labels
    ]


train_labels = encode_labels(train_labels)
dev_labels = encode_labels(dev_labels)

In [None]:
# Unfitted base estimator handed to the grid search below.
perceptron = Perceptron(max_iter=1000)

# Exhaustive search over learning rate, regularisation type and strength, and
# early stopping; candidates are ranked by macro-averaged F1.
gridsearch = GridSearchCV(
    estimator=perceptron,
    param_grid=dict(
        eta0=[1e-4, 1e-3, 1e-2, 1e-1],
        penalty=['l1', 'l2'],
        alpha=[0.0001, 0.05],
        early_stopping=[True, False],
    ),
    scoring="f1_macro",
)

### GloVe Twitter 25

In [None]:
# Load the train / dev / test feature arrays returned by the project helper
# `load_glove_twitter_25` (presumably 25-dimensional GloVe Twitter embeddings
# -- TODO confirm shape against the loader).
gt25_train, gt25_dev, gt25_test = load_glove_twitter_25()

In [None]:
# Replace every NaN in the three splits with 0 so the estimator accepts them.
# (np.nan_to_num also maps +/-inf to large finite numbers.)
gt25_train, gt25_dev, gt25_test = (
    np.nan_to_num(split) for split in (gt25_train, gt25_dev, gt25_test)
)

In [None]:
# grid_results = gridsearch.fit(gt25_train, train_labels)
# best_params = grid_results.best_params_
# perceptron = grid_results.best_estimator_

In [None]:
# best_params

In [None]:
# Load the model stored under "perceptron_gt25.joblib" (the name used by the
# commented-out save_model call in the next cell). This rebinds `perceptron`,
# replacing whatever estimator the name pointed to before.
# NOTE(review): assumes `load_model` resolves the file location itself and that
# the file already exists -- confirm before a fresh Restart & Run All.
perceptron = load_model("perceptron_gt25.joblib")

In [None]:
# perceptron = perceptron.fit(gt25_train, train_labels)
# save_model(perceptron, "perceptron_gt25.joblib")

In [None]:
# Predict labels for the train, dev and test splits with the loaded model.
train_preds, dev_preds, test_preds = map(
    perceptron.predict, (gt25_train, gt25_dev, gt25_test)
)

In [None]:
# Report evaluation scores for the train and dev predictions.
# NOTE(review): only predictions are passed, so `computeAllScores` presumably
# obtains the gold labels on its own -- confirm, since the `train_labels` /
# `dev_labels` prepared earlier in this notebook are not used here.
computeAllScores(train_preds, dev_preds)

### FastText 300 

In [None]:
# Load the train / dev / test feature arrays returned by the project helper
# `load_fasttext_300` (presumably 300-dimensional FastText embeddings
# -- TODO confirm shape against the loader).
ft300_train, ft300_dev, ft300_test = load_fasttext_300()

In [None]:
# Replace every NaN in the three splits with 0 so the estimator accepts them.
# (np.nan_to_num also maps +/-inf to large finite numbers.)
ft300_train, ft300_dev, ft300_test = (
    np.nan_to_num(split) for split in (ft300_train, ft300_dev, ft300_test)
)

In [None]:
# grid_results = gridsearch.fit(ft300_train, train_labels)
# best_params = grid_results.best_params_
# perceptron = grid_results.best_estimator_

In [None]:
# best_params

In [None]:
# Load the model stored under "perceptron_ft300.joblib" (the name used by the
# commented-out save_model call in the next cell). This rebinds `perceptron`,
# so the model from the previous section is no longer reachable by that name.
# NOTE(review): assumes the file already exists -- confirm before a fresh run.
perceptron = load_model("perceptron_ft300.joblib")

In [None]:
# perceptron = perceptron.fit(ft300_train, train_labels)
# save_model(perceptron, "perceptron_ft300.joblib")

In [None]:
# Predict labels for the train, dev and test splits with the loaded model.
train_preds, dev_preds, test_preds = map(
    perceptron.predict, (ft300_train, ft300_dev, ft300_test)
)

In [None]:
# Report evaluation scores for the train and dev predictions.
# NOTE(review): only predictions are passed, so `computeAllScores` presumably
# obtains the gold labels on its own -- confirm.
computeAllScores(train_preds, dev_preds)

### Word2Vec 300

In [None]:
# Load the train / dev / test feature arrays returned by the project helper
# `load_word2vec_300` (presumably 300-dimensional Word2Vec embeddings
# -- TODO confirm shape against the loader).
w2v300_train, w2v300_dev, w2v300_test = load_word2vec_300()

In [None]:
# Replace every NaN in the three splits with 0 so the estimator accepts them.
# (np.nan_to_num also maps +/-inf to large finite numbers.)
w2v300_train, w2v300_dev, w2v300_test = (
    np.nan_to_num(split) for split in (w2v300_train, w2v300_dev, w2v300_test)
)

In [None]:
# grid_results = gridsearch.fit(w2v300_train, train_labels)
# best_params = grid_results.best_params_
# perceptron = grid_results.best_estimator_

In [None]:
# best_params

In [None]:
# Load the model stored under "perceptron_w2v300.joblib" (the name used by the
# commented-out save_model call in the next cell). This rebinds `perceptron`,
# so the model from the previous section is no longer reachable by that name.
# NOTE(review): assumes the file already exists -- confirm before a fresh run.
perceptron = load_model("perceptron_w2v300.joblib")

In [None]:
# perceptron = perceptron.fit(w2v300_train, train_labels)
# save_model(perceptron, "perceptron_w2v300.joblib")

In [None]:
# Predict labels for the train, dev and test splits with the loaded model.
train_preds, dev_preds, test_preds = map(
    perceptron.predict, (w2v300_train, w2v300_dev, w2v300_test)
)

In [None]:
# Report evaluation scores for the train and dev predictions.
# NOTE(review): only predictions are passed, so `computeAllScores` presumably
# obtains the gold labels on its own -- confirm.
computeAllScores(train_preds, dev_preds)

### Sentence Transformer

In [None]:
# Load the train / dev / test feature arrays returned by the project helper
# `load_sent_trans` (presumably sentence-transformer embeddings -- TODO confirm).
# Unlike the sections above, no NaN clean-up is applied to these arrays.
train, dev, test = load_sent_trans()

In [None]:
# grid_results = gridsearch.fit(train, train_labels)
# best_params = grid_results.best_params_
# perceptron = grid_results.best_estimator_

In [None]:
# best_params

In [None]:
# Load the model stored under "perceptron_better_no_pca.joblib" (the name used
# by the commented-out save_model call in the next cell). This rebinds
# `perceptron`; its test predictions are the ones exported at the end of the
# notebook.
# NOTE(review): assumes the file already exists -- confirm before a fresh run.
perceptron = load_model("perceptron_better_no_pca.joblib")

In [None]:
# perceptron = perceptron.fit(train, train_labels)
# save_model(perceptron, "perceptron_better_no_pca.joblib")

In [None]:
# Predict labels for the train, dev and test splits with the loaded model.
train_preds, dev_preds, test_preds = map(perceptron.predict, (train, dev, test))

In [None]:
# Report evaluation scores for the train and dev predictions.
# NOTE(review): only predictions are passed, so `computeAllScores` presumably
# obtains the gold labels on its own -- confirm.
computeAllScores(train_preds, dev_preds)

In [None]:
# Build the export table from the most recent test predictions: one row per
# test example, with a running 0-based "id" and the predicted integer "label".
# `pd` is the pandas module imported explicitly in the imports cell, so this
# cell no longer depends on a star import leaking it into the namespace.
n_test_rows = test_preds.shape[0]
df = pd.DataFrame(
    {
        "id": range(n_test_rows),
        "label": test_preds,
    }
)

In [None]:
# Preview the first rows of the export table as a quick sanity check.
df.head()

In [None]:
# Write the export table to CSV, omitting the DataFrame index column.
# NOTE(review): the relative path assumes the notebook is run from its own
# directory and that ../Results already exists -- to_csv does not create it.
df.to_csv("../Results/Perceptron_SBERT.csv", index = False)