In [1]:
from embeddings_loader import *
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
from utils import *


--ip=127.0.0.1


In [4]:
# Load the ground-truth labels for the train and dev splits. The values are
# used as keys into label_replacement in a later cell, so they are expected
# to be the strings 'NOT' / 'OFF'.
train_labels, dev_labels = load_labels()

In [5]:
# Mapping from the textual class labels to the integer targets used for training.
label_replacement = dict(NOT=0, OFF=1)

In [6]:
# Replace labels with numbers.
# This cell overwrites train_labels / dev_labels in place, so a plain
# label_replacement[label] lookup raises KeyError when the cell is run a
# second time (the labels are already integers by then). Values that are
# already encoded are therefore passed through unchanged, which makes the
# cell safe to re-run; unknown labels still raise KeyError.
_encoded_labels = set(label_replacement.values())
train_labels = [
    label if label in _encoded_labels else label_replacement[label]
    for label in train_labels
]
dev_labels = [
    label if label in _encoded_labels else label_replacement[label]
    for label in dev_labels
]

### GloVe Twitter 25

In [7]:
# Load the GloVe Twitter 25 features for the train, dev and test splits.
# NOTE(review): presumably one 25-dimensional vector per text, aligned with
# the order of train_labels / dev_labels - TODO confirm in embeddings_loader.
gt25_train, gt25_dev, gt25_test = load_glove_twitter_25()

In [8]:
# Sanitize every split: np.nan_to_num replaces NaN with 0 and, with its
# default arguments, +/-inf with very large finite values.
gt25_train, gt25_dev, gt25_test = (
    np.nan_to_num(split) for split in (gt25_train, gt25_dev, gt25_test)
)

In [9]:
# Fit an LDA classifier with default settings on the GloVe Twitter 25 training
# features, then hand the fitted estimator to save_model with its file name.
linear_discriminant_analysis = LinearDiscriminantAnalysis().fit(
    X=gt25_train,
    y=train_labels,
)
save_model(
    linear_discriminant_analysis,
    "linear_discriminant_analysis_gt25.joblib",
)

In [10]:
# Predict class labels for all three splits with the classifier currently
# bound to linear_discriminant_analysis.
train_preds, dev_preds, test_preds = map(
    linear_discriminant_analysis.predict,
    (gt25_train, gt25_dev, gt25_test),
)

In [11]:
# Report train/dev metrics for the GloVe Twitter 25 classifier through the
# project helper (the recorded output lists accuracy, F1, recall and confusion
# matrices).
# NOTE(review): only predictions are passed in, so the ground-truth labels must
# come from elsewhere (presumably loaded inside computeAllScores). The recorded
# train accuracy (~0.28) is far below chance for a binary classifier scored on
# its own training data, which suggests those labels are encoded differently
# from label_replacement (NOT -> 0, OFF -> 1), i.e. the reported scores may be
# inverted - TODO confirm against utils.computeAllScores.
computeAllScores(train_preds, dev_preds)

Accuracy Train:  0.2762462235649547
Accuracy Dev:  0.2824773413897281
Weighted F1 Train:  0.22440217492293035
Weighted F1 Dev:  0.2282581096998092
Macro F1 Train:  0.26086414466689384
Macro F1 Dev:  0.2641410640059206
Micro F1 Train:  0.2762462235649547
Micro F1 Dev:  0.2824773413897281
Weighted Recall Train:  0.2762462235649547
Weighted Recall Dev:  0.2824773413897281
Macro Recall Train:  0.3686890629986925
Macro Recall Dev:  0.36618454368589165
Micro Recall Train:  0.2762462235649547
Micro Recall Dev:  0.2824773413897281
Confusion Matrix Train: 
[[2227 1258]
 [6408  699]]
Confusion Matrix Dev: 
[[ 583  332]
 [1568  165]]


### FastText 300 

In [12]:
# Load the FastText 300 features for the train, dev and test splits.
# NOTE(review): presumably one 300-dimensional vector per text, aligned with
# the order of train_labels / dev_labels - TODO confirm in embeddings_loader.
ft300_train, ft300_dev, ft300_test = load_fasttext_300()

In [13]:
# Sanitize every split: np.nan_to_num replaces NaN with 0 and, with its
# default arguments, +/-inf with very large finite values.
ft300_train, ft300_dev, ft300_test = (
    np.nan_to_num(split) for split in (ft300_train, ft300_dev, ft300_test)
)

In [14]:
# Fit an LDA classifier with default settings on the FastText 300 training
# features, then hand the fitted estimator to save_model with its file name.
linear_discriminant_analysis = LinearDiscriminantAnalysis().fit(
    X=ft300_train,
    y=train_labels,
)
save_model(
    linear_discriminant_analysis,
    "linear_discriminant_analysis_ft300.joblib",
)

In [15]:
# Predict class labels for all three splits with the classifier currently
# bound to linear_discriminant_analysis.
train_preds, dev_preds, test_preds = map(
    linear_discriminant_analysis.predict,
    (ft300_train, ft300_dev, ft300_test),
)

In [16]:
# Report train/dev metrics for the FastText 300 classifier through the project
# helper.
# NOTE(review): only predictions are passed in, so the ground-truth labels must
# come from elsewhere (presumably loaded inside computeAllScores). The recorded
# train accuracy (~0.23) is far below chance for a binary classifier scored on
# its own training data, which suggests those labels are encoded differently
# from label_replacement (NOT -> 0, OFF -> 1), i.e. the reported scores may be
# inverted - TODO confirm against utils.computeAllScores.
computeAllScores(train_preds, dev_preds)

Accuracy Train:  0.22611404833836857
Accuracy Dev:  0.24886706948640483
Weighted F1 Train:  0.18285339929591965
Weighted F1 Dev:  0.20171623664028454
Macro F1 Train:  0.2149160288911212
Macro F1 Dev:  0.23431926391680724
Micro F1 Train:  0.22611404833836857
Micro F1 Dev:  0.24886706948640483
Weighted Recall Train:  0.22611404833836857
Weighted Recall Dev:  0.24886706948640483
Macro Recall Train:  0.3023033447129843
Macro Recall Dev:  0.3221934230731635
Micro Recall Train:  0.22611404833836857
Micro Recall Dev:  0.24886706948640483
Confusion Matrix Train: 
[[1830 1655]
 [6542  565]]
Confusion Matrix Dev: 
[[ 512  403]
 [1586  147]]


### Word2Vec 300

In [17]:
# Load the Word2Vec 300 features for the train, dev and test splits.
# NOTE(review): presumably one 300-dimensional vector per text, aligned with
# the order of train_labels / dev_labels - TODO confirm in embeddings_loader.
w2v300_train, w2v300_dev, w2v300_test = load_word2vec_300()

In [18]:
# Sanitize every split: np.nan_to_num replaces NaN with 0 and, with its
# default arguments, +/-inf with very large finite values.
w2v300_train, w2v300_dev, w2v300_test = (
    np.nan_to_num(split) for split in (w2v300_train, w2v300_dev, w2v300_test)
)

In [19]:
# Fit an LDA classifier with default settings on the Word2Vec 300 training
# features, then hand the fitted estimator to save_model with its file name.
linear_discriminant_analysis = LinearDiscriminantAnalysis().fit(
    X=w2v300_train,
    y=train_labels,
)
save_model(
    linear_discriminant_analysis,
    "linear_discriminant_analysis_w2v300.joblib",
)

In [20]:
# Predict class labels for all three splits with the classifier currently
# bound to linear_discriminant_analysis.
train_preds, dev_preds, test_preds = map(
    linear_discriminant_analysis.predict,
    (w2v300_train, w2v300_dev, w2v300_test),
)

In [21]:
# Report train/dev metrics for the Word2Vec 300 classifier through the project
# helper.
# NOTE(review): only predictions are passed in, so the ground-truth labels must
# come from elsewhere (presumably loaded inside computeAllScores). The recorded
# train accuracy (~0.23) is far below chance for a binary classifier scored on
# its own training data, which suggests those labels are encoded differently
# from label_replacement (NOT -> 0, OFF -> 1), i.e. the reported scores may be
# inverted - TODO confirm against utils.computeAllScores.
computeAllScores(train_preds, dev_preds)

Accuracy Train:  0.225547583081571
Accuracy Dev:  0.24395770392749244
Weighted F1 Train:  0.18469706666259514
Weighted F1 Dev:  0.20345609483511498
Macro F1 Train:  0.21532438408539178
Macro F1 Dev:  0.2324668796315815
Micro F1 Train:  0.225547583081571
Micro F1 Dev:  0.24395770392749244
Weighted Recall Train:  0.225547583081571
Weighted Recall Dev:  0.24395770392749244
Macro Recall Train:  0.29961454132456555
Macro Recall Dev:  0.3114785630275684
Micro Recall Train:  0.225547583081571
Micro Recall Dev:  0.24395770392749244
Confusion Matrix Train: 
[[1799 1686]
 [6517  590]]
Confusion Matrix Dev: 
[[ 485  430]
 [1572  161]]


### Sentence Transformer 

In [22]:
# Load the sentence-transformer features for the train, dev and test splits.
# Unlike the other embedding sections, no np.nan_to_num clean-up is applied
# before fitting.
# NOTE(review): the generic names train/dev/test are easy to clobber from
# other cells; consider section-specific names when this section is reworked.
train, dev, test = load_sent_trans()

In [23]:
# Fit an LDA classifier with default settings on the sentence-transformer
# training features, then hand the fitted estimator to save_model with its
# file name.
linear_discriminant_analysis = LinearDiscriminantAnalysis().fit(
    X=train,
    y=train_labels,
)
save_model(
    linear_discriminant_analysis,
    "linear_discriminant_analysis_sent_trans.joblib",
)

In [24]:
# Predict class labels for all three splits with the classifier currently
# bound to linear_discriminant_analysis.
train_preds, dev_preds, test_preds = map(
    linear_discriminant_analysis.predict,
    (train, dev, test),
)

In [26]:
# Report train/dev metrics for the sentence-transformer classifier through the
# project helper.
# NOTE(review): only predictions are passed in, so the ground-truth labels must
# come from elsewhere (presumably loaded inside computeAllScores). The recorded
# train accuracy (~0.20) is far below chance for a binary classifier scored on
# its own training data, which suggests those labels are encoded differently
# from label_replacement (NOT -> 0, OFF -> 1), i.e. the reported scores may be
# inverted - TODO confirm against utils.computeAllScores.
computeAllScores(train_preds, dev_preds)

Accuracy Train:  0.198357250755287
Accuracy Dev:  0.2311178247734139
Weighted F1 Train:  0.17697099650364573
Weighted F1 Dev:  0.21236685707248024
Macro F1 Train:  0.19493088820303006
Macro F1 Dev:  0.2278600808900194
Micro F1 Train:  0.198357250755287
Micro F1 Dev:  0.2311178247734139
Weighted Recall Train:  0.198357250755287
Weighted Recall Dev:  0.2311178247734139
Macro Recall Train:  0.24988593096022088
Macro Recall Dev:  0.2776813952241762
Micro Recall Train:  0.198357250755287
Micro Recall Dev:  0.2311178247734139
Confusion Matrix Train: 
[[1396 2089]
 [6402  705]]
Confusion Matrix Dev: 
[[ 392  523]
 [1513  220]]
