In [1]:
from embeddings_loader import *
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
from utils import *


--ip=127.0.0.1


In [2]:
# Load the gold labels for the train, dev and test splits.
# NOTE(review): load_labels is not defined in this notebook; it presumably comes
# from one of the wildcard imports above (embeddings_loader or utils) — confirm.
train_labels, dev_labels, test_labels = load_labels()

In [3]:
# Integer class id assigned to each label string: 'NOT' is class 0, 'OFF' is class 1.
label_replacement = dict(NOT=0, OFF=1)

In [4]:
# Replace the string labels with their integer class ids (see label_replacement).
# Values that are already class ids are kept unchanged, so re-running this cell
# on the converted lists is a no-op instead of raising KeyError on 0/1.
# Any other unknown label still raises KeyError.
encoded_ids = set(label_replacement.values())
train_labels = [
    label if label in encoded_ids else label_replacement[label]
    for label in train_labels
]
dev_labels = [
    label if label in encoded_ids else label_replacement[label]
    for label in dev_labels
]
test_labels = [
    label if label in encoded_ids else label_replacement[label]
    for label in test_labels
]

### GloVe Twitter 25

In [5]:
# Load the train/dev/test feature matrices for the GloVe Twitter 25 embeddings.
# NOTE(review): load_glove_twitter_25 is not defined in this notebook; it
# presumably comes from the embeddings_loader wildcard import — confirm.
gt25_train, gt25_dev, gt25_test = load_glove_twitter_25()

In [6]:
# Clean the three GloVe splits before they are fed to the classifier.
# With its default arguments np.nan_to_num replaces NaN with 0 and also maps
# +/-inf to very large finite values.
gt25_train, gt25_dev, gt25_test = (
    np.nan_to_num(split) for split in (gt25_train, gt25_dev, gt25_test)
)

In [7]:
# Fit an LDA classifier with default hyper-parameters on the GloVe training
# split, then persist it. fit() returns the estimator itself, so fitting in a
# separate statement leaves the same fitted object bound to the name.
linear_discriminant_analysis = LinearDiscriminantAnalysis()
linear_discriminant_analysis.fit(gt25_train, train_labels)
save_model(
    linear_discriminant_analysis,
    "linear_discriminant_analysis_gt25.joblib",
)

In [8]:
# Predict class ids for every GloVe split with the model fitted above.
train_preds, dev_preds, test_preds = (
    linear_discriminant_analysis.predict(features)
    for features in (gt25_train, gt25_dev, gt25_test)
)

In [9]:
# Print accuracy, weighted/macro/micro F1 and recall, and the confusion matrix
# for the train, dev and test predictions of the GloVe model.
# NOTE(review): no gold labels are passed in, so computeAllScores presumably
# loads them itself — confirm in utils.
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.7237537764350453
Accuracy Dev:  0.7175226586102719
Accuracy Test:  0.772093023255814
Weighted F1 Train:  0.6983786340717069
Weighted F1 Dev:  0.6909114590579719
Weighted F1 Test:  0.7535072027116091
Macro F1 Train:  0.6382289183387152
Macro F1 Dev:  0.6388349967761981
Macro F1 Test:  0.6750337381916329
Micro F1 Train:  0.7237537764350453
Micro F1 Dev:  0.7175226586102719
Micro F1 Test:  0.772093023255814
Weighted Recall Train:  0.7237537764350453
Weighted Recall Dev:  0.7175226586102719
Weighted Recall Test:  0.772093023255814
Macro Recall Train:  0.6313109370013075
Macro Recall Dev:  0.6338154563141083
Macro Recall Test:  0.6593413978494624
Micro Recall Train:  0.7237537764350453
Micro Recall Dev:  0.7175226586102719
Micro Recall Test:  0.772093023255814
Confusion Matrix Train: 
[[6408  699]
 [2227 1258]]
Confusion Matrix Dev: 
[[1568  165]
 [ 583  332]]
Confusion Matrix Test: 
[[567  53]
 [143  97]]


### FastText 300 

In [10]:
# Load the train/dev/test feature matrices for the FastText 300 embeddings.
# NOTE(review): load_fasttext_300 is not defined in this notebook; it
# presumably comes from the embeddings_loader wildcard import — confirm.
ft300_train, ft300_dev, ft300_test = load_fasttext_300()

In [11]:
# Clean the three FastText splits before they are fed to the classifier.
# With its default arguments np.nan_to_num replaces NaN with 0 and also maps
# +/-inf to very large finite values.
ft300_train, ft300_dev, ft300_test = (
    np.nan_to_num(split) for split in (ft300_train, ft300_dev, ft300_test)
)

In [12]:
# Fit an LDA classifier with default hyper-parameters on the FastText training
# split, then persist it. fit() returns the estimator itself, so fitting in a
# separate statement leaves the same fitted object bound to the name.
linear_discriminant_analysis = LinearDiscriminantAnalysis()
linear_discriminant_analysis.fit(ft300_train, train_labels)
save_model(
    linear_discriminant_analysis,
    "linear_discriminant_analysis_ft300.joblib",
)

In [13]:
# Predict class ids for every FastText split with the model fitted above.
train_preds, dev_preds, test_preds = (
    linear_discriminant_analysis.predict(features)
    for features in (ft300_train, ft300_dev, ft300_test)
)

In [14]:
# Print accuracy, weighted/macro/micro F1 and recall, and the confusion matrix
# for the train, dev and test predictions of the FastText model.
# NOTE(review): no gold labels are passed in, so computeAllScores presumably
# loads them itself — confirm in utils.
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.7738859516616314
Accuracy Dev:  0.7511329305135952
Accuracy Test:  0.7953488372093023
Weighted F1 Train:  0.7580566461509345
Weighted F1 Dev:  0.7319861717174473
Weighted F1 Test:  0.7796503125827395
Macro F1 Train:  0.7127335279218873
Macro F1 Dev:  0.6890764492647103
Macro F1 Test:  0.7102847386514153
Micro F1 Train:  0.7738859516616314
Micro F1 Dev:  0.7511329305135952
Micro F1 Test:  0.7953488372093023
Weighted Recall Train:  0.7738859516616314
Weighted Recall Dev:  0.7511329305135952
Weighted Recall Test:  0.7953488372093023
Macro Recall Train:  0.6976966552870157
Macro Recall Dev:  0.6778065769268364
Macro Recall Test:  0.6907930107526882
Micro Recall Train:  0.7738859516616314
Micro Recall Dev:  0.7511329305135952
Micro Recall Test:  0.7953488372093023
Confusion Matrix Train: 
[[6542  565]
 [1830 1655]]
Confusion Matrix Dev: 
[[1586  147]
 [ 512  403]]
Confusion Matrix Test: 
[[575  45]
 [131 109]]


### Word2Vec 300

In [15]:
# Load the train/dev/test feature matrices for the Word2Vec 300 embeddings.
# NOTE(review): load_word2vec_300 is not defined in this notebook; it
# presumably comes from the embeddings_loader wildcard import — confirm.
w2v300_train, w2v300_dev, w2v300_test = load_word2vec_300()

In [16]:
# Clean the three Word2Vec splits before they are fed to the classifier.
# With its default arguments np.nan_to_num replaces NaN with 0 and also maps
# +/-inf to very large finite values.
w2v300_train, w2v300_dev, w2v300_test = (
    np.nan_to_num(split) for split in (w2v300_train, w2v300_dev, w2v300_test)
)

In [17]:
# Fit an LDA classifier with default hyper-parameters on the Word2Vec training
# split, then persist it. fit() returns the estimator itself, so fitting in a
# separate statement leaves the same fitted object bound to the name.
linear_discriminant_analysis = LinearDiscriminantAnalysis()
linear_discriminant_analysis.fit(w2v300_train, train_labels)
save_model(
    linear_discriminant_analysis,
    "linear_discriminant_analysis_w2v300.joblib",
)

In [18]:
# Predict class ids for every Word2Vec split with the model fitted above.
train_preds, dev_preds, test_preds = (
    linear_discriminant_analysis.predict(features)
    for features in (w2v300_train, w2v300_dev, w2v300_test)
)

In [19]:
# Print accuracy, weighted/macro/micro F1 and recall, and the confusion matrix
# for the train, dev and test predictions of the Word2Vec model.
# NOTE(review): no gold labels are passed in, so computeAllScores presumably
# loads them itself — confirm in utils.
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.774452416918429
Accuracy Dev:  0.7560422960725075
Accuracy Test:  0.8174418604651162
Weighted F1 Train:  0.7596260486862788
Weighted F1 Dev:  0.7402275774422259
Weighted F1 Test:  0.8040848294745883
Macro F1 Train:  0.7152082606491839
Macro F1 Dev:  0.7003002939867619
Macro F1 Test:  0.7429321574697609
Micro F1 Train:  0.774452416918429
Micro F1 Dev:  0.7560422960725075
Micro F1 Test:  0.8174418604651161
Weighted Recall Train:  0.774452416918429
Weighted Recall Dev:  0.7560422960725075
Weighted Recall Test:  0.8174418604651162
Macro Recall Train:  0.7003854586754344
Macro Recall Dev:  0.6885214369724316
Macro Recall Test:  0.7201612903225807
Micro Recall Train:  0.774452416918429
Micro Recall Dev:  0.7560422960725075
Micro Recall Test:  0.8174418604651162
Confusion Matrix Train: 
[[6517  590]
 [1799 1686]]
Confusion Matrix Dev: 
[[1572  161]
 [ 485  430]]
Confusion Matrix Test: 
[[583  37]
 [120 120]]


### Sentence Transformer 

In [20]:
# Load the train/dev/test feature matrices for the sentence-transformer embeddings.
# NOTE(review): load_sent_trans is not defined in this notebook; it presumably
# comes from the embeddings_loader wildcard import — confirm.
# NOTE(review): unlike the other embedding sections, no np.nan_to_num cleanup
# is applied to these splits — confirm that they cannot contain NaN values.
train, dev, test = load_sent_trans()

In [21]:
# Fit an LDA classifier with default hyper-parameters on the sentence-transformer
# training split, then persist it. fit() returns the estimator itself, so fitting
# in a separate statement leaves the same fitted object bound to the name.
linear_discriminant_analysis = LinearDiscriminantAnalysis()
linear_discriminant_analysis.fit(train, train_labels)
save_model(
    linear_discriminant_analysis,
    "linear_discriminant_analysis_sent_trans.joblib",
)

In [22]:
# Predict class ids for every sentence-transformer split with the model fitted above.
train_preds, dev_preds, test_preds = (
    linear_discriminant_analysis.predict(features)
    for features in (train, dev, test)
)

In [23]:
# Sanity check: display the dimensions of the dev split
# (presumably number of examples x embedding size — confirm).
dev.shape

(2648, 768)

In [24]:
# Print accuracy, weighted/macro/micro F1 and recall, and the confusion matrix
# for the train, dev and test predictions of the sentence-transformer model.
# NOTE(review): no gold labels are passed in, so computeAllScores presumably
# loads them itself — confirm in utils.
computeAllScores(train_preds, dev_preds, test_preds)

Accuracy Train:  0.801642749244713
Accuracy Dev:  0.7688821752265861
Accuracy Test:  0.7941860465116279
Weighted F1 Train:  0.7953261529328188
Weighted F1 Dev:  0.7623576357683383
Weighted F1 Test:  0.7856312008037818
Macro F1 Train:  0.7622165844209803
Macro F1 Dev:  0.731328139972718
Macro F1 Test:  0.7242945712003565
Micro F1 Train:  0.801642749244713
Micro F1 Dev:  0.7688821752265861
Micro F1 Test:  0.7941860465116279
Weighted Recall Train:  0.801642749244713
Weighted Recall Dev:  0.7688821752265861
Weighted Recall Test:  0.7941860465116279
Macro Recall Train:  0.7501140690397791
Macro Recall Dev:  0.7223186047758239
Macro Recall Test:  0.7104166666666667
Micro Recall Train:  0.801642749244713
Micro Recall Dev:  0.7688821752265861
Micro Recall Test:  0.7941860465116279
Confusion Matrix Train: 
[[6402  705]
 [1396 2089]]
Confusion Matrix Dev: 
[[1513  220]
 [ 392  523]]
Confusion Matrix Test: 
[[558  62]
 [115 125]]
