In [11]:
# import relevant packages
import numpy as np
from collections import defaultdict
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine
from datasets import load_dataset
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC
import numpy as np
from tensorflow.keras.layers import Dense
import tensorflow as tf
from tensorflow.keras.models import Sequential
from sklearn.feature_extraction.text import TfidfVectorizer
import tensorflow_addons as tfa

In [12]:
dataset = load_dataset("paws", "labeled_final")

In [13]:
train_data = dataset['train']
valid_data = dataset['validation']
test_data = dataset['test']

In [14]:
def load_glove_embeddings(path):
    with open(path, 'r', encoding='utf-8') as f:
        embeddings = {}
        for line in f:
            values = line.split()
            word = values[0]
            vector = np.asarray(values[1:], dtype='float32')
            embeddings[word] = vector
    return embeddings

In [15]:
path = 'glove.6B/glove.6B.300d.txt'
glove_embeddings = load_glove_embeddings(path=path)

In [16]:
def get_cosine_similarity(vec1, vec2):
    return 1 - cosine(vec1, vec2)

In [17]:
def get_sentence_embedding(sentence, embeddings_dict):
    embeddings = [embeddings_dict[word.lower()] for word in sentence.split() if word.lower() in embeddings_dict]
    if embeddings:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros(next(iter(embeddings_dict.values())).shape)

In [18]:
def get_embeddings_and_labels(data, glove_embeddings):
    embeddings = []
    labels = []
    for example in data:
        sentence1_embedding = get_sentence_embedding(example['sentence1'], glove_embeddings)
        sentence2_embedding = get_sentence_embedding(example['sentence2'], glove_embeddings)
        embedding = np.concatenate((sentence1_embedding, sentence2_embedding))
        embeddings.append(embedding)
        labels.append(example['label'])
    return np.array(embeddings), np.array(labels)

In [19]:
word_freq = defaultdict(int)
for sentences in (train_data['sentence1'] + train_data['sentence2'] + \
                  test_data['sentence1'] + test_data['sentence2'] + \
                    valid_data['sentence1'] + valid_data['sentence2']):
    for word in sentences.split():
        word_freq[word.lower()] += 1
total_words = sum(word_freq.values())
word_prob = {word: freq / total_words for word, freq in word_freq.items()}

In [20]:
def get_sentence_embedding_sif(sentence, embeddings_dict, word_prob, a=1e-3):
    embeddings = []
    weights = []
    for word in sentence.split():
        if word.lower() in embeddings_dict and word.lower() in word_prob:
            embeddings.append(embeddings_dict[word.lower()])
            weight = a / (a + word_prob[word.lower()])
            weights.append(weight)
    if embeddings:
        weighted_embeddings = np.average(embeddings, axis=0, weights=weights)
        return weighted_embeddings
    else:
        return np.zeros(next(iter(embeddings_dict.values())).shape)

In [29]:
def get_embeddings_and_labels_sif(data, glove_embeddings):
    embeddings = []
    labels = []
    for example in data:
        sentence1_embedding = get_sentence_embedding_sif(example['sentence1'], glove_embeddings, word_prob=word_prob)
        sentence2_embedding = get_sentence_embedding_sif(example['sentence2'], glove_embeddings, word_prob=word_prob)
        embedding = np.concatenate((sentence1_embedding, sentence2_embedding))
        embeddings.append(embedding)
        labels.append(example['label'])
    return np.array(embeddings), np.array(labels)

In [21]:
all_sentences = train_data['sentence1'] + train_data['sentence2'] + \
                valid_data['sentence1'] + valid_data['sentence2'] + \
                test_data['sentence1'] + test_data['sentence2']
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(all_sentences)
feature_names = vectorizer.get_feature_names_out()
tfidf_scores = dict(zip(feature_names, vectorizer.idf_))

In [22]:
def get_sentence_embedding_tfidf(sentence, embeddings_dict, tfidf_scores):
    embeddings = []
    weights = []
    for word in sentence.split():
        if word.lower() in embeddings_dict and word.lower() in tfidf_scores:
            embeddings.append(embeddings_dict[word.lower()])
            weight = tfidf_scores[word.lower()]
            weights.append(weight)
    if embeddings:
        if not weights:
            weights = [1] * len(embeddings)
        weighted_embeddings = np.average(embeddings, axis=0, weights=weights)
        return weighted_embeddings
    else:
        return np.zeros(next(iter(embeddings_dict.values())).shape)

In [23]:
def get_embeddings_and_labels_tfidf(data, embeddings_dict, a=1e-3):
    embeddings = []
    labels = []

    for sentence1, sentence2, label in zip(data['sentence1'], data['sentence2'], data['label']):
        sentence1_embedding = get_sentence_embedding_tfidf(sentence1, embeddings_dict, tfidf_scores)
        sentence2_embedding = get_sentence_embedding_tfidf(sentence2, embeddings_dict, tfidf_scores)

        embedding = np.concatenate((sentence1_embedding, sentence2_embedding))
        embeddings.append(embedding)
        labels.append(label)

    return np.array(embeddings), np.array(labels)

In [24]:
def get_eval_metrics(actual, predictions):
    accuracy = accuracy_score(actual, predictions)
    precision = precision_score(actual, predictions)
    recall = recall_score(actual, predictions)
    f1 = f1_score(actual, predictions)

    return accuracy, precision, recall, f1

Using a simple cosine similarity function and figuring out the threshold by optimising for the best threshold

In [44]:
len(train_data['label'])

49401

In [45]:
count = 0

for label in train_data['label']:
    if label == 0:
        count = count + 1

In [46]:
count

27572

In [41]:
best_threshold = None
best_metric_f1 = -1
best_metric_accuracy = -1
best_metric_recall = -1
best_metric_precision = -1

thresholds = [i * 0.05 for i in range(1, 39)]
for threshold in thresholds:
    predictions = []
    for p1, p2 in zip(train_data['sentence1'], train_data['sentence2']):
        emb1 = get_sentence_embedding_tfidf(p1, glove_embeddings, word_prob)
        emb2 = get_sentence_embedding_tfidf(p2, glove_embeddings, word_prob)
        similarity = get_cosine_similarity(emb1, emb2)
        pred = 1 if similarity > threshold else 0
        predictions.append(pred)
    accuracy, precision, recall, f1 = get_eval_metrics(train_data['label'], predictions)
    if accuracy > best_metric_accuracy:
        best_metric_f1 = f1
        best_metric_accuracy = accuracy
        best_metric_precision = precision
        best_metric_recall = recall
        best_threshold = threshold
print(f"The best threshold is {best_threshold}")

print(f"Training Accuracy: {best_metric_accuracy:.4f}")
print(f"Training Precision: {best_metric_precision:.4f}")
print(f"Training Recall: {best_metric_recall:.4f}")
print(f"Training F1-score: {best_metric_f1:.4f}")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


The best threshold is 1.0
Training Accuracy: 0.5581
Training Precision: 0.0000
Training Recall: 0.0000
Training F1-score: 0.0000


  _warn_prf(average, modifier, msg_start, len(result))


The best threshold with training set is coming out to be 0.05. Lets use this to see what sort of a score we would get with the validation set.

In [42]:
predictions = []
threshold = 0.65

for p1, p2 in zip(test_data['sentence1'], test_data['sentence2']):
    emb1 = get_sentence_embedding_tfidf(p1, glove_embeddings, word_prob)
    emb2 = get_sentence_embedding_tfidf(p2, glove_embeddings, word_prob)
    similarity = get_cosine_similarity(emb1, emb2)
    pred = 1 if similarity > threshold else 0
    predictions.append(pred)

actual = test_data['label']
accuracy, precision, recall, f1 = get_eval_metrics(actual, predictions)
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1-score: {f1:.4f}")

Test Accuracy: 0.4420
Test Precision: 0.4420
Test Recall: 1.0000
Test F1-score: 0.6130


In [28]:
train_embeddings, train_labels = get_embeddings_and_labels(train_data, glove_embeddings)
model = LogisticRegression(
    C=10,
    penalty='l1',
    solver='liblinear',
    max_iter=500,
    tol=1e-5,
    class_weight='balanced',
    random_state=42
)
model.fit(train_embeddings, train_labels)

train_predictions = model.predict(train_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(train_labels, train_predictions)
print(f"Training Accuracy: {accuracy:.4f}")
print(f"Training Precision: {precision:.4f}")
print(f"Training Recall: {recall:.4f}")
print(f"Training F1-score: {f1:.4f}")

valid_embeddings, valid_labels = get_embeddings_and_labels(valid_data, glove_embeddings)
valid_predictions = model.predict(valid_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(valid_labels, valid_predictions)
print(f"Validation Accuracy: {accuracy:.4f}")
print(f"Validation Precision: {precision:.4f}")
print(f"Validation Recall: {recall:.4f}")
print(f"Validation F1-score: {f1:.4f}")

test_embeddings, test_labels = get_embeddings_and_labels(test_data, glove_embeddings)
test_predictions = model.predict(test_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(test_labels, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1-score: {f1:.4f}")

Training Accuracy: 0.5600
Training Precision: 0.5020
Training Recall: 0.5307
Training F1-score: 0.5159
Validation Accuracy: 0.5292
Validation Precision: 0.4700
Validation Recall: 0.5030
Validation F1-score: 0.4859
Test Accuracy: 0.5235
Test Precision: 0.4628
Test Recall: 0.4861
Test F1-score: 0.4742


In [30]:
train_embeddings, train_labels = get_embeddings_and_labels_sif(train_data, glove_embeddings)
model = LogisticRegression(
    C=10,
    penalty='l1',
    solver='liblinear',
    max_iter=500,
    tol=1e-5,
    class_weight='balanced',
    random_state=42
)
model.fit(train_embeddings, train_labels)

train_predictions = model.predict(train_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(train_labels, train_predictions)
print(f"Training Accuracy: {accuracy:.4f}")
print(f"Training Precision: {precision:.4f}")
print(f"Training Recall: {recall:.4f}")
print(f"Training F1-score: {f1:.4f}")

valid_embeddings, valid_labels = get_embeddings_and_labels_sif(valid_data, glove_embeddings)
valid_predictions = model.predict(valid_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(valid_labels, valid_predictions)
print(f"Validation Accuracy: {accuracy:.4f}")
print(f"Validation Precision: {precision:.4f}")
print(f"Validation Recall: {recall:.4f}")
print(f"Validation F1-score: {f1:.4f}")

test_embeddings, test_labels = get_embeddings_and_labels_sif(test_data, glove_embeddings)
test_predictions = model.predict(test_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(test_labels, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1-score: {f1:.4f}")

Training Accuracy: 0.5546
Training Precision: 0.4963
Training Recall: 0.5355
Training F1-score: 0.5152
Validation Accuracy: 0.5244
Validation Precision: 0.4655
Validation Recall: 0.5066
Validation F1-score: 0.4852
Test Accuracy: 0.5272
Test Precision: 0.4675
Test Recall: 0.5008
Test F1-score: 0.4836


In [31]:
train_embeddings, train_labels = get_embeddings_and_labels_tfidf(train_data, glove_embeddings)
model = LogisticRegression(
    C=10,
    penalty='l1',
    solver='liblinear',
    max_iter=500,
    tol=1e-5,
    class_weight='balanced',
    random_state=42
)
model.fit(train_embeddings, train_labels)

train_predictions = model.predict(train_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(train_labels, train_predictions)
print(f"Training Accuracy: {accuracy:.4f}")
print(f"Training Precision: {precision:.4f}")
print(f"Training Recall: {recall:.4f}")
print(f"Training F1-score: {f1:.4f}")

valid_embeddings, valid_labels = get_embeddings_and_labels_tfidf(valid_data, glove_embeddings)
valid_predictions = model.predict(valid_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(valid_labels, valid_predictions)
print(f"Validation Accuracy: {accuracy:.4f}")
print(f"Validation Precision: {precision:.4f}")
print(f"Validation Recall: {recall:.4f}")
print(f"Validation F1-score: {f1:.4f}")

test_embeddings, test_labels = get_embeddings_and_labels_tfidf(test_data, glove_embeddings)
test_predictions = model.predict(test_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(test_labels, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1-score: {f1:.4f}")

Training Accuracy: 0.5532
Training Precision: 0.4948
Training Recall: 0.5320
Training F1-score: 0.5127
Validation Accuracy: 0.5280
Validation Precision: 0.4693
Validation Recall: 0.5126
Validation F1-score: 0.4900
Test Accuracy: 0.5261
Test Precision: 0.4663
Test Recall: 0.4986
Test F1-score: 0.4819


In [39]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score

train_embeddings, train_labels = get_embeddings_and_labels_tfidf(train_data, glove_embeddings)

param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2']
}

scoring = {'accuracy': make_scorer(accuracy_score),
           'precision': make_scorer(precision_score, average='macro'),
           'recall': make_scorer(recall_score, average='macro'),
           'f1_score': make_scorer(f1_score, average='macro')}

model = LogisticRegression(
    solver='saga',
    max_iter=1000,
    tol=1e-5,
    class_weight='balanced',
    random_state=42
)

grid_search = GridSearchCV(model, param_grid, scoring=scoring, refit='f1_score', cv=5, verbose=1)
grid_search.fit(train_embeddings, train_labels)
print("Best parameters found: ", grid_search.best_params_)
print("Best F1-score achieved: ", grid_search.best_score_)
best_model = grid_search.best_estimator_
train_predictions = best_model.predict(train_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(train_labels, train_predictions)
print(f"Training Accuracy: {accuracy:.4f}")
print(f"Training Precision: {precision:.4f}")
print(f"Training Recall: {recall:.4f}")
print(f"Training F1-score: {f1:.4f}")

valid_embeddings, valid_labels = get_embeddings_and_labels_tfidf(valid_data, glove_embeddings)
valid_predictions = best_model.predict(valid_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(valid_labels, valid_predictions)
print(f"Validation Accuracy: {accuracy:.4f}")
print(f"Validation Precision: {precision:.4f}")
print(f"Validation Recall: {recall:.4f}")
print(f"Validation F1-score: {f1:.4f}")

test_embeddings, test_labels = get_embeddings_and_labels_tfidf(test_data, glove_embeddings)
test_predictions = best_model.predict(test_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(test_labels, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1-score: {f1:.4f}")


Fitting 5 folds for each of 10 candidates, totalling 50 fits




Best parameters found:  {'C': 0.1, 'penalty': 'l1'}
Best F1-score achieved:  0.5264764044620778
Training Accuracy: 0.5430
Training Precision: 0.4840
Training Recall: 0.5175
Training F1-score: 0.5002
Validation Accuracy: 0.5259
Validation Precision: 0.4648
Validation Recall: 0.4733
Validation F1-score: 0.4690
Test Accuracy: 0.5230
Test Precision: 0.4621
Test Recall: 0.4833
Test F1-score: 0.4725


In [32]:
train_embeddings, train_labels = get_embeddings_and_labels(train_data, glove_embeddings)
model = SVC(
    C=1.0,
    kernel='sigmoid',
    degree=3,
    gamma='auto',
    coef0=0.0,
    shrinking=True,
    probability=False,
    tol=1e-3,
    cache_size=200,
    class_weight='balanced',
    verbose=False,
    max_iter=-1,
    decision_function_shape='ovr',
    break_ties=False,
    random_state=42
)
model.fit(train_embeddings, train_labels)

train_predictions = model.predict(train_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(train_labels, train_predictions)
print(f"Training Accuracy: {accuracy:.4f}")
print(f"Training Precision: {precision:.4f}")
print(f"Training Recall: {recall:.4f}")
print(f"Training F1-score: {f1:.4f}")

valid_embeddings, valid_labels = get_embeddings_and_labels(valid_data, glove_embeddings)
valid_predictions = model.predict(valid_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(valid_labels, valid_predictions)
print(f"Validation Accuracy: {accuracy:.4f}")
print(f"Validation Precision: {precision:.4f}")
print(f"Validation Recall: {recall:.4f}")
print(f"Validation F1-score: {f1:.4f}")

test_embeddings, test_labels = get_embeddings_and_labels(test_data, glove_embeddings)
test_predictions = model.predict(test_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(test_labels, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1-score: {f1:.4f}")

Training Accuracy: 0.5581
Training Precision: 0.4999
Training Recall: 0.3944
Training F1-score: 0.4410
Validation Accuracy: 0.5489
Validation Precision: 0.4867
Validation Recall: 0.3606
Validation F1-score: 0.4142
Test Accuracy: 0.5465
Test Precision: 0.4837
Test Recall: 0.3849
Test F1-score: 0.4287


In [33]:
train_embeddings, train_labels = get_embeddings_and_labels_sif(train_data, glove_embeddings)
model = SVC(
    C=1.0,
    kernel='sigmoid',
    degree=3,
    gamma='auto',
    coef0=0.0,
    shrinking=True,
    probability=False,
    tol=1e-3,
    cache_size=200,
    class_weight='balanced',
    verbose=False,
    max_iter=-1,
    decision_function_shape='ovr',
    break_ties=False,
    random_state=42
)
model.fit(train_embeddings, train_labels)

train_predictions = model.predict(train_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(train_labels, train_predictions)
print(f"Training Accuracy: {accuracy:.4f}")
print(f"Training Precision: {precision:.4f}")
print(f"Training Recall: {recall:.4f}")
print(f"Training F1-score: {f1:.4f}")

valid_embeddings, valid_labels = get_embeddings_and_labels_sif(valid_data, glove_embeddings)
valid_predictions = model.predict(valid_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(valid_labels, valid_predictions)
print(f"Validation Accuracy: {accuracy:.4f}")
print(f"Validation Precision: {precision:.4f}")
print(f"Validation Recall: {recall:.4f}")
print(f"Validation F1-score: {f1:.4f}")

test_embeddings, test_labels = get_embeddings_and_labels_sif(test_data, glove_embeddings)
test_predictions = model.predict(test_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(test_labels, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1-score: {f1:.4f}")

Training Accuracy: 0.5477
Training Precision: 0.4869
Training Recall: 0.4399
Training F1-score: 0.4622
Validation Accuracy: 0.5389
Validation Precision: 0.4750
Validation Recall: 0.4032
Validation F1-score: 0.4362
Test Accuracy: 0.5369
Test Precision: 0.4740
Test Recall: 0.4361
Test F1-score: 0.4543


In [34]:
train_embeddings, train_labels = get_embeddings_and_labels_tfidf(train_data, glove_embeddings)

model = SVC(
    C=1.0,
    kernel='sigmoid',
    degree=3,
    gamma='auto',
    coef0=0.0,
    shrinking=True,
    probability=False,
    tol=1e-3,
    cache_size=200,
    class_weight='balanced',
    verbose=False,
    max_iter=-1,
    decision_function_shape='ovr',
    break_ties=False,
    random_state=42
)
model.fit(train_embeddings, train_labels)

train_predictions = model.predict(train_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(train_labels, train_predictions)
print(f"Training Accuracy: {accuracy:.4f}")
print(f"Training Precision: {precision:.4f}")
print(f"Training Recall: {recall:.4f}")
print(f"Training F1-score: {f1:.4f}")

valid_embeddings, valid_labels = get_embeddings_and_labels_tfidf(valid_data, glove_embeddings)
valid_predictions = model.predict(valid_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(valid_labels, valid_predictions)
print(f"Validation Accuracy: {accuracy:.4f}")
print(f"Validation Precision: {precision:.4f}")
print(f"Validation Recall: {recall:.4f}")
print(f"Validation F1-score: {f1:.4f}")

test_embeddings, test_labels = get_embeddings_and_labels_tfidf(test_data, glove_embeddings)
test_predictions = model.predict(test_embeddings)
accuracy, precision, recall, f1 = get_eval_metrics(test_labels, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1-score: {f1:.4f}")

Training Accuracy: 0.5461
Training Precision: 0.4854
Training Recall: 0.4547
Training F1-score: 0.4696
Validation Accuracy: 0.5383
Validation Precision: 0.4756
Validation Recall: 0.4267
Validation F1-score: 0.4498
Test Accuracy: 0.5344
Test Precision: 0.4718
Test Recall: 0.4465
Test F1-score: 0.4588


In [40]:
train_embeddings, train_labels = get_embeddings_and_labels_sif(train_data, glove_embeddings)
valid_embeddings, valid_labels = get_embeddings_and_labels_sif(valid_data, glove_embeddings)



METRICS = [
    tf.keras.metrics.AUC(name='roc-auc'),
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name="recall"),
    tfa.metrics.F1Score(name='f1_score', threshold=0.5, num_classes=1)  # Add F1 score
]

model = Sequential([
    Dense(64, activation='relu', input_shape=(train_embeddings.shape[1],)),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=METRICS)

history = model.fit(train_embeddings, train_labels, epochs=100,
                    validation_data=(valid_embeddings, valid_labels))

model.evaluate(valid_embeddings, valid_labels)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

[0.8604294061660767,
 0.5149820446968079,
 0.5394999980926514,
 0.47146791219711304,
 0.33851370215415955,
 array([0.39407897], dtype=float32)]

In [36]:
train_embeddings, train_labels = get_embeddings_and_labels_tfidf(train_data, glove_embeddings)
valid_embeddings, valid_labels = get_embeddings_and_labels_tfidf(valid_data, glove_embeddings)


METRICS = [
    tf.keras.metrics.AUC(name='roc-auc'),
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name="recall"),
    tfa.metrics.F1Score(name='f1_score', threshold=0.5, num_classes=1)
]

model = Sequential([
    Dense(64, activation='relu', input_shape=(train_embeddings.shape[1],)),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=METRICS)

history = model.fit(train_embeddings, train_labels, epochs=10,
                    validation_data=(valid_embeddings, valid_labels))

model.evaluate(valid_embeddings, valid_labels)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.6978882551193237,
 0.5460283160209656,
 0.562250018119812,
 0.5119896531105042,
 0.2232269048690796,
 array([0.31090122], dtype=float32)]

In [37]:
train_embeddings, train_labels = get_embeddings_and_labels(train_data, glove_embeddings)
valid_embeddings, valid_labels = get_embeddings_and_labels(valid_data, glove_embeddings)


METRICS = [
    tf.keras.metrics.AUC(name='roc-auc'),
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name="recall"),
    tfa.metrics.F1Score(name='f1_score', threshold=0.5, num_classes=1)
]

model = Sequential([
    Dense(64, activation='relu', input_shape=(train_embeddings.shape[1],)),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=METRICS)

history = model.fit(train_embeddings, train_labels, epochs=10,
                    validation_data=(valid_embeddings, valid_labels))

model.evaluate(valid_embeddings, valid_labels)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.6892088651657104,
 0.5548983216285706,
 0.5732499957084656,
 0.5812743902206421,
 0.12630686163902283,
 array([0.20752089], dtype=float32)]