## F1-Embedding Evaluation for Semantic-Matching:

- precision: how many matched keyphrases with the ground-truth in the output 
- recall: how many matched keyphrases in the output w.r.t all ground-truth keyphrases
- compute semantic similarity match between two words if cosine_sim > 5.0 ==> consider as a matching

In [1]:
from sentence_transformers import SentenceTransformer, util
import glob
import csv

from pathlib import Path

model = SentenceTransformer('all-MiniLM-L6-v2')


In [2]:
Path("./Output/").mkdir(parents=True, exist_ok=True)
Path("./Output/Evaluation/").mkdir(parents=True, exist_ok=True)

file_writer = csv.writer(open('./Output/Inspec-evaluation.csv',
                         'w', encoding='UTF8', newline=''), delimiter='\t')
file_writer.writerow(['Top@', 'precision', 'recall', 'F1'])


# Get id of the test documents from the benchmarking path (../ake-datasets/../test)
# Modify the path (i.e., redirect to the ground-truth folder) to geth the gold standard keywords
test_docs = glob.glob('../ake-datasets/datasets/Inspec/test/*.xml')
test_docs = ['../Inspec/keys/' +
             doc.split('/')[-1][:-3]+'key' for doc in test_docs]


In [3]:
def compute_evaluation(cosine_scores):
    
    num_of_Similar=0 #number of similar keyphrases, we add 1 to avoid division by zero (i.e., smoothing)

    for similarty_score in cosine_scores:
        if any(similarity_threshould > 0.5 for similarity_threshould in similarty_score):                
            num_of_Similar+=1
        
    recall= num_of_Similar/len(cosine_scores[0]) # cosine_scores[0] size of ground-truth (see cosine_scores.shape)
    precision= num_of_Similar/len(cosine_scores)

        
    return round(precision, 3), round(recall, 3)   

In [6]:
babelNet_path='./Output/AKE-babelnet/' # change this path for the predicted keyphrases
present_path='./Output/PKE/'
dbpedia_path= './Output/AKE-DBpedia/'

In [8]:
sum_of_recall = 0
sum_of_precision = 0

#--- K= {3, 5, 10} ---#
k = 10

file_counts = 0

for file in test_docs:

    # reading the ground-truth keyphrases as a list
    with open(file) as fileIn:
        groundtruth_keyphrases = fileIn.readlines()[:k]
        groundtruth_keyphrases = [keyphrase.replace(
            '\n', '') for keyphrase in groundtruth_keyphrases]

        groundtruth_embedding = model.encode(
            groundtruth_keyphrases, convert_to_tensor=True)
    fileIn.close()

    # reading the generated keyphrases from babelNet as a list
    fileName = file.split('/')[-1][:-3]+'txt'
    babelNet_file = Path(babelNet_path+fileName)
    if not babelNet_file.is_file():
        continue

    with open(babelNet_path+fileName) as fileIn:
        babelNet_keyphrases = fileIn.readlines()[:k]
        babelNet_keyphrases = [keyphrase.replace(
            '\n', '') for keyphrase in babelNet_keyphrases]

    # reading the generated keyphrases from DBpedia as a list
    dbpedia_file = Path(dbpedia_path+fileName)
    if not dbpedia_file.is_file():
        continue

    file_counts += 1

    with open(dbpedia_path+fileName) as fileIn:
        dbpedia_keyphrases = fileIn.readlines()[:k]
        dbpedia_keyphrases = [keyphrase.replace(
            '\n', '') for keyphrase in dbpedia_keyphrases]

    # reading the presentk keyphrases as a list
    with open(present_path+fileName) as fileIn:
        present_keyphrases = fileIn.readlines()[:k]
        present_keyphrases = [keyphrase.replace(
            '\n', '') for keyphrase in present_keyphrases]

    # combine all keyphrases into a final list
    final_keyphrases = present_keyphrases+babelNet_keyphrases+dbpedia_keyphrases

    keyphrase_embedding = model.encode(
        final_keyphrases[:k], convert_to_tensor=True)

    cosine_scores = util.pytorch_cos_sim(
        keyphrase_embedding, groundtruth_embedding)

    # compute evaluation and save into file
    precision, recall = compute_evaluation(cosine_scores)

    sum_of_recall += recall
    sum_of_precision += precision

avg_recall = sum_of_recall/file_counts 
avg_precision = sum_of_precision/file_counts 

F1 = 2*(avg_recall*avg_precision)/(avg_precision+avg_recall)

print(['Top@'+str(k), round(avg_precision, 3), round(avg_recall, 3), round(F1, 3)])

file_writer.writerow(
    ['Top@'+str(k), round(avg_precision, 3), round(avg_recall, 3), round(F1, 3)])

file_writer.flush()

['Top@20', 0.883, 0.584, 0.703]


26