# Results

In [8]:
import re
import ast
import json
import math
import functools
import operator
import collections
import numpy as np
import pandas as pd
from pathlib import Path
from collections import defaultdict
from metrics import recall, mrr, ndcg, score

In [9]:
def clean_string(string):
    """Normalise a string for comparison.

    Every run of non-word characters (anything matched by ``\\W``) is removed,
    and the remainder is lowercased. Underscores are word characters and are
    therefore kept.
    """
    # Strip first, lowercase second: same order as the comparison keys used elsewhere.
    word_chars_only = re.sub(r'\W+', '', string)
    return word_chars_only.lower()

def compute_results(trues, preds_annots, k=20):
    """Score every annotator's predictions and tabulate the results.

    Args:
        trues: Ground-truth items; passed unchanged to ``score``.
        preds_annots: Mapping from a row label (e.g. an annotator name) to the
            predictions to evaluate for that label.
        k: Cut-off forwarded to ``score`` as its ``k`` keyword. Defaults to 20,
            the value that used to be hard-coded here.

    Returns:
        A DataFrame with one row per key of ``preds_annots`` followed by a
        ``'Mean'`` row holding the column-wise average; all values are rounded
        to one decimal place.
    """
    # NOTE(review): assumes score() returns a metric-name -> value mapping for
    # each label, so that the transposed frame has one column per metric --
    # confirm against metrics.score.
    data = {
        label: score(trues, preds, [recall, ndcg, mrr], k=k)
        for label, preds in preds_annots.items()
    }
    df = pd.DataFrame(data).transpose()
    # The mean is taken over the unrounded rows; rounding happens once at the end.
    df.loc['Mean'] = df.mean()
    return df.round(1)

def load_preds_annotators(path, method, annotators=(1, 2, 3)):
    """Load the predicted reference titles of each annotator for one method.

    For every annotator number ``i`` the file
    ``<path>/<method>/preds_annot<i>.json`` is read. Each file is expected to
    map an id to a list of reference objects that carry a ``"title"`` key;
    only the title is kept, normalised with ``clean_string``.

    Args:
        path: Root directory containing one sub-directory per method.
        method: Name of the method sub-directory (e.g. ``"bm25"``).
        annotators: Annotator numbers to load. Defaults to ``(1, 2, 3)``, the
            set that used to be hard-coded here.

    Returns:
        A plain dict of the form ``{"A<i>": {id: [cleaned_title, ...]}}``.

    Raises:
        FileNotFoundError: If one of the prediction files does not exist.
    """
    preds = {}
    for annotator_i in annotators:
        path_annots = Path(path) / method / f"preds_annot{annotator_i}.json"
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default text encoding when decoding titles.
        raw_preds = json.loads(path_annots.read_text(encoding="utf-8"))
        preds[f"A{annotator_i}"] = {
            id_: [clean_string(ref["title"]) for ref in references]
            for id_, references in raw_preds.items()
        }
    return preds

# Comparison of basic approaches and a search engine on the ACL Anthology dataset

In [10]:
# The `reading_list` column is read as text and parsed back into Python
# objects with ast.literal_eval.
reading_lists = pd.read_csv("../reading_lists.csv")
reading_lists['reading_list'] = reading_lists['reading_list'].apply(ast.literal_eval)

# Ground truth per reading-list id: normalised titles of the references that
# have a truthy "acl_id"; all other references are left out.
trues = {
    id_: [clean_string(ref["title"]) for ref in references if ref["acl_id"]]
    for id_, references in zip(reading_lists["id"], reading_lists["reading_list"])
}

In [11]:
# One results frame per method, each scored against the same ground truth.
df_semantic_scholar_all = compute_results(
    trues, load_preds_annotators("classic_methods/preds", "semantic_scholar_any")
)
df_semantic_scholar_acl = compute_results(
    trues, load_preds_annotators("classic_methods/preds", "semantic_scholar_acl")
)
df_bm25 = compute_results(
    trues, load_preds_annotators("classic_methods/preds", "bm25")
)
df_specterv2 = compute_results(
    trues, load_preds_annotators("classic_methods/preds", "specterv2")
)

# Place the frames side by side; `keys` becomes the top level of the column index.
table = pd.concat(
    [df_semantic_scholar_all, df_semantic_scholar_acl, df_bm25, df_specterv2],
    axis=1,
    keys=["S2 (Any)", "S2 (Acl)", "BM25", "SPECTER2"],
)
table

# Uncomment to export the table as LaTeX:
#print(table.to_latex(float_format="{:0.1f}".format))

Unnamed: 0_level_0,S2 (Any),S2 (Any),S2 (Any),S2 (Acl),S2 (Acl),S2 (Acl),BM25,BM25,BM25,SPECTER2,SPECTER2,SPECTER2
Unnamed: 0_level_1,recall,ndcg,mrr,recall,ndcg,mrr,recall,ndcg,mrr,recall,ndcg,mrr
A1,4.2,2.1,2.5,8.4,5.5,7.6,8.9,5.7,8.8,5.8,3.1,3.3
A2,2.7,1.5,2.0,5.2,3.7,5.3,9.2,5.5,6.3,5.7,3.4,4.6
A3,6.7,3.9,5.6,10.2,7.5,11.2,11.2,6.6,9.2,8.2,4.7,5.8
Mean,4.5,2.5,3.3,7.9,5.6,8.0,9.7,5.9,8.1,6.6,3.7,4.6


## Overlap of correct predictions between BM25 and SPECTER2

In [12]:
def match(trues, preds1, preds2):
    """Average number of ground-truth items that both prediction sets contain.

    Args:
        trues: Mapping from a query id to its list of ground-truth items.
        preds1: Mapping from a query id to the first system's predictions.
        preds2: Mapping from a query id to the second system's predictions.

    Returns:
        The number of distinct ground-truth items found in both ``preds1`` and
        ``preds2``, summed over all queries, divided by the number of queries
        that have at least one ground-truth item. Returns ``0.0`` when no
        query has ground truth (this used to raise ``ZeroDivisionError``).

    Raises:
        KeyError: If a query with ground truth is missing from ``preds1`` or
            ``preds2``.
    """
    shared_hits = 0
    n_queries = 0
    for query_id, trues_v in trues.items():
        # Queries without ground truth are neither counted nor looked up.
        if not trues_v:
            continue
        relevant = set(trues_v)
        # Items that are relevant AND predicted by both systems; duplicates
        # in any of the three lists count once.
        shared_hits += len(relevant & set(preds1[query_id]) & set(preds2[query_id]))
        n_queries += 1
    if n_queries == 0:
        return 0.0
    return shared_hits / n_queries

In [13]:
# Raw (unscored) predictions of the two methods being compared.
bm25_preds = load_preds_annotators("classic_methods/preds", "bm25")
specter2_preds = load_preds_annotators("classic_methods/preds", "specterv2")

# Average of the per-annotator overlap between the two methods' correct predictions.
(
    match(trues, bm25_preds["A1"], specter2_preds["A1"])
    + match(trues, bm25_preds["A2"], specter2_preds["A2"])
    + match(trues, bm25_preds["A3"], specter2_preds["A3"])
) / 3

0.1050228310502283