In [13]:
import json
import pandas as pd
from pathlib import Path
import os

In [14]:
# Alternative link files (swap one in to evaluate a different run):
#   Path('falcon_links') / '2' / 'link_21549.json'
#   Path('falcon_links') / '2' / 'link_24066.json'
data_json = Path('falcon_links') / '4' / 'link_24066.json'

# Decode explicitly as UTF-8 (the JSON interchange encoding) so that loading
# does not depend on the platform's default locale encoding.
with open(data_json, encoding="utf-8") as f:
  data = json.load(f)


In [15]:
# Reduce every raw entry to its utterance plus four id sets: the ids listed
# under linked['ents'] / linked['rels'], and the ids found in the gold query.
# NOTE(review): every space-separated token whose first character is Q/P
# (case-insensitive) is counted as a gold id; presumably the 'labels' string
# contains no other words starting with those letters -- confirm.
data_filtered = []

for linked, _annotated, converted in data:
  # Ids attached to the linked entities and relations.
  link_ents = set(ent['id'] for ent in linked['ents'])
  link_rels = set(rel['id'] for rel in linked['rels'])

  # Non-empty tokens of the gold query; splitting on a single space can
  # yield empty strings, which are dropped here.
  gold_tokens = [tok for tok in converted['labels'].split(" ") if tok]
  gold_ents = {tok.upper() for tok in gold_tokens if tok[0].upper() == "Q"}
  gold_rels = {tok.upper() for tok in gold_tokens if tok[0].upper() == "P"}

  data_filtered.append({
      "utterance": linked["utterance"],
      "link_ents": link_ents,
      "gold_ents": gold_ents,
      "link_rels": link_rels,
      "gold_rels": gold_rels,
  })

In [16]:
# Per-sample scores are collected here by the scoring loop further down.
results = []

def recall(real, pred):
  """Return the fraction of items in `real` that also occur in `pred`.

  Args:
    real: sized iterable of reference items.
    pred: container supporting `in` membership tests.

  Returns:
    1.0 when `real` is empty, otherwise hits / len(real) as a float.
  """
  total = len(real)
  if total == 0:
    # Nothing to retrieve, so nothing was missed.
    return 1.0
  hits = sum(1 for item in real if item in pred)
  return hits / total

# Score every filtered sample: recall of the gold ids among the linked ids,
# separately for entities and relations.
# The loop variable is deliberately not named `data`: that name holds the
# loaded JSON, and rebinding it here would break a re-run of the cell that
# builds `data_filtered` from it.
for i, sample in enumerate(data_filtered):
  results.append({
    "utterance": sample["utterance"],
    "ent_recall": recall(sample["gold_ents"], sample["link_ents"]),
    "rel_recall": recall(sample["gold_rels"], sample["link_rels"]),
    "pos": i,  # index of the sample within data_filtered
  })

In [17]:
# Spot-check one record (index 40): shows its utterance with the linked and
# gold id sets side by side.
data_filtered[40]

{'utterance': 'Let me know the title of a fantastique sort that begins with the letter s.',
 'link_ents': {'Q349292', 'Q462313', 'Q9788'},
 'gold_ents': {'Q20076756'},
 'link_rels': {'P1476', 'P4794', 'P793'},
 'gold_rels': {'P31'}}

In [18]:
# One row per sample; columns follow the keys of the per-sample result dicts.
df = pd.DataFrame(results)

# For each metric print its mean, then the share of samples whose recall is
# at least 0.99 (the threshold this cell uses for "Correct").
for mean_label, column in (
    ("Mean Ent Recall:", "ent_recall"),
    ("Mean Rel Recall:", "rel_recall"),
):
  scores = df[column]
  print(mean_label, scores.mean())
  print("Correct:", int((scores >= 0.99).sum()) / len(df))
df


Mean Ent Recall: 0.5164076351950355
Correct: 0.4052526595744681
Mean Rel Recall: 0.21664104055851063
Correct: 0.10625831117021277


Unnamed: 0,utterance,ent_recall,rel_recall,pos
0,What is Delta Air Lines periodical literature ...,0.500000,0.0,0
1,What is the name of Ranavalona Is husbands child?,0.000000,0.0,1
2,Are Jeff Bridges and Lane Chandler both photog...,0.666667,0.0,2
3,What range are the papers at the Monique Genon...,0.000000,0.0,3
4,Which is the operating income for Qantas?,1.000000,0.0,4
...,...,...,...,...
24059,Which infectious disease that afflicts liver a...,0.500000,0.5,24059
24060,IS THE Bubbling POINT OF THE METHANOL Breaks e...,1.000000,0.0,24060
24061,Which is the College de France professor ID (1...,1.000000,0.0,24061
24062,"What sort of individuals live in Fresno, Calif...",0.000000,0.0,24062
