In [68]:
import json
import pandas as pd
from pathlib import Path
import os

In [69]:
# Linker output to evaluate (path is relative to the notebook's working directory).
data_json = Path('falcon_links') / '4' / 'link_249.json'

# Decode explicitly as UTF-8 instead of relying on the platform's locale
# encoding, so non-ASCII text in the file loads the same way on every machine.
with data_json.open(encoding="utf-8") as f:
  data = json.load(f)


In [70]:
# Build one comparison record per example: the IDs proposed by the linker
# next to the IDs that occur in the gold query.
data_filtered = []

for entry in data:

  # Each entry is unpacked as a 3-item sequence; `annotated` is not used below.
  linked, annotated, converted = entry
  # NOTE(review): linked IDs are taken as-is while gold IDs are upper-cased
  # below; this assumes the linker already emits upper-case IDs -- confirm.
  link_ents = {e['id'] for e in linked['ents']}
  link_rels = {r['id'] for r in linked['rels']}
  gold_query = converted['labels']
  gold_ents = set()
  gold_rels = set()
  # split() with no argument drops empty tokens, so leading, trailing or
  # repeated whitespace in the query can no longer trigger an IndexError
  # on tok[0].
  for tok in gold_query.split():
    prefix = tok[0].upper()
    if prefix == "Q":
      # Tokens starting with Q are collected as entities.
      gold_ents.add(tok.upper())
    elif prefix == "P":
      # Tokens starting with P are collected as relations.
      gold_rels.add(tok.upper())
  res = {
      "utterance": linked["utterance"],
      "link_ents": link_ents,
      "gold_ents": gold_ents,
      "link_rels": link_rels,
      "gold_rels": gold_rels,
  }
  data_filtered.append(res)

In [71]:
# Per-example recall records; filled by the evaluation loop later in this cell.
results = []

def recall(real, pred):
  """Return the fraction of items in `real` that also occur in `pred`.

  Args:
    real: Sized iterable of reference items.
    pred: Container of predicted items, queried with `in`.

  Returns:
    A float in [0, 1]. An empty `real` yields 1.0, since there is nothing
    to miss.
  """
  total = len(real)
  if total == 0:
    return 1.0
  hits = sum(1 for item in real if item in pred)
  return hits / total

# Score every example. The loop variable is deliberately NOT called `data`:
# that name holds the raw JSON loaded earlier, and rebinding it here would
# break any later re-run of the cell that iterates over `data`.
for pos, record in enumerate(data_filtered):
  ent_recall = recall(record["gold_ents"], record["link_ents"])
  rel_recall = recall(record["gold_rels"], record["link_rels"])

  results_dict = {
    "utterance": record["utterance"],
    "ent_recall": ent_recall,
    "rel_recall": rel_recall,
    # Position in data_filtered, kept so a row can be traced back to its record.
    "pos": pos,
  }
  results.append(results_dict)

In [72]:
# Spot-check a single processed record (position 40) to compare linked vs. gold IDs by eye.
data_filtered[40]

{'utterance': 'What sister city was born in of Zakhar Oskotsky?',
 'link_ents': {'Q1363983', 'Q144664', 'Q1530559', 'Q919422', 'Q982753'},
 'gold_ents': {'Q4338004'},
 'link_rels': {'P131',
  'P1464',
  'P1477',
  'P16',
  'P19',
  'P190',
  'P2556',
  'P36',
  'P569'},
 'gold_rels': {'P19', 'P190'}}

In [73]:
# Tabulate the per-example records and summarise entity recall.
df = pd.DataFrame(results)
print("Mean R:", df.ent_recall.mean())
# "Correct" is the share of examples whose entity recall is at least 0.9.
n_correct = int((df.ent_recall >= 0.9).sum())
print("Correct:", n_correct / len(df))
df


Mean R: 0.6721014492753623
Correct: 0.5543478260869565


Unnamed: 0,utterance,ent_recall,rel_recall,pos
0,What periodical literature does Delta Air Line...,1.000000,0.0,0
1,Who is the child of Ranavalona I's husband?,1.000000,1.0,1
2,Is it true Jeff_Bridges occupation Lane Chandl...,0.666667,1.0,2
3,What is the pre-requisite of phase matter of G...,1.000000,0.0,3
4,Which is the operating income for Qantas?,1.000000,1.0,4
...,...,...,...,...
179,How far west are we?,0.000000,0.0,179
180,Tell me national association football team who...,0.000000,0.0,180
181,Tell me the most notable work of Christopher H...,1.000000,1.0,181
182,What is the INSEE countries and foreign territ...,1.000000,0.0,182
