In [1]:
import json
import pandas as pd
from pathlib import Path
import os

In [38]:
# Location of the JSON file with the linking results evaluated in this notebook.
data_json = Path('falcon_links') / '2' / 'link_21549.json'

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform locale, which can mis-decode or reject non-ASCII text on some systems.
with open(data_json, encoding="utf-8") as f:
  data = json.load(f)


In [39]:
# Reduce every loaded record to its utterance plus four ID sets:
# linked entities/relations and gold entities/relations.
data_filtered = []

for entry in data:

  # Each record unpacks into three parts; `annotated` is not used here.
  linked, annotated, converted = entry
  link_ents = {e['id'] for e in linked['ents']}
  link_rels = {r['id'] for r in linked['rels']}
  gold_query = converted['labels']
  gold_ents = set()
  gold_rels = set()
  for tok in gold_query.split(" "):
    # split(" ") yields empty strings for repeated, leading or trailing
    # spaces; skip them instead of crashing on tok[0].
    if not tok:
      continue
    # NOTE(review): any token starting with Q/P (case-insensitive) is counted,
    # not only well-formed IDs -- confirm the gold query never contains other
    # such words.
    tok_upper = tok.upper()
    if tok_upper.startswith("Q"):
      gold_ents.add(tok_upper)
    elif tok_upper.startswith("P"):
      gold_rels.add(tok_upper)
  res = {
      "utterance": linked["utterance"],
      "link_ents": link_ents,
      "gold_ents": gold_ents,
      "link_rels": link_rels,
      "gold_rels": gold_rels,
  }
  data_filtered.append(res)

In [40]:
# One score record per utterance is appended to this list by the loop below.
results = []

def recall(real, pred):
  """Return the fraction of items in `real` that also occur in `pred`.

  An empty `real` scores 1.0, which also avoids dividing by zero.
  """
  if not len(real):
    return 1.0
  hits = sum(1 for item in real if item in pred)
  return hits / len(real)

# Score every processed example: entity recall and relation recall of the
# linked IDs against the gold IDs, plus the example's position in the list.
# The loop variable is deliberately not called `data`: that name holds the raw
# records loaded from JSON, and rebinding it here would break a re-run of the
# cell that builds `data_filtered`.
for i, sample in enumerate(data_filtered):
  ent_recall = recall(sample["gold_ents"], sample["link_ents"])
  rel_recall = recall(sample["gold_rels"], sample["link_rels"])

  results_dict= {
    "utterance": sample["utterance"],
    "ent_recall": ent_recall,
    "rel_recall": rel_recall,
    "pos": i,
  }
  results.append(results_dict)

In [41]:
# Spot-check one processed example (index 40): its linked vs. gold ID sets.
data_filtered[40]

{'utterance': 'Let me know the title of a fantastique sort that begins with the letter s.',
 'link_ents': {'Q1089150',
  'Q1168233',
  'Q12690',
  'Q133492',
  'Q1579140',
  'Q181593',
  'Q1870029',
  'Q293618',
  'Q347599',
  'Q349292',
  'Q432334',
  'Q462313',
  'Q660494',
  'Q736955',
  'Q9788'},
 'gold_ents': {'Q20076756'},
 'link_rels': {'P1274',
  'P1476',
  'P150',
  'P166',
  'P2561',
  'P3006',
  'P3036',
  'P3823',
  'P4794',
  'P580',
  'P793',
  'P800',
  'P958',
  'P97'},
 'gold_rels': {'P31'}}

In [42]:
# Tabulate the per-utterance scores and summarise entity linking:
# the mean entity recall, and the share of utterances with entity recall >= 0.9.
df = pd.DataFrame(results)
ent_recall_scores = df.ent_recall
print("Mean R:", ent_recall_scores.mean())
print("Correct:", len(df[ent_recall_scores >= 0.9]) / len(df))
df


Mean R: 0.6098142046069831
Correct: 0.4966816726226389


Unnamed: 0,utterance,ent_recall,rel_recall,pos
0,What is Delta Air Lines periodical literature ...,1.000000,0.0,0
1,What is the name of Ranavalona Is husbands child?,1.000000,0.0,1
2,Are Jeff Bridges and Lane Chandler both photog...,0.666667,0.0,2
3,What range are the papers at the Monique Genon...,0.000000,0.0,3
4,Which is the operating income for Qantas?,1.000000,1.0,4
...,...,...,...,...
21542,What is the replaced theory of science that be...,0.000000,0.0,21542
21543,Which is the family name of mayonnaise?,1.000000,1.0,21543
21544,Member of the Botafogo F.R. sports team Nilton...,1.000000,0.5,21544
21545,"For Javanese, how many writing systems are there?",0.000000,0.0,21545
