In [13]:
import json
import pandas as pd
from pathlib import Path
import os

In [14]:
# Location of the JSON file to evaluate, relative to the working directory.
data_json = Path('weekend.json')

# Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying on
# the platform's locale default, which can mis-decode non-ASCII text.
with data_json.open(encoding="utf-8") as f:
  data = json.load(f)


In [15]:
# Build one record per example holding the ids produced by the linker and the
# ids mentioned in the gold query, ready for set-based comparison.
data_filtered = []

for entry in data:

  # Each entry unpacks into exactly three parts; `annotated` is not used here.
  linked, annotated, converted = entry
  link_ents = {e['id'] for e in linked['ents']}
  link_rels = {r['id'] for r in linked['rels']}
  gold_query = converted['labels']
  gold_ents = set()
  gold_rels = set()
  for tok in gold_query.split(" "):
    # tok[:1] is "" for the empty tokens that split(" ") yields on consecutive,
    # leading or trailing spaces, so such tokens are skipped instead of raising
    # IndexError as tok[0] would.
    # NOTE(review): classification looks only at the first letter, so any token
    # starting with q/p is counted, presumably Q.../P... ids; confirm against data.
    prefix = tok[:1].upper()
    if prefix == "Q":
      gold_ents.add(tok.upper())
    elif prefix == "P":
      gold_rels.add(tok.upper())
  res = {
      "utterance": linked["utterance"],
      "link_ents": link_ents,
      "gold_ents": gold_ents,
      "link_rels": link_rels,
      "gold_rels": gold_rels,
  }
  data_filtered.append(res)

In [20]:
# Accumulator for the per-example score dicts appended by the loop below.
results = []

def recall(real, pred, zero_division=0.0):
  """Return the fraction of items in `real` that are also found in `pred`.

  Args:
    real: Sized iterable of reference items (duplicates are counted individually).
    pred: Container of predicted items; only membership (`in`) is used.
    zero_division: Value returned when `real` is empty, where recall is
      undefined. Defaults to 0.0.

  Returns:
    Number of items of `real` present in `pred` divided by `len(real)`, or
    `zero_division` when `real` is empty.
  """
  # Guard the empty case explicitly; dividing by len(real) would raise
  # ZeroDivisionError.
  if len(real) == 0:
    return zero_division
  hits = sum(1 for x in real if x in pred)
  return hits / len(real)

# Score every filtered example. The loop variable is deliberately not called
# `data`: that name holds the loaded JSON iterated by the filtering cell, and
# rebinding it here would break re-running that cell.
for i, sample in enumerate(data_filtered):
  ent_recall = recall(sample["gold_ents"], sample["link_ents"])
  rel_recall = recall(sample["gold_rels"], sample["link_rels"])

  results_dict = {
    "utterance": sample["utterance"],
    "ent_recall": ent_recall,
    "rel_recall": rel_recall,
    "pos": i,  # position of the example within data_filtered
  }
  results.append(results_dict)

In [22]:
# Tabulate the per-example scores: each dict in `results` becomes one row and
# its keys become the columns.
df = pd.DataFrame(results)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,utterance,ent_recall,rel_recall,pos
0,What periodical literature does Delta Air Line...,1.0,0.0,0
1,Who is the child of Ranavalona I's husband?,1.0,1.0,1
2,Is it true Jeff_Bridges occupation Lane Chandl...,0.666667,1.0,2
3,What is the pre-requisite of phase matter of G...,1.0,0.0,3
4,Which is the operating income for Qantas?,1.0,1.0,4
5,which cola starts with the letter p,1.0,0.0,5
6,Is the right ascension of malin 1 less than 15...,0.0,0.0,6
7,What is the complete list of records released ...,0.5,0.0,7
8,What's Mary Lou Retton's International Olympic...,1.0,0.0,8
9,Who won the prize at the sequel of the 1885 Wi...,0.0,0.0,9
