# Pipeline 1

In [1]:
from pipeline import Falcon2Linker, SerialAnnotator, T5Converter
from pprint import pprint

In [2]:
# Instantiate the three pipeline stages imported from `pipeline`.
# Later cells apply them in this order: link -> annotate -> convert.
linker = Falcon2Linker()
annotator = SerialAnnotator()
converter = T5Converter()

In [3]:
# Sample natural-language question used to demonstrate each pipeline stage.
utterance = "Who is the wife of Barack Obama"

In [4]:
# Walk the sample utterance through the pipeline one stage at a time,
# printing each intermediate result.
# Note: the labels go through pprint, so they are shown as quoted reprs
# (e.g. 'Linked') in the cell output.

# Stage 1: linking. The result is a dict (it is unpacked with ** below).
linked = linker.link(utterance)
pprint("Linked")
pprint(linked)

# Stage 2: annotation. The linker's dict is passed as keyword arguments.
pprint("Annotated")
annotated = annotator.annotate(**linked)
pprint(annotated)

# Stage 3: conversion. The annotator's dict is again passed as keyword arguments.
pprint("Converted")
converted = converter.preprocess_inputs(**annotated)
pprint(converted)

'Linked'
{'ents': [{'id': 'Q76',
           'prefix': 'wd:',
           'uri': 'http://www.wikidata.org/entity/Q76'},
          {'id': 'Q649593',
           'prefix': 'wd:',
           'uri': 'http://www.wikidata.org/entity/Q649593'},
          {'id': 'Q4808526',
           'prefix': 'wd:',
           'uri': 'http://www.wikidata.org/entity/Q4808526'},
          {'id': 'Q4858106',
           'prefix': 'wd:',
           'uri': 'http://www.wikidata.org/entity/Q4858106'},
          {'id': 'Q643049',
           'prefix': 'wd:',
           'uri': 'http://www.wikidata.org/entity/Q643049'}],
 'rels': [{'id': 'P26',
           'prefix': 'wdt:',
           'uri': 'http://www.wikidata.org/prop/direct/P26'},
          {'id': 'P2848',
           'prefix': 'wdt:',
           'uri': 'http://www.wikidata.org/prop/direct/P2848'},
          {'id': 'P140',
           'prefix': 'wdt:',
           'uri': 'http://www.wikidata.org/prop/direct/P140'},
          {'id': 'P451',
           'prefix': 'wdt:',
    

{'fragments': ['[DEF]',
               'wd:',
               'Q76 Barack Obama',
               '[DEF]',
               'wd:',
               'Q649593 Barack Obama Sr.',
               '[DEF]',
               'wd:',
               'Q4808526 assassination threats against Barack Obama',
               '[DEF]',
               'wd:',
               'Q4858106 Barack Obama Academy',
               '[DEF]',
               'wd:',
               'Q643049 family of Barack Obama',
               '[DEF]',
               'wdt:',
               'P26 spouse',
               '[DEF]',
               'wdt:',
               'P2848 Wi-Fi access',
               '[DEF]',
               'wdt:',
               'P140 religion or worldview',
               '[DEF]',
               'wdt:',
               'P451 unmarried partner',
               '[DEF]',
               'wdt:',
               'P600 Wine AppDB ID'],
 'utterance': 'Who is the wife of Barack Obama'}
'Converted'
('Who is the wife of Barack Obama <extr

In [5]:
def pipe(utterance, wikisparql):
    """Run one utterance through the link -> annotate -> convert pipeline.

    Uses the module-level ``linker``, ``annotator`` and ``converter`` objects.

    Args:
        utterance: The question text handed to ``linker.link``.
        wikisparql: Forwarded to ``converter.preprocess`` as the ``wikisparql``
            keyword argument, next to the annotator's output.

    Returns:
        A 3-tuple ``(linked, annotated, converted)`` holding the result of each
        stage, in pipeline order.
    """
    link_result = linker.link(utterance)
    # Each stage's result is a mapping that is unpacked as keyword arguments
    # for the following stage.
    annotation = annotator.annotate(**link_result)
    model_input = converter.preprocess(**annotation, wikisparql=wikisparql)
    return link_result, annotation, model_input

## Data

In [6]:
from pathlib import Path
import pandas as pd
import json

# Path to the training JSON, relative to the notebook's working directory
# (presumably the LC-QuAD 2.0 train split, judging by the directory name).
data_path = Path("data") / "lcquad2" / "train.json"

In [7]:
# JSON text is UTF-8 by specification; pass the encoding explicitly so parsing
# does not depend on the platform's locale default (e.g. cp1252 on Windows).
with open(data_path, encoding="utf-8") as f:
    # Despite the name, this is a list of records: it is sliced by the batch loop.
    data_dict = json.load(f)
# Tabular view for a quick preview; the batch loop consumes the raw list instead.
df = pd.DataFrame.from_dict(data_dict)
df.head()

Unnamed: 0,NNQT_question,uid,subgraph,template_index,question,sparql_wikidata,sparql_dbpedia18,template,answer,template_id,paraphrased_question
0,What is the {periodical literature} for {mouth...,19719,simple question right,65,What periodical literature does Delta Air Line...,select distinct ?obj where { wd:Q188920 wdt:P...,select distinct ?obj where { ?statement <http:...,<S P ?O ; ?O instanceOf Type>,[],1.0,What is Delta Air Line's periodical literature...
1,What is {child of} of {husband} of {Ranavalona...,15554,left-subgraph,8,Who is the child of Ranavalona I's husband?,SELECT ?answer WHERE { wd:Q169794 wdt:P26 ?X ....,SELECT ?answer WHERE { ?statement1 <http://www...,C RCD xD . xD RDE ?E,[],5.0,What is the name of Ranavalona I's husband's c...
2,Did {Jeff_Bridges} {occupation} {Lane Chandler...,974,boolean double one_hop right subgraph,474,Is it true Jeff_Bridges occupation Lane Chandl...,ASK WHERE { wd:Q174843 wdt:P106 wd:Q1804811 . ...,ASK { ?statement1 <http://www.w3.org/1999/02/...,Ask (ent-pred-obj1` . ent-pred-obj2),[],2.0,Are Jeff Bridges and Lane Chandler both photog...
3,What is {prequel of} of {phase of matter} of {...,15803,right-subgraph,33,What is the pre-requisite of phase matter of G...,SELECT ?answer WHERE { wd:Q675176 wdt:P515 ?X ...,SELECT ?answer WHERE { ?statement1 <http://www...,E REF xF . xF RFG ?G,[],2.0,What range are the papers at the Monique Genon...
4,What is <operating income> of <Qantas> ?,27610,center,1907,Which is the operating income for Qantas?,select distinct ?answer where { wd:Q32491 wdt:...,select distinct ?answer where { ?statement <ht...,E REF ?F,[],1.1,[]


In [None]:
# Run the first 200 training records through the pipeline and collect the
# per-stage results. This is best-effort: a record that fails at any stage is
# reported and skipped, so one bad question does not abort the whole batch.
responses = []
for data in data_dict[0:200]:
    question = data["question"]
    answer = data["sparql_wikidata"]  # gold SPARQL query, forwarded to the converter
    try:
        linked, annotated, converted = pipe(question, answer)
    except Exception as e:
        # Say which record failed and with what exception type; the bare
        # exception text alone does not identify the skipped question.
        print(f"Skipping uid={data.get('uid')!r} question={question!r}: "
              f"{type(e).__name__}: {e}")
        continue
    responses.append([linked, annotated, converted])

In [None]:
# Show the collected [linked, annotated, converted] triples as the cell output.
# NOTE(review): this can render up to 200 nested entries; consider slicing
# (e.g. the first few items) before sharing the notebook.
responses

In [None]:
# Serialize before opening the file: opening with "w" truncates it immediately,
# so a serialization error raised part-way through a streaming json.dump would
# leave a corrupt, partially written weekend.json behind.
serialized = json.dumps(responses, separators=(',', ': '))
with open("weekend.json", "w", encoding="utf-8") as f:
    f.write(serialized)