# Pipeline 1

In [1]:
from pipeline import Falcon2Linker, SerialAnnotator, T5Converter
import time
from pprint import pprint

In [2]:
# Build the three pipeline stages once; every later cell reuses these objects.
linker = Falcon2Linker()  # used below through .link() and .link_batch()
annotator = SerialAnnotator()  # used below through .annotate()
converter = T5Converter()  # used below through .preprocess_inputs() and .preprocess()

In [3]:
# Sample question that the next cell pushes through each pipeline stage in turn.
utterance = "Who is the wife of Barack Obama"

In [4]:
# Walk the sample utterance through each stage and show every intermediate result.
# Section labels use print(): pprint() on a plain string emits its repr (with quotes).

# Stage 1: linking. The result is unpacked as keyword arguments for the annotator.
linked = linker.link(utterance)
print("Linked")
pprint(linked)

# Stage 2: annotation. The result is unpacked as keyword arguments for the converter.
print("Annotated")
annotated = annotator.annotate(**linked)
pprint(annotated)

# Stage 3: conversion of the annotated utterance.
print("Converted")
converted = converter.preprocess_inputs(**annotated)
pprint(converted)

recieved utterance
0


[reRank_relations]: who is the wife of Barack Obama 
 SPARQL Requests Made: 1 
SPARQL Requests Total Time: 0.7615091800689697 
SPARQL Avg Time per Qn: 0.7615091800689697
['Who is the wife of Barack Obama', [['<http://www.wikidata.org/entity/P26>', 'wife'], ['<http://www.wikidata.org/entity/P2848>', 'wife'], ['<http://www.wikidata.org/entity/P140>', 'wife'], ['<http://www.wikidata.org/entity/P451>', 'wife'], ['<http://www.wikidata.org/entity/P600>', 'wife']], [['<http://www.wikidata.org/entity/Q76>', 'Barack obama'], ['<http://www.wikidata.org/entity/Q649593>', 'Barack obama'], ['<http://www.wikidata.org/entity/Q4808526>', 'Barack obama'], ['<http://www.wikidata.org/entity/Q4858106>', 'Barack obama'], ['<http://www.wikidata.org/entity/Q643049>', 'Barack obama']], 0, 0, 0, 0]
'Linked'
{'ents': [{'id': 'Q76',
           'prefix': 'wd:',
           'uri': 'http://www.wikidata.org/entity/Q76'},
          {'id': 'Q649593',
           'prefix': 'wd:',
           'uri': 'http://www.wikidata.or

{'fragments': ['[DEF]',
               'wd:',
               'Q76 Barack Obama',
               '[DEF]',
               'wd:',
               'Q649593 Barack Obama Sr.',
               '[DEF]',
               'wd:',
               'Q4808526 assassination threats against Barack Obama',
               '[DEF]',
               'wd:',
               'Q4858106 Barack Obama Academy',
               '[DEF]',
               'wd:',
               'Q643049 family of Barack Obama',
               '[DEF]',
               'wdt:',
               'P26 spouse',
               '[DEF]',
               'wdt:',
               'P2848 Wi-Fi access',
               '[DEF]',
               'wdt:',
               'P140 religion or worldview',
               '[DEF]',
               'wdt:',
               'P451 unmarried partner',
               '[DEF]',
               'wdt:',
               'P600 Wine AppDB ID'],
 'utterance': 'Who is the wife of Barack Obama'}
'Converted'
('Who is the wife of Barack Obama <extr

In [5]:
def pipe(utterance, wikisparql):
    """Run a single utterance through the link -> annotate -> convert stages.

    Args:
        utterance: Question text handed to ``linker.link``.
        wikisparql: Value forwarded to ``converter.preprocess`` as its
            ``wikisparql`` keyword argument.

    Returns:
        A 3-tuple ``(linked, annotated, converted)`` with the output of each
        stage, in execution order.
    """
    # Each stage's result is splatted as keyword arguments into the next stage.
    link_result = linker.link(utterance)
    annotation = annotator.annotate(**link_result)
    return (
        link_result,
        annotation,
        converter.preprocess(**annotation, wikisparql=wikisparql),
    )

In [6]:
def pipe_batch(utterances, wikisparqls):
    """Link a batch of utterances in one call, then annotate and convert each.

    Args:
        utterances: Sequence of question texts passed to ``linker.link_batch``.
        wikisparqls: Sequence indexed in parallel with the linker's results;
            element ``i`` is forwarded to ``converter.preprocess`` as the
            ``wikisparql`` keyword argument for result ``i``.

    Returns:
        A list with one ``[single_linked, annotated, converted]`` entry per
        item returned by ``linker.link_batch``.

    Side effects:
        Prints the time spent in the linking call and the accumulated time
        spent in the annotation and conversion stages.
    """
    # perf_counter() is monotonic, so the measured durations cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    link_start = time.perf_counter()
    linked = linker.link_batch(utterances)
    link_time = time.perf_counter() - link_start

    batched = []
    annotator_time = 0
    converter_time = 0
    for i, single_linked in enumerate(linked):
        stage_start = time.perf_counter()
        annotated = annotator.annotate(**single_linked)
        annotator_time += time.perf_counter() - stage_start

        stage_start = time.perf_counter()
        converted = converter.preprocess(**annotated, wikisparql=wikisparqls[i])
        converter_time += time.perf_counter() - stage_start

        batched.append([single_linked, annotated, converted])

    print("Link batch time:", link_time)
    print("Anno batch time:", annotator_time)
    print("Conv batch time:", converter_time)
    return batched

## Data

In [7]:
from pathlib import Path
import pandas as pd
import json

# Training JSON file, located relative to the notebook's working directory.
data_path = Path("..", "t5-for-sparql", "data", "lcquad2", "train.json")

In [8]:
# Read the training file. The encoding is stated explicitly because JSON text is
# UTF-8, whereas open() would otherwise fall back to the platform's locale encoding.
with open(data_path, encoding="utf-8") as f:
    data_dict = json.load(f)
# Despite its name, data_dict is sliced and iterated as a sequence of records
# by a later cell; the DataFrame is built only for inspection here.
df = pd.DataFrame.from_dict(data_dict)
df.head()

Unnamed: 0,NNQT_question,uid,subgraph,template_index,question,sparql_wikidata,sparql_dbpedia18,template,answer,template_id,paraphrased_question
0,What is the {periodical literature} for {mouth...,19719,simple question right,65,What periodical literature does Delta Air Line...,select distinct ?obj where { wd:Q188920 wdt:P...,select distinct ?obj where { ?statement <http:...,<S P ?O ; ?O instanceOf Type>,[],1.0,What is Delta Air Line's periodical literature...
1,What is {child of} of {husband} of {Ranavalona...,15554,left-subgraph,8,Who is the child of Ranavalona I's husband?,SELECT ?answer WHERE { wd:Q169794 wdt:P26 ?X ....,SELECT ?answer WHERE { ?statement1 <http://www...,C RCD xD . xD RDE ?E,[],5.0,What is the name of Ranavalona I's husband's c...
2,Did {Jeff_Bridges} {occupation} {Lane Chandler...,974,boolean double one_hop right subgraph,474,Is it true Jeff_Bridges occupation Lane Chandl...,ASK WHERE { wd:Q174843 wdt:P106 wd:Q1804811 . ...,ASK { ?statement1 <http://www.w3.org/1999/02/...,Ask (ent-pred-obj1` . ent-pred-obj2),[],2.0,Are Jeff Bridges and Lane Chandler both photog...
3,What is {prequel of} of {phase of matter} of {...,15803,right-subgraph,33,What is the pre-requisite of phase matter of G...,SELECT ?answer WHERE { wd:Q675176 wdt:P515 ?X ...,SELECT ?answer WHERE { ?statement1 <http://www...,E REF xF . xF RFG ?G,[],2.0,What range are the papers at the Monique Genon...
4,What is <operating income> of <Qantas> ?,27610,center,1907,Which is the operating income for Qantas?,select distinct ?answer where { wd:Q32491 wdt:...,select distinct ?answer where { ?statement <ht...,E REF ?F,[],1.1,[]


In [9]:
import time

# Run the first 1000 training questions through the pipeline in fixed-size
# batches, writing a cumulative checkpoint file after every successful batch.
BATCH_SIZE = 50

# Create the checkpoint directory up front so a missing folder cannot crash the
# run after a batch has already been linked.
output_dir = Path("..") / "t5-for-sparql" / "falcon_links" / "0"
output_dir.mkdir(parents=True, exist_ok=True)

responses = []
batch_qns = []
batch_ans = []
truncated_data = data_dict[0:1000]
total_len = len(truncated_data)
last = 0  # index of the first question in the batch that is still pending
for i, data in enumerate(truncated_data):
    print("Pipeline iter", i)
    batch_qns.append(data["question"])
    batch_ans.append(data["sparql_wikidata"])

    # Flush when the batch is full or the final question has been reached.
    if ((i + 1) % BATCH_SIZE) == 0 or i == total_len - 1:
        print("[Pipeline1]:", f"Linking {last}-{i}")
        try:
            for linked, annotated, converted in pipe_batch(batch_qns, batch_ans):
                responses.append([linked, annotated, converted])
            batch_qns = []
            batch_ans = []
            # The next batch starts at the question after this one.
            last = i + 1
        except Exception as e:
            # Best effort: report the failure and keep going. The pending
            # questions are deliberately not cleared, so they are submitted
            # again together with the next batch.
            print("[Pipeline1]:", f"Batch {last}-{i} failed: {e!r}")
            continue
        # Each checkpoint holds every response collected so far.
        with open(output_dir / f"link_{i}.json", "w") as f:
            json.dump(responses, f, indent=2, separators=(',',':'))

Pipeline iter 0
Pipeline iter 1
Pipeline iter 2
Pipeline iter 3
Pipeline iter 4
Pipeline iter 5
Pipeline iter 6
Pipeline iter 7
Pipeline iter 8
Pipeline iter 9
Pipeline iter 10
Pipeline iter 11
Pipeline iter 12
Pipeline iter 13
Pipeline iter 14
Pipeline iter 15
Pipeline iter 16
Pipeline iter 17
Pipeline iter 18
Pipeline iter 19
Pipeline iter 20
Pipeline iter 21
Pipeline iter 22
Pipeline iter 23
Pipeline iter 24
Pipeline iter 25
Pipeline iter 26
Pipeline iter 27
Pipeline iter 28
Pipeline iter 29
Pipeline iter 30
Pipeline iter 31
Pipeline iter 32
Pipeline iter 33
Pipeline iter 34
Pipeline iter 35
Pipeline iter 36
Pipeline iter 37
Pipeline iter 38
Pipeline iter 39
Pipeline iter 40
Pipeline iter 41
Pipeline iter 42
Pipeline iter 43
Pipeline iter 44
Pipeline iter 45
Pipeline iter 46
Pipeline iter 47
Pipeline iter 48
Pipeline iter 49
[Pipeline1]: Linking 0-49
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


2
[reRank_relations]: which is HanCinema person ID for Zhang Ziyi 
 SPARQL Requests Made: 1 
SPARQL Requests Total Time: 0.6948697566986084 
SPARQL Avg Time per Qn: 0.6948697566986084
[reRank_relations]: which is HanCinema person ID for Zhang Ziyi 
 SPARQL Requests Made: 0 
SPARQL Requests Total Time: 0 
SPARQL Avg Time per Qn: 0
3


4


5


6


[reRank_relations]: who Sleepwalking succeeded in playing Sleepwalking 
 SPARQL Requests Made: 0 
SPARQL Requests Total Time: 0 
SPARQL Avg Time per Qn: 0


[reRank_relations]: what is the boiling point of pressure copper as 47030 
 SPARQL Requests Made: 4 
SPARQL Requests Total Time: 5.751039505004883 
SPARQL Avg Time per Qn: 1.4377598762512207
7
8


9


[reRank_relations]: who won the prize at the sequel of the 1885 Wimbledon Championships- Gentlemens Singles 
 SPARQL Requests Made: 5 
SPARQL Requests Total Time: 9.244554042816162 
SPARQL Avg Time per Qn: 1.8489108085632324
10


[reRank_relations]: which is the operating income for Qantas 
 SPARQL Requests Made: 8 
SPARQL Requests Total Time: 12.922450065612793 
SPARQL Avg Time per Qn: 1.6153062582015991


[reRank_relations]: who is the student that coined the Euler-Lagrange equation 
 SPARQL Requests Made: 1 
SPARQL Requests Total Time: 2.1194007396698 
SPARQL Avg Time per Qn: 2.1194007396698


[reRank_relations]: which is the operating income for Qantas 
 SPARQL Requests Made: 1 
SPARQL Requests Total Time: 2.050887107849121 
SPARQL Avg Time per Qn: 2.050887107849121
11
[reRank_relations]: which website does Twitch own 
 SPARQL Requests Made: 0 
SPARQL Requests Total Time: 0 
SPARQL Avg Time per Qn: 0
[reRank_relations]: which website does Twitch own 
 SPARQL Requests Made: 0 
SPARQL Requests Total Time: 0 
SPARQL Avg Time per Qn: 0
12


[reRank_relations]: is Kevin Costner owner of Fielders Stadium 
 SPARQL Requests Made: 6 
SPARQL Requests Total Time: 12.381792068481445 
SPARQL Avg Time per Qn: 2.063632011413574
13


[reRank_relations]: is the minimal lethal dose of the benzene equal to 170000 
 SPARQL Requests Made: 1 
SPARQL Requests Total Time: 2.0035016536712646 
SPARQL Avg Time per Qn: 2.0035016536712646
14


[reRank_relations]: who is the child of Ranavalona Is husband 
 SPARQL Requests Made: 10 
SPARQL Requests Total Time: 17.160552740097046 
SPARQL Avg Time per Qn: 1.7160552740097046
[reRank_relations]: who is the child of Ranavalona Is husband 
 SPARQL Requests Made: 0 
SPARQL Requests Total Time: 0 
SPARQL Avg Time per Qn: 0


In [None]:
# qns = ['What is the operating income for Qantas?', 'What is Mary Lou Rettons International Olympic Committee athlete ID.']
# l = linker.link_batch(qns)
# print(l)

In [None]:
# Write everything collected in `responses` to hpc.json in the working directory.
with open("hpc.json", "w") as out_file:
    json.dump(
        responses,
        out_file,
        indent=2,
        separators=(',', ': '),
    )