# Pipeline 1

In [1]:
from pipeline import Falcon2Linker, SerialAnnotator, T5Converter
from pprint import pprint

In [2]:
# Instantiate the three pipeline stages that the cells below call in order.
linker = Falcon2Linker()  # used via linker.link(utterance)
annotator = SerialAnnotator()  # used via annotator.annotate(**linked)
converter = T5Converter()  # used via converter.preprocess_inputs(...) / converter.preprocess(...)

In [3]:
# Sample question used for the step-by-step walkthrough in the next cell.
utterance = "Who is the wife of Barack Obama"

In [None]:
# Walk the sample utterance through each stage and show every intermediate
# result. Section labels use print(): pprint() on a string renders its repr,
# so the labels would otherwise appear wrapped in quotes.
linked = linker.link(utterance)
print("Linked")
pprint(linked)

print("Annotated")
# `linked` is splatted as keyword arguments, so it must be a mapping.
annotated = annotator.annotate(**linked)
pprint(annotated)

print("Converted")
# NOTE(review): this cell calls `preprocess_inputs`, while `pipe` calls
# `preprocess` with an extra `wikisparql` argument — confirm that both
# methods exist and that the difference is intentional.
converted = converter.preprocess_inputs(**annotated)
pprint(converted)

In [None]:
def pipe(utterance, wikisparql):
    """Run one example through linking, annotation and conversion.

    Uses the module-level ``linker``, ``annotator`` and ``converter``
    objects. The output of each stage is a mapping that is unpacked as
    keyword arguments into the next stage.

    Args:
        utterance: Input passed to ``linker.link``.
        wikisparql: Forwarded to ``converter.preprocess`` as the
            ``wikisparql`` keyword argument.

    Returns:
        A ``(linked, annotated, converted)`` tuple holding the result of
        each stage, in pipeline order.
    """
    link_result = linker.link(utterance)
    annotation = annotator.annotate(**link_result)
    return (
        link_result,
        annotation,
        converter.preprocess(**annotation, wikisparql=wikisparql),
    )

## Data

In [None]:
from pathlib import Path
import pandas as pd
import json

# Training split; the path is relative to the current working directory.
data_path = Path("data") / "lcquad2" / "train.json"

In [None]:
# Load the training file. The encoding is given explicitly so the JSON is
# decoded as UTF-8 regardless of the platform's default locale encoding.
with open(data_path, encoding="utf-8") as f:
    data_dict = json.load(f)

# Tabular view for quick inspection; the trailing expression makes the
# notebook display the first rows.
df = pd.DataFrame.from_dict(data_dict)
df.head()

In [None]:
# Run every loaded example through the pipeline, keeping the stage outputs
# of the examples that succeed.
responses = []
for index, data in enumerate(data_dict):
    question = data["question"]
    answer = data["sparql_wikidata"]
    try:
        linked, annotated, converted = pipe(question, answer)
    except Exception as e:
        # Best-effort batch run: report which example failed (position,
        # exception type and question) and move on instead of aborting.
        print(f"[{index}] {type(e).__name__}: {e} (question: {question!r})")
        continue
    responses.append([linked, annotated, converted])

In [None]:
# Bare expression so the notebook displays the collected results.
responses

In [None]:
# Persist the collected pipeline outputs as JSON. The separators drop the
# space after item commas while keeping one after each key's colon.
with open("weekend.json", mode="w") as out_file:
    json.dump(responses, out_file, separators=(",", ": "))