In [None]:
# Enable IPython's autoreload extension; mode 2 reloads all modules before
# each cell executes, so edits to imported source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pickle
from genre import GENRE
from genre.trie import Trie
from genre.entity_linking import get_end_to_end_prefix_allowed_tokens_fn_fariseq

In [None]:
# Deserialize the prefix tree of KILT Wikipedia titles from disk.
# NOTE: pickle.load can execute arbitrary code, so only point this at a trusted file.
with open("data/kilt_titles_trie.pkl", "rb") as trie_file:
    trie = pickle.load(trie_file)
    
# Callback that applies the constraints of the entities trie during generation.
def prefix_allowed_tokens_fn(batch_id, sent):
    """Look up in the module-level ``trie`` what may follow the prefix ``sent``.

    Args:
        batch_id: Not used by this implementation (presumably required by the
            callback signature the sampler expects -- confirm).
        sent: Object exposing ``.tolist()``; presumably the tensor of token
            ids generated so far -- confirm against the caller.

    Returns:
        Whatever ``trie.get`` returns for the list form of ``sent``.
    """
    generated_prefix = sent.tolist()
    return trie.get(generated_prefix)

# Entity Disambiguation

In [None]:
# Load the entity-disambiguation checkpoint, call .eval() on it, and move it
# to the device "cuda:0" (a CUDA-capable GPU is required for this cell).
disambiguation_checkpoint = "models/fairseq_entity_disambiguation_aidayago"
model = GENRE.from_pretrained(disambiguation_checkpoint).eval().to("cuda:0")

In [None]:
# Input sentence; the mention to disambiguate is wrapped in
# [START_ENT] ... [END_ENT] markers.
sentences = [" [START_ENT] London [END_ENT] is the capital of the UK."]

# Generate with the trie-based callback so outputs are constrained by the
# entities trie; the result is displayed as the cell output.
model.sample(sentences, prefix_allowed_tokens_fn=prefix_allowed_tokens_fn)

# Document Retrieval

In [None]:
# Load the Wikipedia-page retrieval checkpoint, call .eval() on it, and move
# it to the device "cuda:0" (a CUDA-capable GPU is required for this cell).
retrieval_checkpoint = "models/fairseq_wikipage_retrieval"
model = GENRE.from_pretrained(retrieval_checkpoint).eval().to("cuda:0")

In [None]:
# Input query for the retrieval model.
sentences = ["Stripes had Conrad Dunn featured in it"]

# Generation is unconstrained in this cell: the trie-based callback is
# deliberately not passed. To constrain the output with the entities trie, add
#     prefix_allowed_tokens_fn=prefix_allowed_tokens_fn
# to the call below.
model.sample(sentences)

# End-to-End Entity Linking

In [None]:
# Load the end-to-end entity-linking checkpoint, call .eval() on it, and move
# it to the device "cuda:0" (a CUDA-capable GPU is required for this cell).
e2e_checkpoint = "models/fairseq_e2e_entity_linking_wiki_abs"
model = GENRE.from_pretrained(e2e_checkpoint).eval().to("cuda:0")

In [None]:
# create inputs
sentences = [" London is the capital of the UK "]

# No constraints on mentions or candidates.
# The callback is bound to its own name instead of `prefix_allowed_tokens_fn`
# so the trie-based helper defined earlier in the notebook is not overwritten
# and the earlier cells stay re-runnable.
e2e_allowed_tokens_fn = get_end_to_end_prefix_allowed_tokens_fn_fariseq(model, sentences)

# generating from the model
model.sample(
    sentences,
    prefix_allowed_tokens_fn=e2e_allowed_tokens_fn,
)

In [None]:
# Constraining the mentions with a prefix tree - no constraints on candidates.
# The first id returned by model.encode is dropped before building the trie
# (presumably a beginning-of-sentence token -- confirm).
allowed_mentions = [" London"]

# Bound to its own name so the trie-based `prefix_allowed_tokens_fn` helper
# defined earlier in the notebook is not overwritten.
mention_constrained_fn = get_end_to_end_prefix_allowed_tokens_fn_fariseq(
    model,
    sentences,
    mention_trie=Trie([
        model.encode(mention).tolist()[1:]
        for mention in allowed_mentions
    ]),
)

# generating from the model
model.sample(
    sentences,
    prefix_allowed_tokens_fn=mention_constrained_fn,
)

In [None]:
# Constraining the candidate sets given a mention: each key is a mention and
# its value is the list of candidates passed for that mention.
mention_to_candidates = {
    "London": ["London"],
    "UK": ["UK"],
}

# Bound to its own name so the trie-based `prefix_allowed_tokens_fn` helper
# defined earlier in the notebook is not overwritten.
candidate_sets_constrained_fn = get_end_to_end_prefix_allowed_tokens_fn_fariseq(
    model,
    sentences,
    mention_to_candidates_dict=mention_to_candidates,
)

# generating from the model
model.sample(
    sentences,
    prefix_allowed_tokens_fn=candidate_sets_constrained_fn,
)

In [None]:
# Constraining the candidates with a prefix tree - no constraints on mentions.
# The trie is built inline so this cell runs on a fresh kernel (no
# `mention_trie` variable exists anywhere in the notebook), and it is passed as
# the candidates constraint, matching the intent stated above.
# NOTE(review): the `candidates_trie` keyword and the " }} [ {} ]" encoding
# template follow GENRE's documented end-to-end usage -- confirm against the
# installed version of genre.entity_linking.
candidate_entities = ["London", "UK"]

# Bound to its own name so the trie-based `prefix_allowed_tokens_fn` helper
# defined earlier in the notebook is not overwritten.
candidates_constrained_fn = get_end_to_end_prefix_allowed_tokens_fn_fariseq(
    model,
    sentences,
    candidates_trie=Trie([
        model.encode(" }} [ {} ]".format(entity)).tolist()[1:]
        for entity in candidate_entities
    ]),
)

# generating from the model
model.sample(
    sentences,
    prefix_allowed_tokens_fn=candidates_constrained_fn,
)

In [None]:
from genre.utils import get_entity_spans_fairseq

In [None]:
# Plain input sentence (no entity markers) used by the entity-span extraction below.
sentences = ["London is the capital of the UK"]

In [None]:
# Run the span-extraction helper with the currently bound `model` and
# `sentences`; the return value is displayed as the cell output.
get_entity_spans_fairseq(model, sentences)

In [None]:
import requests

In [None]:
# POST a JSON document (a text plus an empty list of spans) to the service
# listening on localhost:55555.
# A timeout is set because requests waits indefinitely by default, which would
# hang the notebook if the server accepts the connection but never answers.
request_body = '{ "text": "Obama will visit Germany and have a meeting with Merkel tomorrow." , "spans": []}'
res = requests.post(
    "http://localhost:55555/",
    data=request_body,
    timeout=60,
)

In [None]:
# Show every attribute stored on the response object as the cell output.
vars(res)