In [None]:
import argparse
import pickle

import numpy as np
from spacy.tokenizer import Tokenizer
from spacy.lang.en import English
from spacy import displacy
import tensorflow as tf
from tensorflow.keras import backend as K
from food_tools.training.dataset_utils import tokens_to_indices


In [None]:
def output_to_displacy(tokens, labels):
    """Convert BIO-tagged tokens into a dict for displaCy's manual ``ent`` mode.

    The text is rebuilt by appending each token followed by a single space
    (so the result ends with a trailing space), and entity offsets are
    character positions within that rebuilt text.

    An ``I-`` label that appears while no entity is open (for example right
    after an ``O``) starts a new entity, exactly as a ``B-`` label would.
    Without this, such a label would produce an entity dict that has only an
    ``end`` key.

    Args:
        tokens: iterable of token strings.
        labels: iterable of BIO labels ("B-X", "I-X" or "O"); paired with
            ``tokens`` through ``zip``.

    Returns:
        dict with the keys "text" (str), "ents" (list of dicts with "start",
        "end" and "label") and "title" (always None).

    Raises:
        AssertionError: if an "I-" label continues an open entity whose type
            differs from the label's type.
        Exception: if a label is neither "O" nor prefixed with "B-"/"I-".
    """
    text = ""
    start = 0
    ents = []
    curr_label = ""
    new_ent = {}
    for token, label in zip(tokens, labels):
        text += token + " "
        end = start + len(token)
        is_inside = label.startswith("I-")
        if label.startswith("B-") or (is_inside and not new_ent):
            # Close whatever entity was open before starting the next one.
            if new_ent:
                ents.append(new_ent)
            curr_label = label[2:]
            new_ent = {"start": start, "end": end,
                       "label": curr_label}
        elif is_inside:
            # Explicit raise (not ``assert``) so the check survives ``python -O``.
            if label[2:] != curr_label:
                raise AssertionError(
                    "Label {} does not continue open entity {}".format(
                        label, curr_label))
            new_ent['end'] = end
        elif label == "O":
            if new_ent:
                ents.append(new_ent)
                new_ent = {}
        else:
            raise Exception("Found non-BIO label {}!".format(label))
        # +1 accounts for the space appended after every token.
        start += len(token) + 1
    if new_ent:
        ents.append(new_ent)
    doc = {"text": text,
           "ents": ents,
           "title": None}
    return doc


def load_model(model_path):
    """Load a saved Keras model inside its own graph and session.

    A fresh ``tf.Graph`` and a ``tf.Session`` bound to it are created, the
    session is registered with the Keras backend, and the model is loaded and
    summarised while that graph is the default one.

    Background on this pattern:
    https://github.com/tensorflow/tensorflow/issues/14356
    https://github.com/tensorflow/tensorflow/issues/28287

    Args:
        model_path: path handed to ``tf.keras.models.load_model``.

    Returns:
        Tuple ``(loaded_model, session)``.
    """
    graph = tf.Graph()
    session = tf.Session(graph=graph)
    with graph.as_default():
        K.set_session(session)
        keras_model = tf.keras.models.load_model(model_path)
        keras_model.summary()
    return keras_model, session


def load_mappings(filepath):
    """Load and return the object pickled in the file at ``filepath``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    unpickling finishes (or fails).

    Args:
        filepath: path to a pickle file.

    Returns:
        The unpickled object.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    with open(filepath, "rb") as handle:
        return pickle.load(handle)


def load_sentencizer_and_tokenizer():
    """Build an English pipeline with a sentencizer, plus a tokenizer.

    Returns:
        Tuple ``(nlp, tokenizer)``: the ``English`` pipeline with a
        "sentencizer" pipe added, and a tokenizer created from that
        pipeline's defaults.
    """
    pipeline = English()
    pipeline.add_pipe(pipeline.create_pipe("sentencizer"))
    return pipeline, pipeline.Defaults.create_tokenizer(pipeline)


def form_matrix(tokens):
    """Return ``tokens`` as a new array with a leading axis of length 1.

    Args:
        tokens: array-like input.

    Returns:
        A fresh ``np.ndarray`` whose shape is ``(1,) + np.shape(tokens)``.
    """
    # np.array makes a copy, so the result never aliases the caller's data.
    return np.array(np.expand_dims(tokens, axis=0))




In [None]:
# Paths to the saved model (.h5) and the pickled mappings file.
saved_model = "/Users/Carol/Google Drive/nlp_data/output/20200503_16_50_50/20200503_16_50_50_food_ner_epoch_3_dev_f1_0.9867637173043644.h5"

saved_mappings = "/Users/Carol/Google Drive/nlp_data/output/20200503_16_50_50/20200503_16_50_50_food_ner_mappings.pkl"

model, session = load_model(saved_model)
mappings = load_mappings(saved_mappings)

# Invert label -> index so that indices can be mapped back to label strings.
index_to_label = {
    index: label
    for label, index in mappings['label_to_index'].items()
}
token_to_index = mappings['token_to_index']

# `sentencizer` receives the first value returned by the loader and
# `tokenizer` the second.
sentencizer, tokenizer = load_sentencizer_and_tokenizer()

In [None]:
# Two sample recipe snippets; only the second one is processed below.
recipe_short = (
    "Heat garlic and rosemary with oil. Drizzle oil over dip and serve with "
    "vegetables."
)
recipe_long = (
    "Combine pineapple, banana, cream of coconut, rolled "
    "oats, quick-cooking oats, baking powder, mint, "
    "chia seeds, and poppy seeds in a blender; blend until "
    "smooth. Pour into 2 mugs."
)
text = recipe_long

# Split into sentences, then tokenize each sentence's text separately so that
# all_tokens holds one list of token strings per sentence.
sents = sentencizer(text)
all_tokens = [
    [tok.text for tok in tokenizer(sentence.text)]
    for sentence in sents.sents
]


In [None]:
# Run the model sentence by sentence and merge the per-sentence displaCy
# dicts into one document.
final_doc = None   # collects results from all sentences
with session.graph.as_default():
    K.set_session(session)
    for tokens in all_tokens:
        # Convert the tokens to indices once and reuse the result for predict.
        token_indices = tokens_to_indices(tokens, token_to_index)
        preds = model.predict([token_indices])
        preds = np.argmax(preds, axis=-1)
        labels = [index_to_label[ind[0]] for ind in preds]
        # NOTE(review): correct_BIO_encodings is neither defined nor imported
        # in the visible cells -- confirm where it comes from, otherwise this
        # line raises NameError on a fresh kernel.
        labels = correct_BIO_encodings(labels)
        doc = output_to_displacy(tokens, labels)
        if final_doc is None:  # first sentence
            final_doc = doc
            continue
        # Entity offsets are relative to the sentence's own text, so move them
        # by the length of the text accumulated so far before merging.
        shift = len(final_doc['text'])
        for ent in doc['ents']:
            ent['start'] += shift
            ent['end'] += shift
            final_doc['ents'].append(ent)
        final_doc['text'] += doc['text']





In [None]:
# Highlight colour used for FOOD entities.
colors = {"FOOD": "#87CEEB"}
# Defined for reference; the render call below passes only the colours.
options = {"ents": ["FOOD"], "colors": colors}
displacy.render(
    final_doc,
    style="ent",
    options={"colors": colors},
    manual=True,
)


In [None]:
# get examples from dev set
import os
import random
import sys
sys.path.append("../../..")
from food_tools.training.dataset_utils import read_conll_file, compile_vocabulary, make_label_map, get_token_embeddings, examples_to_indices, tokens_to_indices
from food_tools.training.train_utils import get_current_time, form_ner_train_matrices


In [None]:
# Read the CoNLL file that the example sentences are drawn from.
base_path = "/Users/Carol/Google Drive/"
dev_file = os.path.join(
    base_path,
    "nlp_data/recipe_data/20200523_food_gold_test.conll",
)
dev_dataset = read_conll_file(dev_file)

In [None]:
# Draw up to 10 random examples. Capping at the dataset size avoids the
# ValueError that random.sample raises when asked for more items than exist.
examples = random.sample(dev_dataset, min(10, len(dev_dataset)))

In [None]:
# Show the sampled examples as this cell's output.
examples

In [None]:
# Print every sampled example as one line of space-separated tokens
# (the first item of each entry), followed by a separator line.
for example in examples:
    text = "".join(entry[0] + " " for entry in example)
    print(text)
    print("========")

# Update (2023): build a couple of example renderings for the new Gradio app.

In [None]:
from pathlib import Path

from spacy import displacy

In [None]:
colors = {"FOOD": "#87CEEB"}
options = {"ents": ["FOOD"], "colors": colors}

# Hand-built example: only "water" (9-14) and "bread" (30-35) are marked.
doc = {
    "text": "Salt the water and butter the bread",
    "ents": [
        {"start": begin, "end": stop, "label": "FOOD"}
        for begin, stop in [(9, 14), (30, 35)]
    ],
    "title": None,
}
svg = displacy.render(
    doc,
    style="ent",
    options={"colors": colors},
    manual=True,
)

# output_path = Path("/Users/carolanderson/Dropbox/repos/food_gradio_app/app_images/salt_butter_new.svg")
# output_path.open("w", encoding="utf-8").write(svg)

In [None]:
# Hand-built example: "Salt" (0-4), "water" (9-14), "butter" (19-25) and
# "bread" (30-35) are all marked as FOOD.
doc = {
    "text": "Salt the water and butter the bread",
    "ents": [
        {"start": begin, "end": stop, "label": "FOOD"}
        for begin, stop in [(0, 4), (9, 14), (19, 25), (30, 35)]
    ],
    "title": None,
}
displacy.render(
    doc,
    style="ent",
    options={"colors": colors},
    manual=True,
)