In [18]:
import argparse
import pickle

import numpy as np
from spacy.tokenizer import Tokenizer
from spacy.lang.en import English
from spacy import displacy
import tensorflow as tf
from tensorflow.keras import backend as K
from food_tools.training.dataset_utils import tokens_to_indices


ImportError: cannot import name 'correct_BIO_encodings'

In [3]:
def output_to_displacy(tokens, labels):
    """Convert a BIO-tagged token sequence into displaCy's manual "ent" format.

    The text is rebuilt by following every token with a single space (so it
    ends with a trailing space); entity offsets are character offsets into
    that rebuilt text.

    Args:
        tokens: sequence of token strings.
        labels: sequence of BIO tags ("B-<TYPE>", "I-<TYPE>" or "O"), paired
            positionally with ``tokens``. Surplus items in the longer
            sequence are ignored (``zip`` semantics).

    Returns:
        dict with keys "text" (str), "ents" (list of
        ``{"start", "end", "label"}`` dicts in order of appearance) and
        "title" (always None).

    Raises:
        AssertionError: if an "I-" tag does not continue an open entity of
            the same type (it opens the sequence, follows an "O", or
            switches type mid-entity).
        Exception: if a label is not a BIO tag at all.
    """
    pieces = []      # "token " fragments, joined once at the end
    ents = []
    new_ent = {}     # entity currently being extended; empty when none is open
    curr_label = ""
    start = 0
    for token, label in zip(tokens, labels):
        pieces.append(token + " ")
        end = start + len(token)
        if label.startswith("B-"):
            # A new entity begins: flush whatever was open.
            if new_ent:
                ents.append(new_ent)
            curr_label = label[2:]
            new_ent = {"start": start, "end": end,
                       "label": curr_label}
        elif label.startswith("I-"):
            # Checking `new_ent` as well as the type stops an I- tag that
            # follows an "O" from creating an entity with no start/label.
            # Raised explicitly (not via `assert`) so the check survives
            # `python -O`; the exception type is unchanged.
            if not new_ent or label[2:] != curr_label:
                raise AssertionError(
                    "Label {} does not continue an open {} entity".format(
                        label, label[2:]))
            new_ent['end'] = end
        elif label == "O":
            if new_ent:
                ents.append(new_ent)
                new_ent = {}
        else:
            raise Exception("Found non-BIO label {}!".format(label))
        start = end + 1  # skip the separating space
    if new_ent:
        ents.append(new_ent)
    doc = {"text": "".join(pieces),
           "ents": ents,
           "title": None}
    return doc


def load_model(model_path):
    """
    Load a saved Keras model inside its own graph and session.

    A new ``tf.Graph`` and a ``tf.Session`` bound to it are created, the
    session is registered with the Keras backend, and the model is loaded
    and summarized while that graph is the default one.

    Background on this pattern:
    https://github.com/tensorflow/tensorflow/issues/14356
    https://github.com/tensorflow/tensorflow/issues/28287

    Args:
        model_path: path handed to ``tf.keras.models.load_model``.

    Returns:
        ``(model, session)`` -- the loaded model and the session it was
        loaded under.
    """
    sess = tf.Session(graph=tf.Graph())
    with sess.graph.as_default():
        K.set_session(sess)
        keras_model = tf.keras.models.load_model(model_path)
        keras_model.summary()
    return keras_model, sess


def load_mappings(filepath):
    """Load and return the pickled object stored at ``filepath``.

    Args:
        filepath: path to a pickle file.

    Returns:
        Whatever object was pickled into the file.
    """
    # NOTE: unpickling can execute arbitrary code -- only load files you trust.
    # The `with` block closes the handle promptly instead of leaking it.
    with open(filepath, "rb") as handle:
        return pickle.load(handle)


def load_sentencizer_and_tokenizer():
    """Build an English pipeline with a sentencizer, plus a tokenizer.

    Returns:
        ``(nlp, tokenizer)`` -- the ``English`` pipeline whose only added
        component is the "sentencizer" pipe, and a tokenizer created from
        that pipeline's defaults.
    """
    pipeline = English()
    pipeline.add_pipe(pipeline.create_pipe("sentencizer"))
    return pipeline, pipeline.Defaults.create_tokenizer(pipeline)


def form_matrix(tokens):
    """Return ``tokens`` as a new array with a leading axis of length 1.

    Args:
        tokens: array-like accepted by ``np.expand_dims``.

    Returns:
        A fresh ``np.ndarray`` of shape ``(1, *shape_of_tokens)``.
    """
    with_leading_axis = np.expand_dims(tokens, axis=0)
    # np.array copies, so the result does not alias the caller's data.
    return np.array(with_leading_axis)




In [4]:
# Artifacts of one training run: the saved Keras model and the pickled
# vocabulary/label mappings that go with it (machine-specific absolute paths).
saved_model = (
    "/Users/Carol/Google Drive/nlp_data/output/20200503_16_50_50/"
    "20200503_16_50_50_food_ner_epoch_3_dev_f1_0.9867637173043644.h5"
)
saved_mappings = (
    "/Users/Carol/Google Drive/nlp_data/output/20200503_16_50_50/"
    "20200503_16_50_50_food_ner_mappings.pkl"
)

# load_model prints the model summary as a side effect.
model, session = load_model(saved_model)
mappings = load_mappings(saved_mappings)

# Invert label -> index so predicted indices can be turned back into tags.
index_to_label = {
    index: label for label, index in mappings['label_to_index'].items()
}
token_to_index = mappings['token_to_index']

# Note: `sentencizer` is bound to the first returned value, which is the
# whole English pipeline (its only pipe is the sentencizer).
sentencizer, tokenizer = load_sentencizer_and_tokenizer()

W0503 12:20:41.921288 140736447558592 deprecation.py:506] From /Users/Carol/anaconda/envs/nlp/lib/python3.6/site-packages/tensorflow_core/python/keras/initializers.py:119: calling RandomUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0503 12:20:41.933290 140736447558592 deprecation.py:506] From /Users/Carol/anaconda/envs/nlp/lib/python3.6/site-packages/tensorflow_core/python/ops/init_ops.py:97: calling GlorotUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0503 12:20:41.934798 140736447558592 deprecation.py:506] From /Users/Carol/anaconda/envs/nlp/lib/python3.6/site-packages/tensorflow_core/python/ops/init_ops

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
token_input (InputLayer)     [(None, None)]            0         
_________________________________________________________________
word_embeddings (Embedding)  (None, None, 100)         40000500  
_________________________________________________________________
BiLSTM (Bidirectional)       (None, None, 200)         160800    
_________________________________________________________________
output_softmax (TimeDistribu (None, None, 3)           603       
Total params: 40,161,903
Trainable params: 40,161,903
Non-trainable params: 0
_________________________________________________________________


In [8]:
# First sample text; it is overwritten by the assignment right below and is
# kept only as an alternative input to switch to.
text = (
    "Heat garlic and rosemary with oil. Drizzle oil over dip and serve with "
    "vegetables."
)
text = (
    "Combine pineapple, banana, cream of coconut, rolled "
    "oats, quick-cooking oats, baking powder, mint, "
    "chia seeds, and poppy seeds in a blender; blend until "
    "smooth. Pour into 2 mugs."
)

# Split the text into sentences, then tokenize each sentence separately so
# that `all_tokens` holds one list of token strings per sentence.
sents = sentencizer(text)
all_tokens = []
for sent in sents.sents:
    tokens = tokenizer(sent.text)
    all_tokens.append([tok.text for tok in tokens])


In [19]:
def _repair_bio_labels(labels):
    """Return a copy of ``labels`` in which every "I-X" tag that does not
    continue an open entity of type X is promoted to "B-X".

    Stand-in used only when the project's ``correct_BIO_encodings`` helper is
    not available; it guarantees the sequence is accepted by
    ``output_to_displacy``.
    """
    repaired = []
    open_type = None  # type of the entity the previous tag belongs to, if any
    for label in labels:
        if label.startswith("I-") and label[2:] != open_type:
            label = "B-" + label[2:]
        open_type = label[2:] if label[:2] in ("B-", "I-") else None
        repaired.append(label)
    return repaired


# `correct_BIO_encodings` is not imported by any cell visible in this
# notebook, so calling it directly raises NameError on a fresh kernel.
# Use it when some cell has brought it into the namespace, otherwise fall
# back to the local repair above.
fix_bio_labels = globals().get("correct_BIO_encodings", _repair_bio_labels)

final_doc = None   # collects results from all sentences
with session.graph.as_default():
    # The model lives in its own graph/session (see load_model), so both must
    # be made current before predicting.
    K.set_session(session)
    for tokens in all_tokens:
        token_indices = tokens_to_indices(tokens, token_to_index)
        # NOTE(review): `ind[0]` below takes one label per row of `preds`,
        # which suggests each token is fed as its own length-1 sequence --
        # confirm this is intended; form_matrix() would build a single
        # (1, seq_len) batch instead.
        preds = model.predict([token_indices])
        preds = np.argmax(preds, axis=-1)
        labels = [index_to_label[ind[0]] for ind in preds]
        labels = fix_bio_labels(labels)
        doc = output_to_displacy(tokens, labels)
        if final_doc is None:  # first sentence
            final_doc = doc
            continue
        # Later sentences are appended to the running text, so their entity
        # offsets must be shifted by the length of the text so far.
        shift = len(final_doc['text'])
        for ent in doc['ents']:
            ent['start'] += shift
            ent['end'] += shift
            final_doc['ents'].append(ent)
        final_doc['text'] += doc['text']





In [20]:
# Render the merged document with displaCy's entity visualizer.
colors = {"FOOD": "#87CEEB"}
# Restrict highlighting to FOOD entities and use the custom colour.
options = {"ents": ["FOOD"], "colors": colors}
# `manual=True`: final_doc is already a dict in displaCy's input format
# (text/ents/title) rather than a spaCy Doc. Pass the full `options` dict --
# it was previously built but ignored in favour of a partial copy.
displacy.render(final_doc, style="ent", options=options,
                manual=True)


In [28]:
# get examples from dev set
import os
import random
import sys
sys.path.append("../../..")
from food_tools.training.dataset_utils import read_conll_file, compile_vocabulary, make_label_map, get_token_embeddings, examples_to_indices, tokens_to_indices
from food_tools.training.train_utils import get_current_time, form_ner_train_matrices


In [30]:
# Root folder of the data (machine-specific absolute path).
base_path = "/Users/Carol/Google Drive/"
# Dev-set file, read with the project's read_conll_file helper.
dev_file = os.path.join(base_path, "nlp_data/recipe_data/food_gold_dev.conll")
# Judging by the sample displayed further down, each example is a list of
# [token, tag] pairs.
dev_dataset = read_conll_file(dev_file)

In [39]:
# Draw 10 dev-set examples to inspect. A dedicated seeded generator keeps the
# sample identical across kernel restarts without touching the global RNG.
EXAMPLE_SEED = 0
examples = random.Random(EXAMPLE_SEED).sample(dev_dataset, 10)

In [40]:
# Display the sampled examples; each one shows as a list of [token, tag] pairs.
examples

[[['In', 'O'],
  ['a', 'O'],
  ['cocktail', 'O'],
  ['shaker', 'O'],
  [',', 'O'],
  ['combine', 'O'],
  ['gin', 'B-FOOD'],
  [',', 'O'],
  ['Chambord', 'B-FOOD'],
  [',', 'O'],
  ['cranberry', 'B-FOOD'],
  ['juice', 'I-FOOD'],
  ['and', 'O'],
  ['egg', 'B-FOOD'],
  ['white', 'I-FOOD'],
  [',', 'O'],
  ['shake', 'O'],
  ['the', 'O'],
  ['drink', 'O'],
  ['vigorously', 'O'],
  [',', 'O'],
  ['and', 'O'],
  ['strain', 'O'],
  ['it', 'O'],
  ['into', 'O'],
  ['a', 'O'],
  ['chilled', 'O'],
  ['cocktail', 'O'],
  ['glass', 'O'],
  ['.', 'O']],
 [['2', 'O'],
  ['.', 'O'],
  ['Toss', 'O'],
  ['the', 'O'],
  ['asparagus', 'B-FOOD'],
  ['with', 'O'],
  ['1', 'O'],
  ['tablespoon', 'O'],
  ['of', 'O'],
  ['the', 'O'],
  ['olive', 'B-FOOD'],
  ['oil', 'I-FOOD'],
  [',', 'O'],
  ['1', 'O'],
  ['teaspoon', 'O'],
  ['of', 'O'],
  ['the', 'O'],
  ['salt', 'B-FOOD'],
  [',', 'O'],
  ['and', 'O'],
  ['1/4', 'O'],
  ['teaspoon', 'O'],
  ['of', 'O'],
  ['the', 'O'],
  ['pepper', 'B-FOOD'],
  ['in', 'O']

In [41]:
# Print every sampled example as plain text: the token strings (first item of
# each pair) separated by single spaces, followed by a divider line.
for example in examples:
    text = ""
    for token in example:
        text = text + token[0] + " "
    print(text)
    print("========")

In a cocktail shaker , combine gin , Chambord , cranberry juice and egg white , shake the drink vigorously , and strain it into a chilled cocktail glass . 
2 . Toss the asparagus with 1 tablespoon of the olive oil , 1 teaspoon of the salt , and 1/4 teaspoon of the pepper in a large mixing bowl . Stir in the garlic . Spread out the asparagus on a baking sheet . 
Heat 1 1/2 teaspoons oil in a 12-inch nonstick skillet over moderately high heat until hot but not smoking . Add half of garlic and 1/4 teaspoon red pepper flakes and sauté , stirring , until garlic is golden , about 1 minute . Add half of Broccolini and 2 tablespoons stock and cook , stirring , 2 minutes . Transfer to a serving dish , then repeat with remaining oil , garlic , red pepper flakes , Broccolini , and stock . Season with salt . 
Mash papaya flesh with a fork until smooth or pulse in a food processor until coarsely puréed . Add 5 tablespoons sugar and 2 tablespoons lime juice and stir or pulse to combine . Add more su