In [1]:
import os, re, sys, pickle
import numpy as np, pandas as pd
import matplotlib.pyplot as plt

# tf and keras
import keras, tensorflow as tf
from tensorflow.keras import layers
from keras.preprocessing.sequence import pad_sequences

# spacy
import spacy
import en_core_web_sm

Matplotlib created a temporary config/cache directory at /var/folders/sj/dsbk_7_d7y7d5211gj7flsvh0000gn/T/matplotlib-eabtffwl because the default path (/Users/jaysonp/.matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


### Initialize necessary variables

In [2]:
# Fixed sequence length: passed as `max_len` to process_sequences, which pads
# every token-id sequence to this many entries before it is given to the model.
# NOTE(review): presumably has to equal the input length the saved model was
# trained with — confirm against the training notebook.
MAX_LEN = 300

### Test input

In [3]:
# Sample input: recipe id -> lower-cased instruction text; "**" closes each
# group of instructions in both samples.
# NOTE(review): the continuation lines of "recipe_1" begin with "# " *inside*
# the string literal, so those characters are part of the text handed to the
# tokenizer and are counted in the character offsets — confirm this is intended.
test_recipe = {"recipe_2": '''preheat oven to 350 degrees f (175 degrees c).**blend butter or margarine, sugar, egg, 
banana, dissolved coffee, and vanilla in food processor for 2 minutes. add flour, salt, baking powder, and soda, 
and blend just until flour disappears. add chocolate chips and mix in with wooden spoon. spoon mixture into 15 to 18 
paper-lined muffin cups.**bake for 25 minutes.  cool on wire racks.**''',
              "recipe_1": '''preheat oven to 350 degrees f (175 degrees c). lightly grease and flour one 9x13 inch cake pan.**
# beat vegetable oil and eggs until foamy. add the sugar, flour, ground cinnamon, baking, soda, salt, 
# and vanilla and mix well. stir in the diced apples. pour batter into prepared pan.**bake at 350 degrees f 
# (175 degrees c) for 30 to 40 minutes. cool cake in pan for 10 minutes. cake needs no frosting.**'''}

### Load model and vocab

In [4]:
# load model

model_path = "../models/fs_lstm_v1.0.h5"
model = keras.models.load_model(model_path)

# load vocab (token text -> integer id lookup used by process_sequences)
# NOTE: unpickling can execute arbitrary code; only load pickle files that come
# from a trusted source.
# The `with` block closes the file handle as soon as the vocab is read instead
# of leaving it open for the lifetime of the kernel.
with open("../models/vocab_v1.0.pkl", "rb") as vocab_file:
    vocab = pickle.load(vocab_file)
# tokens = pickle.load(open("../models/tokens_v1.0.pkl", "rb"))

### Prediction helper functions

In [5]:
# process sequence from test input

def process_sequences(tokens, max_len, vocab={"<UNK>": 1, "<PAD>": 0}):
    """Turn tokenized documents into a fixed-width matrix of vocabulary ids.

    Args:
        tokens: Iterable of tokenized documents; each document is an iterable
            of objects exposing a ``.text`` attribute.
        max_len: Width of the result. Shorter documents are padded at the end
            with the ``"<PAD>"`` id; longer documents are cut off at the end.
        vocab: Mapping from token text to integer id. Must contain the
            ``"<UNK>"`` and ``"<PAD>"`` keys. The default dict is only read,
            never mutated.

    Returns:
        The array returned by ``pad_sequences``: one row per document,
        ``max_len`` ids per row.
    """
    unk_id = vocab["<UNK>"]
    X = [[vocab.get(w.text, unk_id) for w in s] for s in tokens]
    # pad_sequences truncates from the FRONT by default (truncating="pre").
    # With padding="post" that would shift over-long documents so that row
    # position i no longer corresponds to token i, and callers that zip the
    # tokens with per-position predictions would label the wrong words.
    # Truncating at the end keeps positions aligned with the token order.
    return pad_sequences(
        maxlen=max_len,
        sequences=X,
        padding="post",
        truncating="post",
        value=vocab["<PAD>"],
    )

def predict(model, test, test_tokens, threshold=0.05):
    """Return the unique token texts the model scores above ``threshold``.

    Args:
        model: Object with a ``predict(x, verbose=...)`` method whose first
            result row holds one score per sequence position.
        test: Model input for a single document (as built by
            ``process_sequences``).
        test_tokens: Sequence whose first element is the tokenized document;
            each token exposes ``.text``. Token i is paired with score i.
        threshold: Scores strictly greater than this value count as a
            positive prediction. Defaults to 0.05, the previously
            hard-coded cut-off.

    Returns:
        List of distinct predicted token texts in order of first appearance
        (``"\\n"`` tokens are dropped), or ``None`` if prediction failed; the
        error is printed rather than raised.
    """
    ingredients = None
    try:
        y_pred = model.predict(test, verbose=1)[0]
        pred = y_pred > threshold
        # zip stops at the shorter input, so padding positions beyond the
        # last token are ignored.
        ing = [t.text for t, p in zip(test_tokens[0], pred) if p]
        # dict.fromkeys removes duplicates while keeping first-occurrence
        # order, so the result is reproducible from run to run (a set is not).
        ingredients = list(dict.fromkeys(i for i in ing if i != "\n"))
    except Exception as ex:
        # Best-effort by design: report the problem and hand back None.
        print("Problem in predicting input : {}".format(ex))

    return ingredients

def format_output_pred(predictions, test_recipe):
    """Locate every predicted token inside the recipe text.

    Each prediction is searched for as a literal substring. If it does not
    occur, the prediction without its last character is tried instead.
    Predictions that match neither way are left out.

    Args:
        predictions: Iterable of predicted token strings. ``None`` (what
            ``predict`` returns on failure) is treated as no predictions.
        test_recipe: The recipe text to search.

    Returns:
        Dict mapping the matched string to a list of ``(start, end)``
        character offsets of each occurrence in ``test_recipe``.
    """
    formatted_pred = {}
    for pred in predictions or []:
        # Try the token itself first, then the token minus its last character.
        for candidate in (pred, pred[:-1]):
            if not candidate:
                # An empty pattern matches at every position; skip it rather
                # than record a meaningless "" entry.
                continue
            # re.escape: tokens are plain text, not patterns. Without it a
            # token such as "(" raises re.error and "." matches everything.
            pattern = re.escape(candidate)
            spans = [m.span() for m in re.finditer(pattern, test_recipe)]
            if spans:
                formatted_pred[candidate] = spans
                break

    return formatted_pred

### Predict test input

In [6]:
# The parser, tagger and NER components are switched off; the code below only
# reads the text of each token.
nlp = en_core_web_sm.load(disable=['parser', 'tagger', 'ner'])

# recipe id -> {matched ingredient text: [(start, end), ...]}
formatted_output = {}

for k, recipe_text in test_recipe.items():
    test_tokens = [nlp(recipe_text.strip())]
    X_test = process_sequences(test_tokens, max_len=MAX_LEN, vocab=vocab)
    pred_ings = predict(model, X_test, test_tokens)
    # predict() returns None when inference fails; fall back to an empty list
    # so one bad recipe yields an empty result instead of a TypeError that
    # aborts the whole loop.
    formatted_output[k] = format_output_pred(pred_ings or [], recipe_text)

formatted_output



{'recipe_2': {'vanilla': [(118, 125)],
  'coffee': [(106, 112)],
  'baking': [(176, 182)],
  'margarine': [(64, 73)],
  'sugar': [(75, 80)],
  'butter': [(54, 60)],
  'egg': [(82, 85)],
  'flour': [(163, 168), (223, 228)],
  'salt': [(170, 174)],
  'powder': [(183, 189)],
  'banana': [(88, 94)],
  'soda': [(195, 199)],
  'chocolate': [(245, 254)]},
 'recipe_1': {'vanilla': [(207, 214)],
  'ground': [(163, 169)],
  'baking': [(180, 186)],
  'vegetable': [(105, 114)],
  'cinnamon': [(170, 178)],
  'sugar': [(149, 154)],
  'soda': [(188, 192)],
  'flour': [(66, 71), (156, 161)],
  'salt': [(194, 198)],
  'apples': [(247, 253)],
  'cake': [(86, 90), (355, 359), (383, 387)],
  'oil': [(115, 118)],
  'eggs': [(123, 127)]}}