In [1]:
from google.colab import drive

# Mount Google Drive inside the Colab filesystem so that files stored on
# Drive can be accessed through ordinary paths below the mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# Built in modules
import json
import os
import pickle
import string
# Python modules
import numpy as np
# Machine Learning Frameworks
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
import nltk
from nltk.tokenize import word_tokenize
# word_tokenize needs the Punkt sentence-tokenizer data. Older NLTK releases
# read the "punkt" package, while newer releases (3.8.2 and later) look for
# "punkt_tab" instead and raise LookupError if it is missing, so fetch both
# to keep this notebook working on a fresh runtime regardless of NLTK version.
nltk.download("punkt")
nltk.download("punkt_tab")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [3]:
# Folder (on the mounted Drive) holding the trained model and its lookup files
MODEL_DIR = '/content/drive/MyDrive/Recipe-NER'
# File name of the saved Keras model inside MODEL_DIR
MODEL_FILE = 'ner_model_20e.h5'

# Make MODEL_DIR the working directory: the model and the files opened in the
# following cells are referenced by relative path.
os.chdir(MODEL_DIR)
# Restore the trained NER model from disk
model = tf.keras.models.load_model(MODEL_FILE)

It is crucial to use the same dictionaries and lists that were used when training the NER model, since they provide the mapping between words and their indices and between predicted class indices and tag names.

In [4]:
# Lookup tables saved at training time (all paths are relative to the current
# working directory).
# The JSON files are opened with an explicit UTF-8 encoding so that decoding
# does not depend on the platform's default text encoding.

# word2idx: word -> integer index fed to the model
with open('word2idx.json', 'r', encoding='utf-8') as word2idx_file:
    word2idx = json.load(word2idx_file)
# tag2idx: presumably tag name -> class index (TODO confirm against training code)
with open('tag2idx.json', 'r', encoding='utf-8') as tag2idx_file:
    tag2idx = json.load(tag2idx_file)
# tags: sequence indexed by predicted class index to obtain the tag name.
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load a "tags" file that you produced yourself or otherwise trust.
with open("tags", "rb") as tags_file:
    tags = pickle.load(tags_file)

In [5]:
def preprocess(instruction):
  """Strip punctuation (except hyphens) from a string and lowercase it.

  Every character in ``string.punctuation`` other than "-" is deleted
  outright (it is not replaced by a space); the remaining text is then
  converted to lowercase.

  Args:
    instruction: The raw text (str) to normalise.

  Returns:
    The lowercased string with all punctuation removed except "-".
  """
  # Every punctuation character apart from the hyphen, which is preserved
  removable = "".join(ch for ch in string.punctuation if ch != '-')
  # Translation table that deletes each removable character
  deletion_table = str.maketrans('', '', removable)
  stripped = instruction.translate(deletion_table)
  return stripped.lower()


The text used for this inference example is taken from https://www.tasteofhome.com/recipes/bacon-cheeseburger-pasta/

In [6]:
# Sample recipe instructions used as the input for inference
text = """Cook pasta according to package directions. Meanwhile, in a large skillet, cook beef over medium heat until no longer pink; drain and set aside.
    In the same skillet, cook bacon until crisp; remove with a slotted spoon to paper towels to drain. Discard drippings. Drain pasta; add to the skillet. Stir in the soup, water, beef and bacon; heat through.
    Remove from the heat and sprinkle with cheese. Cover and let stand for 2-3 minutes or until the cheese is melted. Serve with barbecue sauce and mustard if desired. """
# Normalise the text first (lowercase, punctuation other than "-" removed) ...
cleaned_text = preprocess(text)
# ... then split the cleaned text into a list of word tokens
test = word_tokenize(cleaned_text)
print(test)

['cook', 'pasta', 'according', 'to', 'package', 'directions', 'meanwhile', 'in', 'a', 'large', 'skillet', 'cook', 'beef', 'over', 'medium', 'heat', 'until', 'no', 'longer', 'pink', 'drain', 'and', 'set', 'aside', 'in', 'the', 'same', 'skillet', 'cook', 'bacon', 'until', 'crisp', 'remove', 'with', 'a', 'slotted', 'spoon', 'to', 'paper', 'towels', 'to', 'drain', 'discard', 'drippings', 'drain', 'pasta', 'add', 'to', 'the', 'skillet', 'stir', 'in', 'the', 'soup', 'water', 'beef', 'and', 'bacon', 'heat', 'through', 'remove', 'from', 'the', 'heat', 'and', 'sprinkle', 'with', 'cheese', 'cover', 'and', 'let', 'stand', 'for', '2-3', 'minutes', 'or', 'until', 'the', 'cheese', 'is', 'melted', 'serve', 'with', 'barbecue', 'sauce', 'and', 'mustard', 'if', 'desired']


In [7]:
# max_len must be the same sequence length that was used when training the model
max_len = 900
# Convert every token to its integer index and pad the single resulting
# sequence with trailing zeros up to max_len.
# NOTE(review): words missing from word2idx fall back to index 0, which is
# also the padding value -- confirm this matches how unknown words and padding
# were encoded during training.
# truncating="post" keeps the FIRST max_len tokens of an over-long input. The
# pad_sequences default ("pre") would drop tokens from the start instead, and
# the predictions would then no longer line up with `test` when the two are
# paired position by position.
preprocessed_text = pad_sequences(sequences=[[word2idx.get(w, 0) for w in test]],
                            padding="post", truncating="post", value=0, maxlen=max_len)

In [8]:
# Class probabilities predicted by the model for the first (and only) padded
# input sequence; the [:1] slice keeps the leading batch dimension.
class_probabilities = model.predict(preprocessed_text[:1])
# p holds, for every position, the index of the class with the highest probability
p = np.argmax(class_probabilities, axis=-1)

# Table header followed by a divider line
header = "{:15}||{}".format("Word", "Prediction")
divider = "=" * 30
print(header)
print(divider)

# One row per token: the word, then the tag name of its predicted class.
# zip stops at the shorter input, so positions past the last token are skipped.
row_template = "{:15}: {:5}"
for w, pred in zip(test, p[0]):
  predicted_tag = tags[pred]
  print(row_template.format(w, predicted_tag))

Word           ||Prediction
cook           : O    
pasta          : B-IN 
according      : O    
to             : O    
package        : O    
directions     : O    
meanwhile      : O    
in             : O    
a              : O    
large          : O    
skillet        : B-CO 
cook           : O    
beef           : B-IN 
over           : O    
medium         : O    
heat           : O    
until          : O    
no             : O    
longer         : O    
pink           : O    
drain          : O    
and            : O    
set            : O    
aside          : O    
in             : O    
the            : O    
same           : O    
skillet        : B-CO 
cook           : O    
bacon          : B-IN 
until          : O    
crisp          : O    
remove         : O    
with           : O    
a              : O    
slotted        : B-CO 
spoon          : I-CO 
to             : O    
paper          : B-ME 
towels         : I-CO 
to             : O    
drain          : O    
discar