In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from model import EncoderRNN, AttnDecoderRNN
import json
import helpers


encoder_dict = torch.load('./model_v4.pt', map_location=torch.device('cpu'))['encoder_state_dict']
decoder_dict = torch.load('./model_v4.pt', map_location=torch.device('cpu'))['decoder_state_dict']
    
with open('../project_data/project_train_data_instr.json') as json_file:
    train_data = json.load(json_file)

In [2]:
N_EPOCHS = 15
LEARNING_RATE = 0.01
REPORT_EVERY = 1000
HIDDEN_DIM = 256
#BATCH_SIZE = 20
#N_LAYERS = 1
teacher_forcing_ratio = 1
TRAIN_SET_SIZE = 1000
n_words = 43863
MAX_LENGTH = 70

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_num_threads(10)

encoder = EncoderRNN(n_words, HIDDEN_DIM).to(device)
decoder = AttnDecoderRNN(HIDDEN_DIM, n_words, max_length=MAX_LENGTH).to(device)

encoder.load_state_dict(encoder_dict)
decoder.load_state_dict(decoder_dict)

<All keys matched successfully>

In [3]:
encoder.eval()
decoder.eval()

recipe_step_pairs, idx2word, word2idx, ml = helpers.get_tensor_data()
n_words = len(word2idx)
print(recipe_step_pairs[0])

Number of short ingredient lists:  130567
Average ingredient list length: 14.175872007959267
No ingredients filtered
Max instruction step length:  70
Number of long instructions:  61032
Average instruction length: 149.95270527301457
Total instruction steps:  489828
Recipes filtered:  61455
Recipes left after filtering:  75241
Recipe step pairs:  223824
<SOS> Preheat oven to 400 degrees F ( 205 degrees C ) . Butter a 9x9x2 inch baking pan . <EOS>
<SOS> Melt 1 tablespoon butter in medium nonstick skillet over medium-low heat . Add onion and saute until tender , about 10 minutes . Cool . <EOS>
(tensor([[43860],
        [   17],
        [   18],
        [   19],
        [   20],
        [   21],
        [   22],
        [   23],
        [   24],
        [   21],
        [   25],
        [   26],
        [   27],
        [   28],
        [   29],
        [   30],
        [   31],
        [   32],
        [   33],
        [   27],
        [43862]]), tensor([[43860],
        [   34],
        

In [19]:
from random import choice
from helpers import idx_to_words
from nltk.tokenize import sent_tokenize, word_tokenize
import re

def evaluate(encoder, decoder, input_tensor, gold_standard):
    with torch.no_grad():
        max_length = MAX_LENGTH
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden(device)

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
        loss = 0
        
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[word2idx['<SOS>']]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            if di < len(gold_standard):
                loss += loss_function(decoder_output, gold_standard[di])
            else:
                loss += loss_function(decoder_output, gold_standard[-1])
            if topi.item() == word2idx['<EOS>']:
                if di < len(gold_standard) and gold_standard[di] !=  word2idx['<EOS>']:
                    for dj in range(di, len(gold_standard)):
                        loss += loss_function(decoder_output, gold_standard[dj])
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(idx2word[str(topi.item())])

            decoder_input = topi.squeeze().detach()

        return decoded_words, loss.item()/len(gold_standard), decoder_attentions

    
def random_evaluate(evaluation_data, n=10):
    for i in range(n):
        pair = choice(evaluation_data)
        print('Instruction step', idx_to_words(pair[0], idx2word))
        print('Next step', idx_to_words(pair[1], idx2word))
        output_words, loss, attentions = evaluate(encoder, decoder, pair[0].to(device), pair[1].to(device))
        output_sentence = ' '.join(output_words)
        print('Generated instructions', output_sentence)
        print("Loss: ", loss)
        print('')
        
        
def evaluate_with_given_input(pair):
    #print('Instruction step', idx_to_words(pair[0], idx2word))
    #print('Next step', idx_to_words(pair[1], idx2word))
    output_words, loss, attentions = evaluate(encoder, decoder, pair[0].to(device), pair[1].to(device))
    output_sentence = ' '.join(output_words)
    #print('Generated instructions', output_sentence)
    return output_sentence, loss, attentions

    
def tokenize(instruction_step):
    words_tokenized = word_tokenize(instruction_step)
    return words_tokenized


def add_helper_tokens(step_tokenized):
    new_step = ['<SOS>']
    new_step.extend(step_tokenized)
    new_step.append('<EOS>')
    return new_step

def to_idx_repr(tokenized_instruction):
    idx_list = [word2idx[w] if w in word2idx else word2idx['<LN>'] for w in tokenized_instruction]
    instr_tensors = torch.tensor(idx_list).view(-1, 1)
    return instr_tensors
    

def prepare_input_instruction(text):
    tokenized = tokenize(text)
    tokenized_h = add_helper_tokens(tokenized)
    tensor = to_idx_repr(tokenized_h)
    return tensor


def prepare_input_instruction_eval(text):
    tokenized = tokenize(text)
    tokenized_h = add_helper_tokens(tokenized[3:-3])
    tensor = to_idx_repr(tokenized_h)
    return tensor


def remove_helper_tokens(text):
    helpers_r = r'(<SOS>)|(<EOS>)'
    cleaned_text = re.sub(helpers_r, "", text, count=2)
    return cleaned_text


def preprocess_instruction_data_from_recipes(recipes, limit):
    preprocessed = []
    filtered_out = 0
    for rec in recipes:
        rec_steps = []
        use_rec = True
        for step in rec:
            if len(step) < limit:
                tensor_step = prepare_input_instruction(step)
                rec_steps.append(tensor_step)
            else:
                use_rec = False
                filtered_out = filtered_out + 1
        if use_rec:
            preprocessed.append(rec_steps)
    print(filtered_out, " recipes filtered out")
    return preprocessed



def generate_next_steps(first_step):
    print('Input: ', first_step)
    steps = []
    made_up_instruction = first_step
    i = 1
    while len(steps) < 10 and made_up_instruction != "<SOS> <EOS>":
        tensor = prepare_input_instruction(made_up_instruction)
        made_up_instruction = evaluate_with_given_input(tensor)
        steps.append(made_up_instruction)
        print(i,".", remove_helper_tokens(made_up_instruction))
        i = i + 1
        

def get_instruction_steps(recipes):
    recipe_step_pairs = []
    for recipe in recipes:
        for i, instr_step in enumerate(recipe[:-1]):
            recipe_step_pairs.append((instr_step, recipe[i+1]))
    print("Recipe step pairs: ", len(recipe_step_pairs))
    return recipe_step_pairs




#made_up_instruction = "chicken Italian-seasoned bread crumbs small onion cloves garlic taste oil Mix ground chicken , 1/4 cup bread crumbs , onion , egg , garlic , salt , and black pepper in a bowl . Moisten hands and shape chicken mixture , 2 tablespoons at a time , into flat , oval-shaped patties ."
#generate_next_steps(made_up_instruction)

loss_function = nn.NLLLoss()
random_evaluate(recipe_step_pairs)

Instruction step <SOS> Scoop small balls of dough onto the lined baking sheets using a small cookie scoop . <EOS>
Next step <SOS> Bake in the preheated oven until golden , about 10 minutes . Let cookies sit on the baking sheets for 2 minutes . Transfer to a wire rack to cool completely . <EOS>
Generated instructions <SOS> Bake in preheated oven for 30 minutes , or until the top is golden brown . <EOS>
Loss:  8.10599921731388

Instruction step <SOS> Pour the asparagus mixture into a blender , add the baby spinach and blend to a fine purée . <EOS>
Next step <SOS> Strain the asparagus purée through a fine sieve into a bowl over iced water , to chill and keep the colour . Set aside until needed . <EOS>
Generated instructions <SOS> Add the cream and simmer for 4-5 minutes , or until the cream is thickened . <EOS>
Loss:  9.57408931337554

Instruction step <SOS> In a medium bowl , mix together cereal and butter . Sprinkle evenly on top of hash brown mixture . <EOS>
Next step <SOS> Bake uncove

In [20]:
cookstr = [json.loads(line) for line in open('../../original_data/cookstr-recipes.json', 'r')]

In [21]:
test_recs = [rec['instructions'] for rec in cookstr]
limit = 120
prcessed = preprocess_instruction_data_from_recipes(test_recs, limit)
prcessed = [r for r in prcessed if len(r) > 0]


test_data_steps = get_instruction_steps(prcessed)

26620  recipes filtered out
Recipe step pairs:  469


In [7]:
total_loss = 0
outputs = []

for t in test_data_steps:
    output, loss, attention = evaluate_with_given_input(t)
    total_loss += loss
    outputs.append(output)
    
print("Average loss for test set: ", total_loss/len(test_data_steps))

Average loss for test set:  17.362350101664216


In [8]:
random_evaluate(test_data_steps)

Instruction step <SOS> Toast the waffles to desired crispness . <EOS>
Next step <SOS> Spread the peanut butter on one side of each waffle . <EOS>
Generated instructions <SOS> Place the cooked potatoes on top of the apple mixture and the top of the pastry . <EOS>
Loss:  8.781548720139723

Instruction step <SOS> Add beer ; allow <LN> to continue to ferment for 24 hours more . <EOS>
Next step <SOS> Strain liquid ; discard fruit ; add sugar substitute . <EOS>
Generated instructions <SOS> Stir in the cream cheese , <EOS>
Loss:  6.810345967610677

Instruction step <SOS> Place the mango , juice , and yogurt in a blender , and blend until well combined . <EOS>
Next step <SOS> Pour into individual glasses and serve with a straw . <EOS>
Generated instructions <SOS> Add the cream and mix well . <EOS>
Loss:  9.21328862508138

Instruction step <SOS> Combine the mustard and vinegar in a bowl and stir to mix . Allow the mixture to sit for 15 minutes . <EOS>
Next step <SOS> Place all of the ingredient

In [9]:
evaluate_with_given_input(test_data_steps[205])

('<SOS> Place the bread cubes in a large bowl , and mix in the bread crumbs . <EOS>',
 11.055631510416667,
 tensor([[3.0162e-15, 1.5212e-12, 1.2896e-11,  ..., 6.3481e-11, 8.6996e-11,
          7.9750e-11],
         [1.0734e-16, 1.9691e-10, 1.8610e-12,  ..., 2.5933e-12, 1.3124e-12,
          1.7030e-12],
         [1.0000e+00, 1.3360e-08, 3.7519e-08,  ..., 6.0348e-09, 2.7222e-09,
          5.3871e-09],
         ...,
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
          0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
          0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
          0.0000e+00]]))

In [18]:
from nltk.translate import bleu_score, meteor_score
from nltk.metrics import scores
from rouge_score import rouge_scorer


avg_prec = 0
avg_recall = 0
avg_fscore = 0
avg_bleu = 0
#avg_rouge = []
avg_meteor = 0
avg_len = 0

N = len(test_data_steps)
results = []
targets = []
#scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=False)


for i, t in enumerate(test_data_steps):
    input_step = [str(s) for s in t[0].flatten().tolist()]
    target = [str(s) for s in t[1].flatten().tolist()]
    targets.append(target)
    result = outputs[i]
    #print(input_step)
    #print(target)
    #print(result)
    result_vec = [str(r) for r in prepare_input_instruction_eval(result).flatten().tolist()]
    results.append(result_vec)
    avg_len += len(result_vec)
    # sanity check
    #prep = idx_to_words(result_vec, idx2word)
    #print(result_vec)
    precision = scores.precision(set(result_vec), set(target))
    avg_prec += precision
    recall = scores.recall(set(result_vec), set(target))
    avg_recall += recall
    f_score = scores.f_measure(set(result_vec), set(target))
    avg_fscore += f_score
    bleu = bleu_score.sentence_bleu([target], result)
    avg_bleu += bleu
    rouge = scorer.score(" ".join(target), " ".join(result))
    #print(rouge)
    #avg_rouge.append(rouge['rougeL']['precision']
    meteor = meteor_score.single_meteor_score(" ".join(target), " ".join(result))
    avg_meteor += meteor

print("Average precision: ", avg_prec/N)
print("Average recall: ", avg_recall/N)
#print("F1-measure: ", avg_fscore/N)

print("Average BLEU: ", avg_bleu/N)
print("Average METEOR: ", avg_meteor/N)
#print("Average ROUGE-L: ", avg_bleu/N)
print("Average step length: ", avg_len/N)

Average precision:  0.38914280386514505
Average recall:  0.3112145435106619
Average BLEU:  8.878445206863794e-234
Average METEOR:  0.0017409686016970072
Average step length:  23.215351812366738


145.95962090650045