In [43]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from model import EncoderRNN, AttnDecoderRNN
import json
import preprocess_for_s2s


encoder_dict = torch.load('./model.pt', map_location=torch.device('cpu'))['encoder_state_dict']
decoder_dict = torch.load('./model.pt', map_location=torch.device('cpu'))['decoder_state_dict']
    
with open('../project_data/project_train_data_instr.json') as json_file:
    train_data = json.load(json_file)

In [44]:
N_EPOCHS = 15
LEARNING_RATE = 0.01
REPORT_EVERY = 1000
HIDDEN_DIM = 256
#BATCH_SIZE = 20
#N_LAYERS = 1
teacher_forcing_ratio = 1
TRAIN_SET_SIZE = 1000
n_words = len(word2idx)
MAX_LENGTH = 493

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_num_threads(10)

encoder = EncoderRNN(n_words, HIDDEN_DIM).to(device)
decoder = AttnDecoderRNN(HIDDEN_DIM, n_words, max_length=MAX_LENGTH).to(device)

encoder.load_state_dict(encoder_dict)
decoder.load_state_dict(decoder_dict)

<All keys matched successfully>

In [45]:
encoder.eval()
decoder.eval()

recipe_step_pairs, idx2word, word2idx, ml = preprocess_for_s2s.get_tensor_data()
n_words = len(word2idx)
print(recipe_step_pairs[0])

[43860, 0, 1, 43861, 2, 43861, 3, 43861, 4, 43861, 5, 43861, 6, 43861, 7, 43861, 8, 9, 10, 43861, 11, 12, 13, 14, 15, 43861, 16, 43861, 43862]
Max instruction step length:  70
<SOS> Combine 1 cup flour , sugar , salt , and yeast . Mix well . Heat water and vegetable oil until warm , and add to yeast mixture along with the egg . Blend with an electric mixer at low speed until moistened . Beat for 2 additional minutes . Stir in 1 3/4 cup flour while beating , until dough pulls away from side of bowl . <EOS>
Number of short ingredient lists:  108627
Average ingredient list length: 22.355650494528003
Number of long instructions:  61032
Average instruction length: 149.95270527301457
Training set total size:  489828
223824
(tensor([[43860],
        [   17],
        [   18],
        [   19],
        [   20],
        [   21],
        [   22],
        [   23],
        [   24],
        [   21],
        [   25],
        [   26],
        [   27],
        [   28],
        [   29],
        [   30],


In [154]:
from random import choice
from preprocess_for_s2s import idx_to_words
from nltk.tokenize import sent_tokenize, word_tokenize
import re

def evaluate(encoder, decoder, input_tensor):
    with torch.no_grad():
        max_length = MAX_LENGTH
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden(device)

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[word2idx['<SOS>']]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == word2idx['<EOS>']:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(idx2word[str(topi.item())])

            decoder_input = topi.squeeze().detach()

        return decoded_words

    
def random_evaluate(evaluation_data, n=10):
    for i in range(n):
        pair = choice(evaluation_data)
        print('Instruction step', idx_to_words(pair[0], idx2word))
        print('Next step', idx_to_words(pair[1], idx2word))
        output_words = evaluate(encoder, decoder, pair[0].to(device))
        output_sentence = ' '.join(output_words)
        print('Generated instructions', output_sentence)
        print('')
        
        
def evaluate_with_given_input(instruction):
    output_words = evaluate(encoder, decoder, instruction.to(device))
    output_sentence = ' '.join(output_words)
    return output_sentence

    
def tokenize(instruction_step):
    words_tokenized = word_tokenize(instruction_step)
    return words_tokenized


def add_helper_tokens(step_tokenized):
    new_step = ['<SOS>']
    new_step.extend(step_tokenized)
    new_step.append('<EOS>')
    return new_step

def to_idx_repr(tokenized_instruction):
    idx_list = [word2idx[w] if w in word2idx else word2idx['<LN>'] for w in tokenized_instruction]
    instr_tensors = torch.tensor(idx_list).view(-1, 1)
    return instr_tensors
    

def prepare_input_instruction(text):
    tokenized = tokenize(text)
    tokenized_h = add_helper_tokens(tokenized)
    tensor = to_idx_repr(tokenized_h)
    return tensor


def remove_helper_tokens(text):
    helpers_r = r'(<SOS>)|(<EOS>)'
    cleaned_text = re.sub(helpers_r, "", text, count=2)
    return cleaned_text


def preprocess_instruction_data_from_recipes(recipes, limit):
    preprocessed = []
    filtered_out = 0
    for rec in recipes:
        rec_steps = []
        for step in rec:
            if len(step) < limit:
                use_rec = True
            else:
                filtered_out = filtered_out + 1
                use_rec = False
            if use_rec:
                tensor_step = prepare_input_instruction(step)
                rec_steps.append(tensor_step)
        preprocessed.append(rec_steps)
    print(filtered_out, " recipes filtered out")
    return preprocessed
            


def generate_next_steps(first_step):
    print('Input: ', first_step)
    steps = []
    made_up_instruction = first_step
    i = 1
    while len(steps) < 10 and made_up_instruction != "<SOS> <EOS>":
        tensor = prepare_input_instruction(made_up_instruction)
        made_up_instruction = evaluate_with_given_input(tensor)
        steps.append(made_up_instruction)
        print(i,".", remove_helper_tokens(made_up_instruction))
        i = i + 1



made_up_instruction = "Take some rum and sugar. Combine in a glass."
generate_next_steps(made_up_instruction)



Input:  Take some rum and sugar. Combine in a glass.
1 .  Pour the syrup and lime juice and and lime juice . 
2 .  Place the fish and fish in the prepared baking pan and 
3 .  Bake in the preheated oven until fish is easily flaked with about 1 hour , 
4 .  Meanwhile , cook onion , onion , garlic , and salt in the same pan until fragrant , about 1 minute . 
5 .  Stir in the rice and and cook until the onion is translucent , 
6 .  Pour the liquid over the rice . stir the Pour the liquid over the rice . 
7 .  Cover the and refrigerate until chilled , about 1 hour . 
8 .  Serve the 
9 .  Garnish with 
10 .  Place 2 tablespoons of the butter over the top of the bread and 


In [155]:
cookstr = [json.loads(line) for line in open('../../original_data/cookstr-recipes.json', 'r')]


In [156]:
test_recs = [rec['instructions'] for rec in cookstr]
limit = 70
prcessed = preprocess_instruction_data_from_recipes(test_recs, limit)


32299  recipes filtered out


In [157]:
len(prcessed)

7918