In [2]:
%pip install numpy pandas scikit-learn tensorflow

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.1.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

In [4]:
# Load the recipe dataset. Later cells read its "Cleaned-Ingredients" and
# "TranslatedInstructions" columns. The path is relative, so the CSV has to sit in
# the notebook's working directory.
df = pd.read_csv("Cleaned_Indian_Food_Dataset.csv")

In [5]:
# Preprocessing function (optional but recommended)
def preprocess_text(text):
    """Return ``text`` as a lowercase string with normalised whitespace.

    The value is coerced with ``str()`` first, so non-string inputs (numbers,
    NaN, None) are accepted and become their string form. Leading/trailing
    whitespace is dropped and every internal run of whitespace collapses to a
    single space.
    """
    lowered = str(text).lower()
    # split() with no argument discards leading/trailing whitespace and splits
    # on any whitespace run, so a separate strip() is not needed.
    return ' '.join(lowered.split())

In [6]:
# Normalise both text columns. Ingredients are the encoder input; instructions are
# the decoder target and get explicit start/end markers around them.
input_texts = list(map(preprocess_text, df["Cleaned-Ingredients"]))
target_texts = [
    "<start> " + preprocess_text(instructions) + " <end>"
    for instructions in df["TranslatedInstructions"]
]

In [7]:
# Build the encoder vocabulary from the ingredient strings, then convert every
# string to a list of integer word ids. fit_on_texts must run before
# texts_to_sequences because the latter uses the vocabulary the former builds.
encoder_tokenizer = Tokenizer()
encoder_tokenizer.fit_on_texts(input_texts)
encoder_sequences = encoder_tokenizer.texts_to_sequences(input_texts)
# padding='post' appends zeros; with no maxlen given, rows are padded to the
# longest sequence. predict_instruction later reuses this array's width as its maxlen.
encoder_input_data = pad_sequences(encoder_sequences, padding='post')

In [8]:
# filters='' turns off the Tokenizer's character filtering, so the "<start>" and
# "<end>" markers added to target_texts are kept verbatim as vocabulary entries
# (and punctuation stays attached to the words it touches).
decoder_tokenizer = Tokenizer(filters='')
decoder_tokenizer.fit_on_texts(target_texts)
# id -> word lookup used when turning predicted ids back into text.
# NOTE(review): this is derived from the tokenizer fitted here; a later cell
# replaces decoder_tokenizer with a pickled one, so confirm both agree.
reverse_decoder_word_index = {index: word for word, index in decoder_tokenizer.word_index.items()}
decoder_sequences = decoder_tokenizer.texts_to_sequences(target_texts)
# Teacher forcing: the decoder input is the sequence without its last token and
# the target is the same sequence without its first token (i.e. shifted by one).
decoder_input_data = pad_sequences([seq[:-1] for seq in decoder_sequences], padding='post')
decoder_target_data = pad_sequences([seq[1:] for seq in decoder_sequences], padding='post')

In [9]:
import pickle

# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only load
# tokenizer pickles that you produced yourself or obtained from a trusted source.

# Load encoder tokenizer (replaces the one fitted earlier in this notebook)
with open('encoder_tokenizer.pkl', 'rb') as f:
    encoder_tokenizer = pickle.load(f)

# Load decoder tokenizer (replaces the one fitted earlier in this notebook)
with open('decoder_tokenizer.pkl', 'rb') as f:
    decoder_tokenizer = pickle.load(f)

# The id -> word table was built from the tokenizer that has just been replaced.
# Rebuild it from the loaded tokenizer so decoded ids are always translated with
# the same vocabulary that produced them.
reverse_decoder_word_index = {index: word for word, index in decoder_tokenizer.word_index.items()}

In [10]:
# Vocabulary sizes passed to the Embedding layers (and, for the decoder, to the
# output Dense layer). The +1 leaves room for id 0, which the Tokenizer never
# assigns to a word and pad_sequences uses as its fill value.
encoder_vocab_size = len(encoder_tokenizer.word_index) + 1
decoder_vocab_size = len(decoder_tokenizer.word_index) + 1
embedding_dim = 128  # width of both Embedding layers
latent_dim = 256     # number of units in both LSTMs (size of the h and c state vectors)

In [11]:
# Encoder: variable-length sequence of word ids -> embedding -> LSTM.
# Only the final hidden/cell states are kept; the per-step output is discarded.
encoder_inputs = Input(shape=(None,))
# NOTE(review): mask_zero is not set, so the zeros added by post-padding are fed
# to the LSTM like any other token — confirm this is intended.
enc_emb = Embedding(encoder_vocab_size, embedding_dim)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
_, state_h, state_c = encoder_lstm(enc_emb)
# These two tensors seed the decoder LSTM and are the outputs of encoder_model.
encoder_states = [state_h, state_c]

In [12]:
# Decoder (training graph): previous target tokens -> embedding -> LSTM seeded
# with the encoder states -> softmax over the decoder vocabulary at every step.
# The layer objects are kept in variables because the inference decoder built
# later reuses exactly these layers.
decoder_inputs = Input(shape=(None,))
dec_emb_layer = Embedding(decoder_vocab_size, embedding_dim)
dec_emb = dec_emb_layer(decoder_inputs)
# return_sequences=True yields an output for every time step; the returned states
# are not needed during training and are discarded here.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=encoder_states)
decoder_dense = Dense(decoder_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

In [13]:
# Training model: takes [encoder ids, decoder input ids] and predicts a word
# distribution for every decoder time step.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# The sparse loss takes integer word ids as targets, so decoder_target_data does
# not have to be one-hot encoded.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [14]:
# from keras.callbacks import ModelCheckpoint

# checkpoint = ModelCheckpoint(
#     filepath='/kaggle/working/model_check.weights.h5',  # or .h5 or .weights.h5
#     save_weights_only=True,
#     save_best_only=False,
#     save_freq='epoch',
#     verbose=1
# )

In [15]:
# # Remove all extra dimensions
# decoder_target_data = np.squeeze(decoder_target_data)

# # Add just one last dimension
# decoder_target_data = np.expand_dims(decoder_target_data, -1)

# # Confirm final shape
# print(decoder_target_data.shape)  

In [16]:
# from keras.callbacks import EarlyStopping

# early_stopping = EarlyStopping(
#     monitor='val_loss',
#     patience=3,  # stops if val_loss doesn't improve for 3 consecutive epochs
#     restore_best_weights=True
# )

# model.fit(
#     [encoder_input_data, decoder_input_data],
#     decoder_target_data,
#     batch_size=16,
#     epochs=50,
#     validation_split=0.2,
#     callbacks=[early_stopping]
# )

In [17]:
from tensorflow.keras.models import load_model
# model.save('/kaggle/working/seq2seq_model.h5')

# Copy the trained weights INTO the model built above instead of rebinding `model`
# to the object returned by load_model. The inference models defined further down
# (encoder_model / decoder_model) are wired to the layers created in this notebook
# (encoder_inputs, encoder_states, dec_emb_layer, decoder_lstm, decoder_dense).
# A freshly loaded model has its own, separate layer objects, so rebinding `model`
# would leave those notebook layers at their random initial weights and every
# prediction would come from an untrained network.
# compile=False: only the weights are needed, so the saved optimizer state is skipped.
trained_model = load_model("seq2seq_model.h5", compile=False)
# set_weights raises ValueError if the saved architecture does not match the one
# built above (e.g. a different vocabulary size), which surfaces a mismatch early.
model.set_weights(trained_model.get_weights())
del trained_model



In [18]:
# import re

# def clean_output(text):
#     # Remove non-alphabetic characters except spaces
#     text = re.sub(r'[^a-zA-Z\s]', '', text)
#     # Remove extra spaces
#     text = re.sub(r'\s+', ' ', text)
#     return text.strip()

# def decode_sequence_beam_search(input_seq, beam_width=5, max_decoder_seq_length=2500):
#     states_value = encoder_model.predict(input_seq)

#     start_token_index = decoder_tokenizer.word_index.get('start')
#     end_token_index = decoder_tokenizer.word_index.get('end')
    
#     sequences = [([start_token_index], 0.0, states_value)]

#     for _ in range(max_decoder_seq_length):
#         all_candidates = []
#         for seq, score, state in sequences:
#             if seq[-1] == end_token_index:
#                 all_candidates.append((seq, score, state))
#                 continue
            
#             target_seq = np.array([[seq[-1]]])
#             output_tokens, h, c = decoder_model.predict([target_seq] + state)

#             top_k = np.argsort(output_tokens[0, -1, :])[-beam_width:]

#             for word_id in top_k:
#                 word_prob = output_tokens[0, -1, word_id]
#                 candidate_seq = seq + [word_id]
#                 candidate_score = score - np.log(word_prob + 1e-9)

#                 # Penalize repetition of last token
#                 if len(candidate_seq) > 2 and candidate_seq[-1] == candidate_seq[-2]:
#                     candidate_score += 1.0  # Increase penalty as needed

#                 # Penalize n-gram repetition
#                 if has_repeat_ngram(candidate_seq, n=3, window_size=12):
#                     candidate_score += 2.0  # Increase penalty as needed

#                 all_candidates.append((candidate_seq, candidate_score, [h, c]))
        
#         ordered = sorted(all_candidates, key=lambda tup: tup[1])
#         sequences = ordered[:beam_width]

#         # Early stopping if all sequences ended
#         if all(seq[-1] == end_token_index for seq, _, _ in sequences):
#             break

#     best_seq = sequences[0][0]

#     # Convert to words, filter out unwanted tokens
#     decoded_tokens = [
#         reverse_decoder_word_index.get(idx, '') 
#         for idx in best_seq 
#         if idx not in [start_token_index, end_token_index] and 
#            reverse_decoder_word_index.get(idx, '').isalpha()
#     ]
#     decoded_sentence = ' '.join(decoded_tokens)
#     decoded_sentence = remove_duplicate_phrases(decoded_sentence.strip())
#     decoded_sentence = clean_output(decoded_sentence)
#     return decoded_sentence

In [19]:
# Encoder inference model: maps a padded sequence of ingredient ids to the LSTM's
# final [hidden, cell] states. It shares its layers with the training graph, so it
# uses whatever weights those layers currently hold.
encoder_model = Model(encoder_inputs, encoder_states)

In [20]:
# Decoder inference model: runs the decoder with caller-supplied states and returns
# the new states, so generation can proceed one token at a time.
# The LSTM states become explicit inputs instead of coming from the encoder graph.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# Reuse the embedding, LSTM and dense layers created for the training graph so the
# inference model shares their weights.
dec_emb2 = dec_emb_layer(decoder_inputs)
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=decoder_states_inputs)
decoder_states2 = [state_h2, state_c2]
decoder_outputs2 = decoder_dense(decoder_outputs2)

# Inputs:  [token ids, h, c]   Outputs: [word probabilities, new h, new c]
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs2] + decoder_states2
)

In [21]:
def decode_sequence(input_seq, max_target_len=100):
    """Greedily decode one encoder input into a space-joined string of words.

    Relies on the notebook globals ``encoder_model``, ``decoder_model`` and
    ``decoder_tokenizer``.

    Args:
        input_seq: Batch of one padded encoder id sequence, as accepted by
            ``encoder_model.predict``.
        max_target_len: Maximum number of decoder steps to run. This bounds the
            loop by steps taken rather than by words kept, so decoding always
            terminates even if the model keeps emitting the same token (repeated
            tokens are dropped from the output and would otherwise never advance
            a word-count based limit).

    Returns:
        The decoded words joined by single spaces, without start/end markers,
        with immediately repeated words collapsed and unknown ids skipped.

    Raises:
        KeyError: If the decoder tokenizer contains no start or end marker.
    """
    # The markers are stored as "<start>"/"<end>" when the tokenizer keeps
    # punctuation (filters='') and as "start"/"end" when it strips it; accept both,
    # preferring the bracketed form so the ordinary word "end" is not mistaken for
    # the marker when both exist.
    word_index = decoder_tokenizer.word_index
    start_token_index = word_index.get('<start>', word_index.get('start'))
    end_token_index = word_index.get('<end>', word_index.get('end'))
    if start_token_index is None or end_token_index is None:
        raise KeyError("decoder_tokenizer has no '<start>'/'<end>' (or 'start'/'end') token")

    # Encode the input sequence; verbose=0 avoids one progress bar per predict call.
    states_value = list(encoder_model.predict(input_seq, verbose=0))

    # Initialize the target sequence with the start token
    target_seq = np.array([[start_token_index]])
    decoded_tokens = []

    for _ in range(max_target_len):
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value, verbose=0)

        # Get the token with the highest probability
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        if sampled_token_index == end_token_index:
            break

        sampled_token = decoder_tokenizer.index_word.get(sampled_token_index, '')
        # Skip ids with no word (e.g. the padding id 0) and immediate repetitions.
        if sampled_token and (not decoded_tokens or sampled_token != decoded_tokens[-1]):
            decoded_tokens.append(sampled_token)

        # Feed the chosen token and the updated states into the next step
        target_seq = np.array([[sampled_token_index]])
        states_value = [h, c]

    return ' '.join(decoded_tokens)

In [None]:
import re

def clean_output(text):
    """Keep only ASCII letters and single spaces in ``text``.

    Every character that is neither an ASCII letter nor whitespace is removed,
    each whitespace run is then collapsed to one space, and the result is
    stripped at both ends.
    """
    letters_and_space = re.sub(r'[^a-zA-Z\s]', '', text)
    single_spaced = re.sub(r'\s+', ' ', letters_and_space)
    return single_spaced.strip()

def has_repeat_ngram(seq, n=3, window_size=12):
    """Report whether an n-gram occurs more than once near the end of ``seq``.

    Sequences shorter than ``2 * n`` always yield False. Otherwise only the last
    ``window_size`` items are inspected (the whole sequence when it is not longer
    than ``window_size``), and the result is True as soon as two of the n-grams
    in that window are equal.
    """
    if len(seq) < 2 * n:
        return False
    recent = seq if len(seq) <= window_size else seq[-window_size:]
    grams = [tuple(recent[start:start + n]) for start in range(len(recent) - n + 1)]
    # A set drops duplicates, so a size difference means some n-gram repeated.
    return len(set(grams)) != len(grams)

def _remove_duplicate_phrases(text, min_phrase_len=3):
    """Drop the first copy of any phrase that is immediately repeated in ``text``.

    Scanning left to right, at each word the shortest run of at least
    ``min_phrase_len`` words that is directly followed by an identical run is
    skipped, so only the second copy remains in the result.
    """
    words = text.split()
    result = []
    i = 0
    while i < len(words):
        remaining = len(words) - i
        skipped = False
        # Two back-to-back copies must both fit in what is left, so a phrase can be
        # at most half the remaining length. max(..., 1) rules out empty phrases.
        for length in range(max(min_phrase_len, 1), remaining // 2 + 1):
            if words[i:i + length] == words[i + length:i + 2 * length]:
                i += length  # skip the first copy; the second is kept
                skipped = True
                break
        if not skipped:
            result.append(words[i])
            i += 1
    return ' '.join(result)


def decode_sequence_beam_search(input_seq, beam_width=5, max_decoder_seq_length=100):
    """Decode one encoder input with beam search and return cleaned-up text.

    Relies on the notebook globals ``encoder_model``, ``decoder_model``,
    ``decoder_tokenizer``, ``has_repeat_ngram`` and ``clean_output``.

    Scores are accumulated negative log-probabilities (lower is better) plus fixed
    penalties: +1.0 when a token repeats the previous one and +2.0 when a 3-gram
    repeats within the last 12 tokens.

    Args:
        input_seq: Batch of one padded encoder id sequence.
        beam_width: Number of hypotheses kept after every step; must be >= 1.
        max_decoder_seq_length: Maximum number of decoding steps.

    Returns:
        The best hypothesis as text: purely alphabetic tokens only, start/end
        markers removed, back-to-back duplicated phrases collapsed, and passed
        through ``clean_output``.

    Raises:
        ValueError: If ``beam_width`` is smaller than 1.
        KeyError: If the decoder tokenizer contains no start or end marker.
    """
    if beam_width < 1:
        raise ValueError("beam_width must be at least 1")

    # The markers are stored as "<start>"/"<end>" when the tokenizer keeps
    # punctuation (filters='') and as "start"/"end" when it strips it. Looking up
    # only one spelling silently yields None for the other kind of tokenizer, so
    # accept both and fail loudly if neither exists.
    word_index = decoder_tokenizer.word_index
    start_token_index = word_index.get('<start>', word_index.get('start'))
    end_token_index = word_index.get('<end>', word_index.get('end'))
    if start_token_index is None or end_token_index is None:
        raise KeyError("decoder_tokenizer has no '<start>'/'<end>' (or 'start'/'end') token")

    # Translate ids with the same tokenizer that supplied the marker ids, so the
    # mapping cannot drift from a lookup table built from a different tokenizer.
    index_to_word = decoder_tokenizer.index_word

    # verbose=0 avoids printing a progress bar for every single predict call.
    states_value = list(encoder_model.predict(input_seq, verbose=0))

    # Each hypothesis is (token ids so far, score, decoder [h, c] states).
    sequences = [([start_token_index], 0.0, states_value)]

    for _ in range(max_decoder_seq_length):
        all_candidates = []
        for seq, score, state in sequences:
            # Finished hypotheses are carried over unchanged.
            if seq[-1] == end_token_index:
                all_candidates.append((seq, score, state))
                continue

            target_seq = np.array([[seq[-1]]])
            output_tokens, h, c = decoder_model.predict([target_seq] + state, verbose=0)
            next_word_probs = output_tokens[0, -1, :]

            # Ids of the beam_width most probable next tokens.
            top_k = np.argsort(next_word_probs)[-beam_width:]

            for word_id in top_k:
                word_id = int(word_id)
                candidate_seq = seq + [word_id]
                # 1e-9 guards against log(0).
                candidate_score = score - np.log(next_word_probs[word_id] + 1e-9)

                # Penalize repetition of last token
                if len(candidate_seq) > 2 and candidate_seq[-1] == candidate_seq[-2]:
                    candidate_score += 1.0  # Increase penalty as needed

                # Penalize n-gram repetition
                if has_repeat_ngram(candidate_seq, n=3, window_size=12):
                    candidate_score += 2.0  # Increase penalty as needed

                all_candidates.append((candidate_seq, candidate_score, [h, c]))

        # Keep the beam_width lowest-scoring (most likely) hypotheses.
        sequences = sorted(all_candidates, key=lambda tup: tup[1])[:beam_width]

        # Early stopping if all sequences ended
        if all(seq[-1] == end_token_index for seq, _, _ in sequences):
            break

    best_seq = sequences[0][0]

    # Convert to words, dropping the markers and any token that is not purely alphabetic
    marker_ids = (start_token_index, end_token_index)
    decoded_tokens = [
        index_to_word.get(idx, '')
        for idx in best_seq
        if idx not in marker_ids and index_to_word.get(idx, '').isalpha()
    ]
    decoded_sentence = ' '.join(decoded_tokens)

    decoded_sentence = _remove_duplicate_phrases(decoded_sentence.strip())
    decoded_sentence = clean_output(decoded_sentence)
    return decoded_sentence

In [None]:
def predict_instruction(input_text, beam_width=5):
    """Generate recipe instructions for a free-text ingredient string.

    The text is converted to word ids with ``encoder_tokenizer``, post-padded to
    the width of ``encoder_input_data``, and decoded with
    ``decode_sequence_beam_search`` using the given ``beam_width``.
    """
    token_ids = encoder_tokenizer.texts_to_sequences([input_text])
    encoder_width = encoder_input_data.shape[1]
    model_input = pad_sequences(token_ids, maxlen=encoder_width, padding='post')
    return decode_sequence_beam_search(model_input, beam_width=beam_width)

In [None]:
# import re
# import random
# from collections import defaultdict

# class RecipeTextReconstructor:
#     """
#     A rule-based system to transform broken recipe outputs into coherent instructions.
#     This is a stopgap solution when the underlying model produces severely damaged text.
#     """
    
#     def __init__(self):
#         # Common Indian cooking ingredients
#         self.indian_ingredients = [
#             "ghee", "cumin", "coriander", "turmeric", "garam masala", "cardamom", 
#             "cloves", "cinnamon", "mustard seeds", "fenugreek", "asafoetida", "curry leaves",
#             "chaat masala", "amchur", "panch phoran", "jaggery", "tamarind", "coconut",
#             "basmati rice", "besan", "dal", "chana", "moong", "urad", "toor", "masoor",
#             "rajma", "paneer", "yogurt", "curd", "chilli", "ginger", "garlic", "onion",
#             "tomato", "potato", "okra", "eggplant", "cauliflower", "peas"
#         ]
        
#         # Common Indian dishes by region
#         self.indian_dishes = {
#             "North Indian": ["butter chicken", "dal makhani", "paneer tikka", "chole bhature", 
#                          "rajma chawal", "kadhi pakora", "aloo gobi", "malai kofta"],
#             "South Indian": ["dosa", "idli", "sambhar", "rasam", "uttapam", "vada", 
#                          "appam", "pongal", "bisi bele bath"],
#             "Bengali": ["machher jhol", "shorshe ilish", "chingri malai curry", 
#                      "kosha mangsho", "aloo posto", "mishti doi"],
#             "Gujarati": ["dhokla", "khandvi", "thepla", "undhiyu", "fafda", "khakhra", "gathiya"],
#             "Maharashtrian": ["vada pav", "pav bhaji", "misal pav", "puran poli", "bharli vangi"],
#             "Hyderabadi": ["biryani", "haleem", "keema", "mirchi ka salan", "bagara baingan"]
#         }
        
#         # Common Indian cooking verbs and phrases
#         self.cooking_verbs = [
#             "add", "stir", "mix", "cook", "heat", "boil", "simmer", "fry", "roast", "toast",
#             "grind", "blend", "pressure cook", "sauté", "temper", "garnish", "marinate", 
#             "knead", "ferment", "steam", "strain"
#         ]
        
#         # Cooking connectors and transitions
#         self.connectors = [
#             "then", "next", "after that", "meanwhile", "now", "finally", "allow to", 
#             "continue to", "once", "when"
#         ]
        
#         # Template structures for recipe steps
#         self.step_templates = [
#             "First, {verb} the {ingredient} {adverb}.",
#             "{Verb} the {ingredient} until {condition}.",
#             "{Connector}, {verb} the {ingredient} with {ingredient2}.",
#             "{Verb} {amount} of {ingredient} and {verb2} it {adverb}.",
#             "In a {utensil}, {verb} the {ingredient} for {time}.",
#             "{Verb} the {ingredient} {adverb}, then {verb2} in the {ingredient2}.",
#             "{Connector} {verb} the mixture until it {condition}.",
#             "Add the {ingredient} and {verb} for another {time}.",
#             "{Verb} the {ingredient} and set aside.",
#             "Garnish with {ingredient} and serve {condition}."
#         ]
        
#         # Common cooking utensils
#         self.utensils = [
#             "pan", "pot", "kadai", "tawa", "pressure cooker", "bowl", "dish", 
#             "skillet", "wok", "mixer", "grinder"
#         ]
        
#         # Cooking conditions
#         self.conditions = [
#             "golden brown", "soft", "cooked through", "fragrant", "thick", 
#             "well combined", "translucent", "tender", "crispy", "hot", "warm"
#         ]
        
#         # Cooking adverbs
#         self.adverbs = [
#             "gently", "thoroughly", "carefully", "continuously", "occasionally", 
#             "frequently", "slowly", "quickly", "evenly", "well"
#         ]
        
#         # Cooking times
#         self.times = [
#             "2-3 minutes", "5 minutes", "10 minutes", "15-20 minutes", 
#             "30 minutes", "an hour", "a few seconds"
#         ]
        
#         # Amounts
#         self.amounts = [
#             "1 tablespoon", "2 tablespoons", "1 teaspoon", "1/2 cup", "1 cup", 
#             "a handful", "a pinch", "a small amount"
#         ]
    
#     def extract_ingredients_from_text(self, text):
#         """Extract potential ingredients from the damaged text"""
#         # Lowercase and replace periods with spaces
#         text = text.lower().replace('.', ' ')
#         words = text.split()
        
#         # Find potential ingredients
#         potential_ingredients = []
        
#         # First check for known ingredients
#         for ingredient in self.indian_ingredients:
#             if ingredient in text:
#                 potential_ingredients.append(ingredient)
        
#         # Then look for potential ingredients by examining each word
#         for word in words:
#             word = word.strip(',.;:')
#             # Skip very short words and common cooking verbs/connectors
#             if (len(word) > 3 and 
#                 word not in self.cooking_verbs and 
#                 word not in self.connectors and
#                 not any(word in ing for ing in potential_ingredients)):
#                 potential_ingredients.append(word)
        
#         # Limit to reasonable number
#         random.shuffle(potential_ingredients)
#         return potential_ingredients[:8]  # Limit to 8 ingredients
    
#     def extract_verbs_from_text(self, text):
#         """Extract potential cooking verbs from the damaged text"""
#         text = text.lower()
#         found_verbs = []
        
#         # Look for standard cooking verbs
#         for verb in self.cooking_verbs:
#             if verb in text:
#                 found_verbs.append(verb)
        
#         # If we found at least 3 verbs, use those; otherwise use default verbs
#         if len(found_verbs) >= 3:
#             return found_verbs
#         else:
#             return self.cooking_verbs
    
#     def identify_cuisine_from_text(self, text):
#         """Try to identify the cuisine type from the damaged text"""
#         text = text.lower()
#         cuisine_scores = defaultdict(int)
        
#         # Score each cuisine based on matching dishes
#         for cuisine, dishes in self.indian_dishes.items():
#             for dish in dishes:
#                 if dish in text:
#                     cuisine_scores[cuisine] += 2
            
#         # If no clear winner, try matching on ingredients characteristic of regions
#         if not cuisine_scores or max(cuisine_scores.values()) < 2:
#             # Simple regional ingredient indicators
#             regional_indicators = {
#                 "North Indian": ["paneer", "ghee", "cream", "butter", "rajma", "chole"],
#                 "South Indian": ["sambhar", "idli", "dosa", "coconut", "rasam", "curry leaves"],
#                 "Bengali": ["fish", "mustard", "posto", "mishti", "ilish", "chingri"],
#                 "Gujarati": ["dhokla", "fafda", "jaggery", "besan", "kadhi"],
#                 "Maharashtrian": ["vada", "misal", "poha", "kokum", "goda masala"],
#                 "Hyderabadi": ["biryani", "haleem", "salan", "keema"]
#             }
            
#             for cuisine, indicators in regional_indicators.items():
#                 for indicator in indicators:
#                     if indicator in text:
#                         cuisine_scores[cuisine] += 1
        
#         # Return most likely cuisine or default
#         if cuisine_scores:
#             return max(cuisine_scores.items(), key=lambda x: x[1])[0]
#         else:
#             return "Indian"  # Default
    
#     def reconstruct_recipe(self, broken_text, dish_name=None):
#         """
#         Transform severely broken recipe text into coherent instructions
#         using both the input text and template-based reconstruction.
#         """
#         # Extract usable content from broken text
#         ingredients = self.extract_ingredients_from_text(broken_text)
#         verbs = self.extract_verbs_from_text(broken_text)
#         cuisine = self.identify_cuisine_from_text(broken_text)
        
#         # If dish name not provided, try to identify it or create generic one
#         if not dish_name:
#             # Try to find a dish name in the text
#             for dishes in self.indian_dishes.values():
#                 for dish in dishes:
#                     if dish in broken_text.lower():
#                         dish_name = dish
#                         break
#                 if dish_name:
#                     break
            
#             # If still no dish name, create one from primary ingredients
#             if not dish_name and ingredients:
#                 primary_ingredient = ingredients[0]
#                 dish_types = ["curry", "masala", "fry", "roast", "stir-fry", "bhaji"]
#                 dish_name = f"{primary_ingredient} {random.choice(dish_types)}"
        
#         # Create a coherent recipe
#         reconstructed_recipe = f"# {dish_name.title()}\n\n"
        
#         # Add ingredients section
#         reconstructed_recipe += "## Ingredients\n\n"
#         for ingredient in ingredients:
#             amount = random.choice(self.amounts)
#             reconstructed_recipe += f"- {amount} {ingredient}\n"
        
#         # Add spices section
#         spices = random.sample(self.indian_ingredients[:12], min(5, len(self.indian_ingredients[:12])))
#         reconstructed_recipe += "- Spices (to taste): " + ", ".join(spices) + "\n\n"
        
#         # Add instructions section
#         reconstructed_recipe += "## Instructions\n\n"
        
#         # Generate 4-6 coherent steps
#         num_steps = random.randint(4, 6)
        
#         # Create a logical flow of steps
#         used_templates = set()
#         verbs_cycle = verbs.copy()
        
#         for i in range(num_steps):
#             # Ensure we don't run out of verbs
#             if not verbs_cycle:
#                 verbs_cycle = verbs.copy()
                
#             # Select template avoiding repetition when possible
#             available_templates = [t for t in self.step_templates if t not in used_templates]
#             if not available_templates:
#                 available_templates = self.step_templates
                
#             template = random.choice(available_templates)
#             used_templates.add(template)
            
#             # Fill template with appropriate content
#             verb = random.choice(verbs_cycle)
#             verbs_cycle.remove(verb)
            
#             verb2 = random.choice(verbs)
#             ingredient = random.choice(ingredients)
#             ingredients_remaining = [ing for ing in ingredients if ing != ingredient]
#             ingredient2 = random.choice(ingredients_remaining) if ingredients_remaining else random.choice(ingredients)
            
#             # Format the step
#             step = template.format(
#                 verb=verb,
#                 Verb=verb.capitalize(),
#                 ingredient=ingredient,
#                 ingredient2=ingredient2,
#                 adverb=random.choice(self.adverbs),
#                 Connector=random.choice(self.connectors).capitalize(),
#                 connector=random.choice(self.connectors),
#                 condition=random.choice(self.conditions),
#                 time=random.choice(self.times),
#                 utensil=random.choice(self.utensils),
#                 amount=random.choice(self.amounts),
#                 verb2=verb2
#             )
            
#             reconstructed_recipe += f"{i+1}. {step}\n"
        
#         # Add final serving instruction
#         serve_with = random.choice(["rice", "roti", "naan", "paratha", "bread"])
#         reconstructed_recipe += f"\nServe hot with {serve_with}. Enjoy your {dish_name}!\n"
        
#         return reconstructed_recipe


In [None]:
# # Initialize it
# reconstructor = RecipeTextReconstructor()

# # Process your model's output
# model_output = "Tender.meanwhile through.make kadappa through.make first.preheat pulikachalsoak even.now flame.start sambhar paratha.cook evaporated.at ivy skillet.grind..."

# # Get coherent recipe (optionally provide dish name)
# fixed_recipe = reconstructor.reconstruct_recipe(
#     model_output, 
#     dish_name="Kadappa with Paratha"  # Optional
# )

# print(fixed_recipe)

# Kadappa With Paratha

## Ingredients

- 1 tablespoon flame
- a small amount even
- 1 tablespoon evaporated
- a small amount start
- a handful first
- 2 tablespoons paratha
- a handful kadappa
- 1 cup through
- Spices (to taste): cumin, asafoetida, curry leaves, coriander, cardamom

## Instructions

1. Garnish with through and serve warm.
2. First, cook the through carefully.
3. Add the first and grind for another 5 minutes.
4. Grind 1 cup of first and cook it slowly.

Serve hot with rice. Enjoy your Kadappa with Paratha!



In [26]:

# Example usage
# reconstructor = RecipeTextReconstructor()
# fixed_recipe = reconstructor.reconstruct_recipe(broken_text, "Paneer Butter Masala")

In [27]:
# def generate_recipe_instructions(input_text, encoder_model, decoder_model, 
#                                 encoder_tokenizer, decoder_tokenizer, 
#                                 reverse_decoder_word_index):
#     """
#     Generate recipe instructions from input text using the improved decoding algorithm.
#     """
#     # Tokenize and pad the input sequence
#     input_seq = encoder_tokenizer.texts_to_sequences([input_text])
#     input_seq = np.array(input_seq)
    
#     # Generate recipe instructions
#     recipe = decode_sequence_improved(
#         input_seq, 
#         encoder_model, 
#         decoder_model, 
#         decoder_tokenizer, 
#         reverse_decoder_word_index,
#         beam_width=5,
#         temperature=1.2,
#         length_norm_alpha=0.7,
#         diversity_penalty=0.3
#     )
    
#     return recipe

# # Example usage:


In [28]:
# recipe = generate_recipe_instructions(
#     "chicken curry with spices and herbs", 
#     encoder_model, 
#     decoder_model, 
#     encoder_tokenizer, 
#     decoder_tokenizer, 
#     reverse_decoder_word_index
# )
# print(recipe)

In [29]:
# # Beam search with width 5
# print("Beam Width 5:")
# print(predict_instruction("chicken tomato onion garlic cumin", beam_width=5))

# Beam search with width 10
# NOTE: the cell that defines predict_instruction must be executed before this one;
# otherwise this call fails with NameError.
print("\nBeam Width 10:")
print(predict_instruction("chicken tomato onion garlic cumin", beam_width=10))

# # Greedy for comparison
# print("\nGreedy (Beam Width = 1):")
# print(predict_instruction("lamb onion garlic", beam_width=1))


Beam Width 10:


NameError: name 'predict_instruction' is not defined

In [None]:
# Render the training model's layer graph to model.png, including tensor shapes
# and layer names.
# NOTE(review): plot_model needs the optional pydot and Graphviz dependencies,
# which the install cell at the top does not list — confirm they are available.
from tensorflow.keras.utils import plot_model
plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)