<a href="https://colab.research.google.com/github/VenkatesanNadimuthu/C3AN/blob/main/Model/C3AN_Baseline_Version_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import math
import os
import csv
from tokenizers import Tokenizer, models, trainers, pre_tokenizers, decoders, processors


# ==========================================
# 1. Hyperparameters & Configuration
# ==========================================
class Config:
    """Single home for every setting read by the tokenizer, dataset, model and training loop."""

    # --- Model architecture ---
    d_model = 512         # width of the token / position embeddings
    n_head = 8            # attention heads; CausalSelfAttention asserts d_model % n_head == 0
    n_layer = 16          # number of stacked decoder blocks
    vocab_size = 5_000    # requested BPE vocabulary size
    dropout = 0.1

    # --- Training ---
    batch_size = 32
    learning_rate = 1e-4
    epochs = 10
    train_test_split_ratio = 0.8    # share of the samples that goes to the training split

    # --- File paths ---
    # TODO: UPDATE THIS PATH to your actual dataset file (.csv or .txt)
    dataset_path = '/content/processed_recipes.txt'
    # TODO: IF USING CSV, UPDATE THIS to the header of the column containing the text
    csv_column = 'text_column_name'
    tokenizer_path = 'bpe_tokenizer.json'
    model_save_path = 'decoder_model.pth'

    # --- Hardware ---
    # Use the GPU whenever PyTorch can see one.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ==========================================
# 2. Tokenizer Setup (BPE)
# ==========================================
def get_training_corpus(dataset_path, column_name):
    """Yield the non-empty training texts stored in a CSV or plain-text file.

    Args:
        dataset_path: Path to the dataset. A path ending in ``.csv``
            (case-insensitive) is parsed with ``csv.DictReader``; any other
            path is read as plain text with one sequence per line.
        column_name: Header of the CSV column that holds the text. Ignored
            for plain-text files.

    Yields:
        str: For CSV input, every non-empty value of ``column_name`` exactly
        as parsed. For plain-text input, every non-blank line with its
        surrounding whitespace stripped.

    Raises:
        KeyError: When a CSV row is read and ``column_name`` is not one of
            the file's headers.
    """
    is_csv = dataset_path.lower().endswith('.csv')

    # The csv module expects files opened with newline='' so that line breaks
    # inside quoted fields reach the parser untouched. Plain text keeps the
    # default universal-newline translation.
    newline = '' if is_csv else None

    with open(dataset_path, 'r', encoding='utf-8', newline=newline) as f:
        if is_csv:
            for row in csv.DictReader(f):
                text = row[column_name]
                if text:
                    yield text
        else:
            # Plain text file: one sequence per line, blank lines skipped.
            for line in f:
                stripped = line.strip()
                if stripped:
                    yield stripped

def train_or_load_tokenizer(config):
    """Return the BPE tokenizer, training and saving it first when none exists.

    If ``config.tokenizer_path`` exists, the tokenizer stored there is loaded
    unchanged. Otherwise a new BPE tokenizer is trained on the texts yielded
    by ``get_training_corpus(config.dataset_path, config.csv_column)`` and
    written to ``config.tokenizer_path``.

    Args:
        config: Object providing ``tokenizer_path``, ``dataset_path``,
            ``csv_column`` and ``vocab_size``.

    Returns:
        Tokenizer: The loaded or freshly trained tokenizer.
    """
    if os.path.exists(config.tokenizer_path):
        print(f"Loading existing tokenizer from {config.tokenizer_path}...")
        return Tokenizer.from_file(config.tokenizer_path)

    print(f"Training new BPE tokenizer on {config.dataset_path}...")

    # Mark word endings explicitly. Without a word-boundary marker and a
    # matching decoder, decode() can only join sub-word pieces with spaces
    # ("Bell am P ong an alu" instead of "Bellam Ponganalu").
    end_of_word = "</w>"

    tokenizer = Tokenizer(models.BPE(unk_token="[UNK]", end_of_word_suffix=end_of_word))
    tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
    # The decoder is saved with the tokenizer, so code that later loads the
    # file gets the same word reassembly.
    tokenizer.decoder = decoders.BPEDecoder(suffix=end_of_word)

    # [PAD] is listed first on purpose: the training loss is computed with
    # ignore_index=0, which relies on [PAD] receiving id 0.
    trainer = trainers.BpeTrainer(
        vocab_size=config.vocab_size,
        special_tokens=["[PAD]", "[UNK]", "[BOS]", "[EOS]"],
        end_of_word_suffix=end_of_word,
    )

    # Train on the texts streamed from the dataset file (CSV or TXT).
    tokenizer.train_from_iterator(
        get_training_corpus(config.dataset_path, config.csv_column),
        trainer=trainer,
    )

    tokenizer.save(config.tokenizer_path)
    print("Tokenizer trained and saved.")

    return tokenizer

# ==========================================
# 3. Dataset Class
# ==========================================
class TextDataset(Dataset):
    """Fixed-length token sequences built from a CSV column or a plain-text file.

    Every item is a ``torch.long`` tensor of exactly ``block_size`` ids laid
    out as ``[BOS] tokens... [EOS] [PAD]...``. A sequence that is too long is
    cut to ``block_size`` and its last kept id is overwritten with ``[EOS]``.
    ``[BOS]`` / ``[EOS]`` are omitted when the tokenizer does not define them.
    """

    def __init__(self, file_path, tokenizer, block_size, column_name):
        """Load all texts into memory and cache the special-token ids.

        Args:
            file_path: Dataset path. ``.csv`` (case-insensitive) is parsed
                with ``csv.DictReader``; anything else is read as plain text
                with one sequence per line.
            tokenizer: Object exposing ``encode(text).ids`` and
                ``token_to_id(token)``.
            block_size: Length of every returned sequence.
            column_name: CSV column holding the text; unused for plain text.

        Raises:
            KeyError: If a CSV row is read and ``column_name`` is not a header.
        """
        self.tokenizer = tokenizer
        self.block_size = block_size

        is_csv = file_path.lower().endswith('.csv')
        # newline='' is what the csv module expects; without it, line breaks
        # inside quoted fields are rewritten before the parser sees them.
        newline = '' if is_csv else None

        with open(file_path, 'r', encoding='utf-8', newline=newline) as f:
            if is_csv:
                self.data = [row[column_name] for row in csv.DictReader(f) if row[column_name]]
            else:
                # Iterate the file directly instead of materialising readlines().
                self.data = [line.strip() for line in f if line.strip()]

        # Padding and truncation are done by hand in __getitem__ (not by the
        # tokenizer) so that [BOS] / [EOS] placement stays under our control.
        self.bos_id = self.tokenizer.token_to_id("[BOS]")
        self.eos_id = self.tokenizer.token_to_id("[EOS]")
        self.pad_id = self.tokenizer.token_to_id("[PAD]")

    def __len__(self):
        """Return the number of loaded texts."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a ``torch.long`` tensor of ``block_size`` ids.

        Raises:
            ValueError: If the sample needs padding but the tokenizer has no
                ``[PAD]`` token.
        """
        ids = list(self.tokenizer.encode(self.data[idx]).ids)

        # Wrap the sequence in whichever boundary tokens the tokenizer knows.
        if self.bos_id is not None:
            ids = [self.bos_id] + ids
        if self.eos_id is not None:
            ids = ids + [self.eos_id]

        # Truncate, then restore [EOS] in the last slot so that every sample
        # still ends with an end-of-sequence marker.
        if len(ids) > self.block_size:
            ids = ids[:self.block_size]
            if self.eos_id is not None and ids:
                ids[-1] = self.eos_id

        padding_len = self.block_size - len(ids)
        if padding_len > 0:
            if self.pad_id is None:
                # Fail with a clear message instead of an obscure tensor
                # construction error caused by a list containing None.
                raise ValueError(
                    "Tokenizer has no [PAD] token; cannot pad sequences to block_size."
                )
            ids = ids + [self.pad_id] * padding_len

        return torch.tensor(ids, dtype=torch.long)

# ==========================================
# 4. Model Architecture (Decoder Only)
# ==========================================
class CausalSelfAttention(nn.Module):
    """Multi-head self-attention where each position sees only itself and earlier positions."""

    def __init__(self, config):
        super().__init__()
        assert config.d_model % config.n_head == 0
        # One fused projection yields queries, keys and values together.
        self.c_attn = nn.Linear(config.d_model, 3 * config.d_model)
        # Output projection applied once the heads are merged again.
        self.c_proj = nn.Linear(config.d_model, config.d_model)
        self.n_head = config.n_head
        self.d_model = config.d_model

        # Lower-triangular matrix of ones kept as a buffer and shaped
        # (1, 1, 2048, 2048) so it broadcasts over batch and heads.
        max_len = 2048
        causal = torch.ones(max_len, max_len).tril()
        self.register_buffer("bias", causal.view(1, 1, max_len, max_len))

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, d_model) to a tensor of the same shape."""
        batch, seq_len, channels = x.size()
        head_dim = channels // self.n_head

        def split_heads(t):
            # (batch, seq_len, channels) -> (batch, n_head, seq_len, head_dim)
            return t.view(batch, seq_len, self.n_head, head_dim).transpose(1, 2)

        fused = self.c_attn(x)
        queries, keys, values = (split_heads(part) for part in fused.split(self.d_model, dim=2))

        # Scaled dot-product scores; positions to the right of the diagonal are
        # set to -inf so the softmax gives them zero weight.
        scale = 1.0 / math.sqrt(keys.size(-1))
        scores = (queries @ keys.transpose(-2, -1)) * scale
        blocked = self.bias[:, :, :seq_len, :seq_len] == 0
        weights = F.softmax(scores.masked_fill(blocked, float('-inf')), dim=-1)

        # Weighted sum of values, then put the heads back side by side.
        merged = (weights @ values).transpose(1, 2).contiguous().view(batch, seq_len, channels)
        return self.c_proj(merged)

class FeedForward(nn.Module):
    """Position-wise MLP: expand to 4 * d_model, GELU, project back, then dropout."""

    def __init__(self, config):
        super().__init__()
        hidden = 4 * config.d_model
        layers = [
            nn.Linear(config.d_model, hidden),
            nn.GELU(),
            nn.Linear(hidden, config.d_model),
            nn.Dropout(config.dropout),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the MLP to the last dimension of ``x``."""
        out = self.net(x)
        return out

class Block(nn.Module):
    """One pre-norm decoder layer: attention and feed-forward, each inside a residual connection."""

    def __init__(self, config):
        super().__init__()
        width = config.d_model
        self.ln1 = nn.LayerNorm(width)
        self.sa = CausalSelfAttention(config)
        self.ln2 = nn.LayerNorm(width)
        self.ffwd = FeedForward(config)

    def forward(self, x):
        """Normalise before each sub-layer and add the sub-layer output back onto ``x``."""
        attn_out = self.sa(self.ln1(x))
        x = x + attn_out
        mlp_out = self.ffwd(self.ln2(x))
        return x + mlp_out

class TransformerDecoder(nn.Module):
    """Decoder-only language model: token + learned position embeddings, a stack of
    ``Block`` layers, a final LayerNorm and a bias-free projection to the vocabulary."""

    def __init__(self, config):
        super().__init__()
        self.token_embedding = nn.Embedding(config.vocab_size, config.d_model)
        self.position_embedding = nn.Embedding(2048, config.d_model)  # at most 2048 positions

        layers = [Block(config) for _ in range(config.n_layer)]
        self.blocks = nn.Sequential(*layers)
        self.ln_f = nn.LayerNorm(config.d_model)
        self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)

        # Re-initialise every Linear / Embedding in the tree (see _init_weights).
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Draw Linear/Embedding weights from N(0, 0.02) and zero any Linear bias."""
        if not isinstance(module, (nn.Linear, nn.Embedding)):
            return
        torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
        if isinstance(module, nn.Linear) and module.bias is not None:
            torch.nn.init.zeros_(module.bias)

    def forward(self, idx, targets=None):
        """Return ``(logits, loss)`` for a (batch, seq_len) tensor of token ids.

        ``loss`` is ``None`` unless ``targets`` is given; target positions
        equal to 0 (the padding id) are excluded from the cross-entropy.
        """
        _, seq_len = idx.shape

        # Position indices 0 .. seq_len-1, created on the same device as idx.
        positions = torch.arange(0, seq_len, dtype=torch.long, device=idx.device)

        hidden = self.token_embedding(idx) + self.position_embedding(positions)
        hidden = self.ln_f(self.blocks(hidden))
        logits = self.lm_head(hidden)

        if targets is None:
            return logits, None

        # Flatten to (batch * seq_len, vocab) vs (batch * seq_len,) for the loss.
        flat_logits = logits.reshape(-1, logits.size(-1))
        loss = F.cross_entropy(flat_logits, targets.reshape(-1), ignore_index=0)
        return logits, loss

# ==========================================
# 5. Training Routine
# ==========================================
# Longest input the model accepts: TransformerDecoder's position embedding and
# CausalSelfAttention's mask are both built for 2048 positions.
_MAX_POSITIONS = 2048


def _first_nonempty_text(config):
    """Return the first non-empty text of the dataset, or None after printing the reason."""
    is_csv = config.dataset_path.lower().endswith('.csv')
    # The csv module expects newline=''; plain text keeps universal newlines.
    newline = '' if is_csv else None

    with open(config.dataset_path, 'r', encoding='utf-8', newline=newline) as f:
        if not is_csv:
            for line in f:
                if line.strip():
                    return line.strip()
            print("Error: Dataset file appears empty.")
            return None

        for row in csv.DictReader(f):
            if config.csv_column not in row:
                print(f"Error: Column '{config.csv_column}' not found in CSV.")
                print(f"Found headers: {list(row.keys())}")
                print("Please update Config.csv_column to match one of these headers.")
                return None
            # Skip rows whose text cell is empty; the dataset skips them too.
            if row[config.csv_column]:
                return row[config.csv_column]

    print(f"Error: The CSV file '{config.dataset_path}' appears to be empty.")
    return None


def _evaluate(model, dataloader, device):
    """Return the mean next-token loss over ``dataloader``, or None if it has no batches."""
    if len(dataloader) == 0:
        return None

    was_training = model.training
    model.eval()  # switch dropout off while measuring
    total = 0.0
    with torch.no_grad():
        for batch in dataloader:
            batch = batch.to(device)
            _, loss = model(batch[:, :-1], targets=batch[:, 1:])
            total += loss.item()
    model.train(was_training)
    return total / len(dataloader)


def main():
    """Train the decoder on ``Config.dataset_path`` and save its weights.

    Steps: load or train the tokenizer, derive the sequence length from the
    first non-empty text, split the data into train / held-out sets, train
    with AdamW + OneCycleLR while reporting the training and held-out loss
    after every epoch, and finally write the state dict to
    ``Config.model_save_path``. Problems with the input data are reported
    with a printed message followed by an early return.
    """
    # A. Setup
    config = Config()
    print(f"Using device: {config.device}")

    if not os.path.exists(config.dataset_path):
        print(f"Error: Dataset file '{config.dataset_path}' not found.")
        print("Please update Config.dataset_path in the script to point to your actual data file.")
        return

    # B. Tokenizer
    tokenizer = train_or_load_tokenizer(config)
    config.vocab_size = tokenizer.get_vocab_size()
    print(f"Vocabulary Size: {config.vocab_size}")

    # C. Prepare Data
    first_text = _first_nonempty_text(config)
    if first_text is None:
        return

    # Simple heuristic for the sequence length: the first text plus BOS/EOS.
    detected_len = len(tokenizer.encode(first_text).ids) + 2
    print(f"Detected sequence length from first row (with BOS/EOS): {detected_len}")

    # Each sample is split into inputs[:-1] / targets[1:], so the model sees
    # block_size - 1 positions; keep that within what it can address.
    block_size = min(detected_len, _MAX_POSITIONS + 1)
    if block_size < detected_len:
        print(f"Sequence length capped at {block_size} to fit the model's {_MAX_POSITIONS} positions.")

    full_dataset = TextDataset(config.dataset_path, tokenizer, block_size, config.csv_column)

    # Split dataset into training and held-out sets
    train_size = int(config.train_test_split_ratio * len(full_dataset))
    test_size = len(full_dataset) - train_size
    if train_size == 0:
        # OneCycleLR rejects total_steps == 0, so stop with a clear message.
        print("Error: Not enough samples in the dataset to build a training set.")
        return
    train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])

    print(f"Dataset split: Training samples = {len(train_dataset)}, Testing samples = {len(test_dataset)}")

    train_dataloader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    # No shuffling for the held-out set so its loss is comparable across epochs.
    test_dataloader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False)

    # D. Model Initialization
    model = TransformerDecoder(config).to(config.device)
    print("Model initialized.")

    # E. Optimizer & Scheduler
    optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)

    # OneCycleLR is stepped once per batch, hence batches * epochs total steps.
    total_steps = len(train_dataloader) * config.epochs
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=config.learning_rate,
        total_steps=total_steps,
        pct_start=0.3
    )

    # F. Training Loop
    model.train()
    print("Starting training...")

    for epoch in range(config.epochs):
        total_loss = 0
        for step, batch in enumerate(train_dataloader):
            batch = batch.to(config.device)

            # Autoregressive setup: predict token t+1 from tokens <= t.
            # Input:  [BOS, A, B, C, EOS]
            # Target: [A, B, C, EOS, PAD]
            inputs = batch[:, :-1]
            targets = batch[:, 1:]

            optimizer.zero_grad()
            logits, loss = model(inputs, targets=targets)

            loss.backward()
            optimizer.step()
            scheduler.step()

            total_loss += loss.item()

            if step % 50 == 0:
                print(f"Epoch {epoch+1} | Step {step} | Loss: {loss.item():.4f} | LR: {scheduler.get_last_lr()[0]:.6f}")

        avg_loss = total_loss / len(train_dataloader)
        print(f"==> End of Epoch {epoch+1}, Average Loss: {avg_loss:.4f}")

        # Report the loss on the held-out split, which is otherwise unused.
        val_loss = _evaluate(model, test_dataloader, config.device)
        if val_loss is not None:
            print(f"==> End of Epoch {epoch+1}, Validation Loss: {val_loss:.4f}")

    # G. Save
    torch.save(model.state_dict(), config.model_save_path)
    print(f"Model saved to {config.model_save_path}")

if __name__ == "__main__":
    main()

Using device: cuda
Training new BPE tokenizer on /content/processed_recipes.txt...
Tokenizer trained and saved.
Vocabulary Size: 5000
Detected sequence length from first row (with BOS/EOS): 268
Dataset split: Training samples = 6941, Testing samples = 1736
Model initialized.
Starting training...
Epoch 1 | Step 0 | Loss: 8.5948 | LR: 0.000004
Epoch 1 | Step 50 | Loss: 7.5535 | LR: 0.000005
Epoch 1 | Step 100 | Loss: 7.1878 | LR: 0.000010
Epoch 1 | Step 150 | Loss: 6.7675 | LR: 0.000016
Epoch 1 | Step 200 | Loss: 6.0353 | LR: 0.000025
==> End of Epoch 1, Average Loss: 7.0794
Epoch 2 | Step 0 | Loss: 5.8396 | LR: 0.000028
Epoch 2 | Step 50 | Loss: 5.0606 | LR: 0.000039
Epoch 2 | Step 100 | Loss: 4.3693 | LR: 0.000050
Epoch 2 | Step 150 | Loss: 4.1471 | LR: 0.000062
Epoch 2 | Step 200 | Loss: 3.8337 | LR: 0.000073
==> End of Epoch 2, Average Loss: 4.5349
Epoch 3 | Step 0 | Loss: 3.5210 | LR: 0.000076
Epoch 3 | Step 50 | Loss: 3.6357 | LR: 0.000086
Epoch 3 | Step 100 | Loss: 3.3859 | LR: 0.

In [None]:
# Step 2: Model Inference

# ==========================================
# 1. Configuration and Setup
# ==========================================
import torch
import torch.nn as nn # Added for nn.Module and nn.Linear
import torch.nn.functional as F
from tokenizers import Tokenizer
import os
import math # Added for math.sqrt

# Re-defining Config class and model architecture from the training cell
# to ensure they are available in this cell's scope.

# ==========================================
# 1. Hyperparameters & Configuration (Copied from training cell)
# ==========================================
class Config:
    """Settings needed to rebuild the model and locate the saved tokenizer / weights."""

    # --- Model architecture ---
    d_model = 512         # width of the token / position embeddings
    n_head = 8            # attention heads; CausalSelfAttention asserts d_model % n_head == 0
    n_layer = 16          # number of stacked decoder blocks
    vocab_size = 5_000    # requested BPE vocabulary size
    dropout = 0.1

    # --- Training (not all needed for inference, kept for consistency) ---
    batch_size = 32
    learning_rate = 1e-4
    epochs = 10
    train_test_split_ratio = 0.8

    # --- File paths ---
    dataset_path = '/content/processed_recipes.txt'
    csv_column = 'text_column_name'
    tokenizer_path = 'bpe_tokenizer.json'
    model_save_path = 'decoder_model.pth'

    # --- Hardware ---
    # Use the GPU whenever PyTorch can see one.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ==========================================
# 4. Model Architecture (Decoder Only) (Copied from training cell)
# ==========================================
class CausalSelfAttention(nn.Module):
    """Multi-head self-attention where each position sees only itself and earlier positions."""

    def __init__(self, config):
        super().__init__()
        assert config.d_model % config.n_head == 0
        # One fused projection yields queries, keys and values together.
        self.c_attn = nn.Linear(config.d_model, 3 * config.d_model)
        # Output projection applied once the heads are merged again.
        self.c_proj = nn.Linear(config.d_model, config.d_model)
        self.n_head = config.n_head
        self.d_model = config.d_model

        # Lower-triangular matrix of ones kept as a buffer and shaped
        # (1, 1, 2048, 2048) so it broadcasts over batch and heads.
        max_len = 2048
        causal = torch.ones(max_len, max_len).tril()
        self.register_buffer("bias", causal.view(1, 1, max_len, max_len))

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, d_model) to a tensor of the same shape."""
        batch, seq_len, channels = x.size()
        head_dim = channels // self.n_head

        def split_heads(t):
            # (batch, seq_len, channels) -> (batch, n_head, seq_len, head_dim)
            return t.view(batch, seq_len, self.n_head, head_dim).transpose(1, 2)

        fused = self.c_attn(x)
        queries, keys, values = (split_heads(part) for part in fused.split(self.d_model, dim=2))

        # Scaled dot-product scores; positions to the right of the diagonal are
        # set to -inf so the softmax gives them zero weight.
        scale = 1.0 / math.sqrt(keys.size(-1))
        scores = (queries @ keys.transpose(-2, -1)) * scale
        blocked = self.bias[:, :, :seq_len, :seq_len] == 0
        weights = F.softmax(scores.masked_fill(blocked, float('-inf')), dim=-1)

        # Weighted sum of values, then put the heads back side by side.
        merged = (weights @ values).transpose(1, 2).contiguous().view(batch, seq_len, channels)
        return self.c_proj(merged)

class FeedForward(nn.Module):
    """Position-wise MLP: expand to 4 * d_model, GELU, project back, then dropout."""

    def __init__(self, config):
        super().__init__()
        hidden = 4 * config.d_model
        layers = [
            nn.Linear(config.d_model, hidden),
            nn.GELU(),
            nn.Linear(hidden, config.d_model),
            nn.Dropout(config.dropout),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the MLP to the last dimension of ``x``."""
        out = self.net(x)
        return out

class Block(nn.Module):
    """One pre-norm decoder layer: attention and feed-forward, each inside a residual connection."""

    def __init__(self, config):
        super().__init__()
        width = config.d_model
        self.ln1 = nn.LayerNorm(width)
        self.sa = CausalSelfAttention(config)
        self.ln2 = nn.LayerNorm(width)
        self.ffwd = FeedForward(config)

    def forward(self, x):
        """Normalise before each sub-layer and add the sub-layer output back onto ``x``."""
        attn_out = self.sa(self.ln1(x))
        x = x + attn_out
        mlp_out = self.ffwd(self.ln2(x))
        return x + mlp_out

class TransformerDecoder(nn.Module):
    """Decoder-only language model: token + learned position embeddings, a stack of
    ``Block`` layers, a final LayerNorm and a bias-free projection to the vocabulary."""

    def __init__(self, config):
        super().__init__()
        self.token_embedding = nn.Embedding(config.vocab_size, config.d_model)
        self.position_embedding = nn.Embedding(2048, config.d_model)  # at most 2048 positions

        layers = [Block(config) for _ in range(config.n_layer)]
        self.blocks = nn.Sequential(*layers)
        self.ln_f = nn.LayerNorm(config.d_model)
        self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)

        # Re-initialise every Linear / Embedding in the tree (see _init_weights).
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Draw Linear/Embedding weights from N(0, 0.02) and zero any Linear bias."""
        if not isinstance(module, (nn.Linear, nn.Embedding)):
            return
        torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
        if isinstance(module, nn.Linear) and module.bias is not None:
            torch.nn.init.zeros_(module.bias)

    def forward(self, idx, targets=None):
        """Return ``(logits, loss)`` for a (batch, seq_len) tensor of token ids.

        ``loss`` is ``None`` unless ``targets`` is given; target positions
        equal to 0 (the padding id) are excluded from the cross-entropy.
        """
        _, seq_len = idx.shape

        # Position indices 0 .. seq_len-1, created on the same device as idx.
        positions = torch.arange(0, seq_len, dtype=torch.long, device=idx.device)

        hidden = self.token_embedding(idx) + self.position_embedding(positions)
        hidden = self.ln_f(self.blocks(hidden))
        logits = self.lm_head(hidden)

        if targets is None:
            return logits, None

        # Flatten to (batch * seq_len, vocab) vs (batch * seq_len,) for the loss.
        flat_logits = logits.reshape(-1, logits.size(-1))
        loss = F.cross_entropy(flat_logits, targets.reshape(-1), ignore_index=0)
        return logits, loss

# Create an instance of the Config class to access paths and device settings
config = Config()

# ==========================================
# 2. Load Tokenizer
# ==========================================
print(f"Loading tokenizer from {config.tokenizer_path}...")
if not os.path.exists(config.tokenizer_path):
    # Raise instead of calling exit(): exit() shuts a notebook kernel down
    # (discarding its state) and reports success even though loading failed.
    raise FileNotFoundError(
        f"Tokenizer file '{config.tokenizer_path}' not found. "
        "Please ensure the tokenizer is trained and saved in the previous step."
    )
tokenizer = Tokenizer.from_file(config.tokenizer_path)
# The model's embedding / output sizes must match the tokenizer that was used
# for training, so take the vocabulary size from the loaded tokenizer.
config.vocab_size = tokenizer.get_vocab_size()
print(f"Tokenizer loaded. Vocabulary Size: {config.vocab_size}")

# Cache special token IDs for easier use (None when the tokenizer lacks one)
bos_id = tokenizer.token_to_id("[BOS]")
eos_id = tokenizer.token_to_id("[EOS]")
pad_id = tokenizer.token_to_id("[PAD]")

# ==========================================
# 3. Load Model
# ==========================================
print(f"Loading model from {config.model_save_path}...")
if not os.path.exists(config.model_save_path):
    raise FileNotFoundError(
        f"Model file '{config.model_save_path}' not found. "
        "Please ensure the model is trained and saved in the previous step."
    )

# Rebuild the architecture with the same hyperparameters, then restore the weights.
model = TransformerDecoder(config).to(config.device)
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load(config.model_save_path, map_location=config.device))
model.eval()  # evaluation mode: disables dropout for inference
print("Model loaded successfully.")

# ==========================================
# 4. Text Generation Function
# ==========================================
def generate_text(model, tokenizer, prompt_text, max_length=200, temperature=0.7, top_k=40, device='cpu'):
    """Autoregressively continue ``prompt_text`` and return the decoded result.

    Args:
        model: Callable returning ``(logits, loss)`` for a (1, T) tensor of
            token ids, e.g. the trained TransformerDecoder.
        tokenizer: Tokenizer providing ``encode``, ``decode`` and
            ``token_to_id``. The ``[BOS]`` / ``[EOS]`` ids are looked up on
            this object.
        prompt_text (str): The initial text prompt.
        max_length (int): Maximum total number of tokens, counting the prompt
            and the prepended ``[BOS]``.
        temperature (float): Divides the logits before sampling; lower values
            make the output more deterministic. ``0`` selects the most likely
            token at every step (greedy decoding).
        top_k (int): If set to a positive number, sample only from the
            ``top_k`` most probable tokens. ``None`` or ``0`` disables the
            filter.
        device (str): Device the input tensor is created on ('cpu' or 'cuda').

    Returns:
        str: The prompt plus generated continuation, decoded with special
        tokens removed.

    Raises:
        ValueError: If the prompt encodes to no tokens and the tokenizer has
            no ``[BOS]`` token, so there is nothing to condition on.
    """
    # Resolve the special tokens from the tokenizer argument rather than from
    # module-level globals, so the function works with any tokenizer passed in.
    bos_token_id = tokenizer.token_to_id("[BOS]")
    eos_token_id = tokenizer.token_to_id("[EOS]")

    input_ids = list(tokenizer.encode(prompt_text).ids)

    # Prepend BOS token if it's available and not already at the start
    if bos_token_id is not None and (not input_ids or input_ids[0] != bos_token_id):
        input_ids = [bos_token_id] + input_ids

    if not input_ids:
        raise ValueError("Prompt produced no tokens and the tokenizer has no [BOS] token.")

    # Running list of all token ids, starting with the prompt's ids
    generated_ids = list(input_ids)
    # Same ids as a (1, T) tensor that grows by one column per step
    context = torch.tensor(input_ids, dtype=torch.long, device=device).unsqueeze(0)

    # If the model exposes its position-embedding table, never feed it more
    # positions than that table holds; keep the most recent ones instead.
    max_context = getattr(getattr(model, "position_embedding", None), "num_embeddings", None)

    with torch.no_grad():  # no gradients needed for inference
        for _ in range(max_length - len(input_ids)):
            window = context if max_context is None else context[:, -max_context:]
            logits, _ = model(window)
            # Only the prediction for the next token (last position) matters
            logits = logits[:, -1, :]

            if temperature == 0:
                # Greedy decoding; dividing by zero would yield inf / nan logits.
                next_token = logits.argmax(dim=-1, keepdim=True)
            else:
                logits = logits / temperature

                if top_k is not None and top_k > 0:
                    top_values, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                    # Everything below the k-th best logit gets zero probability
                    logits = logits.masked_fill(logits < top_values[:, [-1]], -float('Inf'))

                probs = F.softmax(logits, dim=-1)
                next_token = torch.multinomial(probs, num_samples=1)

            next_id = next_token.item()
            generated_ids.append(next_id)
            context = torch.cat((context, next_token), dim=1)

            # Stop generation if EOS token is produced
            if next_id == eos_token_id:
                break

    # Drop the leading BOS before decoding; it is not part of the desired output
    if bos_token_id is not None and generated_ids[0] == bos_token_id:
        generated_ids = generated_ids[1:]

    return tokenizer.decode(generated_ids, skip_special_tokens=True)


# ==========================================
# 5. Interactive User Input and Generation Loop
# ==========================================
print("\n--- Recipe Generation Mode ---")
print("Enter a prompt to start generating a recipe. Type 'exit' to quit.\n")

while True:
    try:
        user_prompt = input("Your prompt: ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user interrupted: leave the loop cleanly
        # instead of ending the session with a traceback.
        print()
        break

    # Compare the trimmed text so that "exit " or " Exit" also quits.
    command = user_prompt.strip()
    if command.lower() == 'exit':
        break

    if not command:
        print("Please enter a non-empty prompt.")
        continue

    print("\nGenerating recipe based on your prompt...")
    generated_recipe = generate_text(
        model=model,
        tokenizer=tokenizer,
        prompt_text=user_prompt,
        max_length=200,   # total token budget, prompt included
        temperature=0.8,  # higher = more varied, lower = more focused
        top_k=50,         # sample only from the 50 most likely tokens
        device=config.device
    )

    print("\n--- Generated Recipe ---")
    print(generated_recipe)
    print("------------------------\n")

print("Exiting recipe generation. Goodbye!")

Loading tokenizer from bpe_tokenizer.json...
Tokenizer loaded. Vocabulary Size: 5000
Loading model from decoder_model.pth...
Model loaded successfully.

--- Recipe Generation Mode ---
Enter a prompt to start generating a recipe. Type 'exit' to quit.

Your prompt: actually, The recipe for Sweet Paniyaram (Bellam Ponganalu) is a delightful dish. For this recipe

Generating recipe based on your prompt...

--- Generated Recipe ---
actually , The recipe for Sweet Paniyaram ( Bell am P ong an alu ) is a delightful dish . For this recipe , you will need the following ingredients : cup urad dal ( split , fine chopped ) to 4 to teaspoon mustard seeds teaspoon cumin seeds teaspoon urad dal ( optional ) teaspoon ginger paste ( curry leaves , adjust to taste ) 1 g reen chillies ( skip for lid . The full instructions are as follows : Add rice to a bowl with salt and green chili . In a grinder jar , add water . When the mixture comes to a thick , mash . Add ginger garlic paste and fry for few mins o