# Creating a Recipe Generator

In [2]:
import numpy as np 
import pandas as pd 
import torch

In [3]:
# ======================================================================
# Model architecture
# ======================================================================
n_layer = 6        # transformer blocks in the model
n_head = 6         # attention heads
n_embd = 384       # embedding dimension
block_size = 512   # context window; chosen long enough to hold a full recipe
dropout = 0.2      # kept fairly high to discourage memorizing specific recipes

# ======================================================================
# Training loop
# ======================================================================
max_iters = 5000       # total number of training steps
eval_interval = 500    # validation loss is checked every this many steps
batch_size = 32        # how many recipes are processed at once
learning_rate = 3e-4   # value picked as a good fit for small transformers

# Train on the GPU when one is visible to PyTorch, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# ======================================================================
# Special control tokens
# ======================================================================
# Marker strings used as "switches" for the model, listed in the order
# they appear inside one formatted recipe.

# Diet tag: exactly one of these opens every recipe.
VEGAN_TOKEN = "[VEGAN]"
NORMAL_TOKEN = "[NORMAL]"

# Section markers: title, ingredient list, cooking steps.
TITLE_TOKEN = "[TITLE]"
INGRED_TOKEN = "[INGRED]"
STEPS_TOKEN = "[STEPS]"

# Closes every recipe.
EOS_TOKEN = "<|endoftext|>"

## 1) Data Pre-Processing

In [None]:
def create_training_data(csv_path, animal_prods=None):
    """Build the tagged training corpus from a recipe CSV.

    Every row becomes one line of text laid out as::

        <diet tag> TITLE_TOKEN <title> INGRED_TOKEN <ingredients> STEPS_TOKEN <steps> EOS_TOKEN

    where the diet tag is ``VEGAN_TOKEN`` when no ingredient mentions an
    animal-product keyword and ``NORMAL_TOKEN`` otherwise.

    Keyword matching is case-insensitive and works on whole words inside
    each ingredient, so "Chicken breasts" is caught by the keyword
    "chicken". This is a deliberately conservative heuristic: plant-based
    items that contain a keyword (e.g. "peanut butter", "coconut milk")
    are tagged ``NORMAL_TOKEN`` as well.

    Args:
        csv_path: Path or buffer passed straight to ``pd.read_csv``. The CSV
            must provide ``title``, ``NER`` and ``directions`` columns, with
            ``NER`` and ``directions`` stored as Python list literals of
            strings (e.g. ``'["flour", "sugar"]'``).
        animal_prods: Optional iterable of single-word, non-vegan keywords.
            Defaults to the built-in keyword set.

    Returns:
        str: All formatted recipes joined by newlines (an empty string when
        the CSV has no rows).

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when a
            ``NER`` or ``directions`` cell is not a valid Python literal.
    """
    # Imported locally so the function does not rely on another cell's imports.
    import ast

    if animal_prods is None:
        animal_prods = {'milk', 'eggs', 'butter', 'meat', 'beef', 'chicken',
                        'fish', 'cheese', 'lard'}
    keywords = {keyword.lower() for keyword in animal_prods}

    def mentions_animal_product(ingredient):
        # Word-level, case-insensitive check against the keyword set.
        return not keywords.isdisjoint(ingredient.lower().split())

    df = pd.read_csv(csv_path)

    formatted_recipes = []
    # Only three columns are needed, so zip them instead of paying for
    # iterrows() building a Series per row.
    for title, ner_cell, directions_cell in zip(df['title'], df['NER'], df['directions']):
        # SECURITY: these cells come from an external file. The original code
        # ran eval() on them, which executes arbitrary expressions;
        # literal_eval only accepts plain Python literals.
        ner_list = ast.literal_eval(ner_cell)
        directions = ast.literal_eval(directions_cell)

        # 1. Determine the diet tag
        is_vegan = not any(mentions_animal_product(item) for item in ner_list)
        diet_tag = VEGAN_TOKEN if is_vegan else NORMAL_TOKEN

        # 2. Build the structured string
        recipe_str = (
            f"{diet_tag} {TITLE_TOKEN} {title} "
            f"{INGRED_TOKEN} {', '.join(ner_list)} "
            f"{STEPS_TOKEN} {' '.join(directions)} {EOS_TOKEN}"
        )
        formatted_recipes.append(recipe_str)

    return "\n".join(formatted_recipes)