In [6]:
# 1) Install & imports
!pip install --quiet sentence-transformers faiss-cpu langchain transformers accelerate

from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
import faiss, numpy as np

# 2) Paste the recipe text
doc_text = """
How to Make Simple Pancakes:

Ingredients:
– 1 cup all-purpose flour  
– 2 tablespoons sugar  
– 1 teaspoon baking powder  
– 1 teaspoon baking soda  
– 1 pinch of salt  
– 1 cup buttermilk  
– 1 egg  
– 2 tablespoons melted butter  

Instructions:
In a bowl, whisk together the flour, sugar, baking powder, baking soda, and salt.  
In another bowl, beat the egg with the buttermilk and melted butter until combined.  
Pour the wet ingredients into the dry ingredients and stir until just mixed—it's okay if there are a few lumps.  

Cooking:
Heat a non-stick skillet over medium heat and lightly grease with butter.  
Pour ¼ cup batter per pancake; cook until bubbles form on top (about 2 minutes), then flip and cook the other side until golden brown.  
Serve warm with maple syrup, fresh fruit, or your favorite toppings.
"""

# 3) Chunk into ~200-char pieces with 50-char overlap, honoring paragraphs
splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_size=200,
    chunk_overlap=50
)
chunks = splitter.split_text(doc_text)

print(f"Generated {len(chunks)} chunks:")
for i, c in enumerate(chunks):
    print(f"--- Chunk {i} ---\n{c}\n")


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Generated 8 chunks:
--- Chunk 0 ---
How to Make Simple Pancakes:

--- Chunk 1 ---
Ingredients:
– 1 cup all-purpose flour  
– 2 tablespoons sugar  
– 1 teaspoon baking powder  
– 1 teaspoon baking soda  
– 1 pinch of salt  
– 1 cup buttermilk  
– 1 egg

--- Chunk 2 ---
– 1 cup buttermilk  
– 1 egg  
– 2 tablespoons melted butter

--- Chunk 3 ---
Instructions:
In a bowl, whisk together the flour, sugar, baking powder, baking soda, and salt.  
In another bowl, beat the egg with the buttermilk and melted butter until combined.

--- Chunk 4 ---
Pour the wet ingredients into the dry ingredients and stir until just mixed—it's okay if there are a few lumps.

--- Chunk 5 ---
Cooking:
Heat a non-stick skillet over medium heat and lightly grease with butter.

--- Chunk 6 ---
Pour ¼ cup batter per pancake; cook until bubbles form on top (about 2 minutes), then flip and cook the other side until golden brown.

--- Chunk 7 ---
Serve warm with maple syrup, fresh fruit, or your favorite toppings.



In [7]:
# 1) Load a local embedding model
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# 2) Compute embeddings and normalize
vecs = embed_model.encode(chunks, convert_to_numpy=True).astype("float32")
vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)

# 3) Build an in-memory FAISS index (cosine via dot-product)
index = faiss.IndexFlatIP(vecs.shape[1])
index.add(vecs)

print("✅ FAISS index built with", index.ntotal, "vectors")


✅ FAISS index built with 8 vectors


In [8]:
from transformers import pipeline

# 1) Instruction-tuned model pipeline
generator = pipeline(
    "text2text-generation",
    model="google/flan-t5-small",
    tokenizer="google/flan-t5-small"
)

# 2) Retrieval helper
def retrieve(question, k=2):
    qv = embed_model.encode([question], convert_to_numpy=True).astype("float32")
    qv /= np.linalg.norm(qv)
    D, I = index.search(qv, k)
    return [chunks[i] for i in I[0]]

# 3) Full RAG function
def rag(question):
    context = "\n\n---\n\n".join(retrieve(question))
    prompt = (
        "You are a cooking assistant. Use ONLY the context to answer.\n\n"
        f"Context:\n{context}\n\nQuestion:\n{question}\nAnswer:"
    )
    out = generator(prompt, max_length=200)[0]["generated_text"]
    return out.split("Answer:")[-1].strip()

# 4) Test with your three questions
for q in [
    "What dry ingredients do I need?",
    "How long should I cook each pancake side?",
    "What can I serve with the pancakes?"
]:
    print(f"\n🔎 Q: {q}")
    print("🟢 A:", rag(q))


Device set to use cpu
Both `max_new_tokens` (=256) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



🔎 Q: What dry ingredients do I need?


Both `max_new_tokens` (=256) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


🟢 A: all-purpose flour – 2 tablespoons sugar – baking powder – baking soda – pinch of salt – buttermilk – egg

🔎 Q: How long should I cook each pancake side?


Both `max_new_tokens` (=256) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


🟢 A: 14 cup batter

🔎 Q: What can I serve with the pancakes?
🟢 A: warm with maple syrup, fresh fruit, or your favorite toppings
