In [1]:
import os
import json
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np


2025-04-06 08:40:25.826238: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
# Load the pre-chunked corpus. Each entry is later indexed with ["chunk"], so
# entries are expected to be dicts carrying at least that text field.
with open("../data/combined/all_chunks.json", "r", encoding="utf-8") as f:
    chunks = json.load(f)
print(f"Loaded {len(chunks)} chunks")
# Uncomment to inspect the raw entries:
#chunks

Loaded 370 chunks


In [9]:
# Sentence-embedding model used for both the corpus and, later, the queries.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Keep `texts` in the same order as `chunks`: position i here is chunks[i]["chunk"].
texts = [chunk["chunk"] for chunk in chunks]
# Uncomment to inspect the extracted texts:
#texts

In [10]:
# Embed all chunk texts in a single call; the progress bar reports batch progress.
embeddings = model.encode(texts, show_progress_bar=True)

Batches:   0%|          | 0/12 [00:00<?, ?it/s]

In [12]:
# numpy is already imported in the first cell; this re-import is redundant but harmless.
import numpy as np
# Persist the embeddings as a .npy file next to the combined data.
np.save("../data/combined/embeddings.npy", embeddings)

In [13]:


# FAISS only accepts C-contiguous float32 matrices of shape (n_vectors, dim).
# Convert explicitly instead of relying on the encoder's default output dtype.
embedding_matrix = np.ascontiguousarray(embeddings, dtype="float32")
assert embedding_matrix.ndim == 2 and embedding_matrix.shape[0] > 0, (
    "expected a non-empty 2-D embedding matrix"
)

dimension = embedding_matrix.shape[1]
# Exact (brute-force) L2 index; vectors get sequential ids in insertion order,
# so id i refers to row i of the matrix.
index = faiss.IndexFlatL2(dimension)
index.add(embedding_matrix)
# Every embedding row must have made it into the index.
assert index.ntotal == embedding_matrix.shape[0], "index size does not match embeddings"
dimension

384

In [14]:
index

<faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x1679efb70> >

In [15]:
# Persist the index. Vectors were added in `chunks` order, so a FAISS id i
# is later used to look up entry i of the metadata list.
faiss.write_index(index, "../data/combined/faiss_index.index")

# Save the chunk list, in that same order, alongside the index so search ids
# can be mapped back to chunk text and metadata.
with open("../data/combined/chunk_metadata.json", "w", encoding="utf-8") as f:
    json.dump(chunks, f, indent=2, ensure_ascii=False)

### The metadata and vectors are saved; now it is time to test retrieval by sending some queries

- Load the saved faiss index and metadata
- Load the model to embed the query

In [19]:
# query = "meal ideas for 10-month-old baby with no eggs"
# query_embedding = model.encode([query])
# D, I = index.search(np.array(query_embedding), k=5)

# for idx in I[0]:
#     print(chunks[idx]["chunk"])
#     print("Source:", chunks[idx]["source_type"], "Title:", chunks[idx]["title"])
#     print("-----------")


In [3]:
# Re-imported here so this cell also works from a fresh kernel without
# re-running the index-building cells above.
import json
import faiss

# Reload the chunk metadata that was written next to the index.
with open("../data/combined/chunk_metadata.json", "r", encoding="utf-8") as f:
    chunk_metadata = json.load(f)

print(f"Loaded metadata for {len(chunk_metadata)} chunks")

# Reload the saved FAISS index; search ids are used as positions in `chunk_metadata`.
index = faiss.read_index("../data/combined/faiss_index.index")

Loaded metadata for 370 chunks


In [6]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Must be the same model that produced the indexed embeddings, otherwise the
# query vector and the stored vectors are not comparable.
model = SentenceTransformer("all-MiniLM-L6-v2")

query = "meal ideas for 10-month-old baby"
# Encode as a one-element list so the result is a batch of one query vector.
query_vector = model.encode([query])

In [7]:

# D holds the distances and I the matching ids for each query row (one row here).
D, I = index.search(query_vector, k=6)
# FAISS pads the id row with -1 when fewer than k neighbours exist. Skip those:
# chunk_metadata[-1] would otherwise silently return the *last* chunk.
top_chunks = [chunk_metadata[i] for i in I[0] if i >= 0]

In [8]:
top_chunks

[{'chunk': "baby's diet should consist of a variety of the following: - fruit and vegetables, including ones with bitter flavours, such as broccoli, cauliflower, spinach and cabbage - potatoes, bread, rice, pasta and other starchy foods - beans, pulses, fish, eggs, meat and other non-dairy sources of protein - pasteurised full-fat dairy products, such as plain yoghurt and cheese (choose lower salt options) As your baby becomes a more confident eater, remember to offer them more mashed, lumpy and finger foods. Providing finger foods as part of each meal helps encourage infants to feed themselves, develop hand and eye co-ordination, and learn to bite off, chew and swallow pieces of soft food. Remember, babies do not need salt or sugar added to their food (or cooking water). Feeding your baby: from 10 to 12 months From about 10 months, your baby should now be having 3 meals a day (breakfast, lunch",
  'section': 'Baby-led weaning',
  'source_url': 'https://www.nhs.uk/conditions/baby/weani

### This is not working well: the allergen (egg) shows up in the retrieved meal options!

- Need to work on filtering the retrieved chunks after the FAISS search
- First check how LLM is going to help already

In [9]:
from dotenv import load_dotenv
import os

In [10]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Read the key from the environment; os.getenv yields None when it is not set.
openai_api_key = os.getenv("OPENAI_API_KEY")

# Report only whether a key was found -- the key itself is never printed.
print("API key loaded successfully" if openai_api_key else "Failed to load API key")

API key loaded successfully


In [11]:
import openai

# Hand the key loaded from the environment to the openai module; the chat
# completion calls below go through this module-level client.
openai.api_key = openai_api_key


In [12]:
# NOTE: `top_chunks` was retrieved for the earlier `query` string, not for this
# `user_query`; the allergy is only communicated to the LLM through the prompt.
user_query = "What are some meal ideas for a 10-month-old baby with an egg allergy?"

# Join top chunks (blank line between chunks) to form the retrieval context.
context = "\n\n".join(chunk["chunk"] for chunk in top_chunks)

# Prompt = instructions + the user question + the retrieved context.
prompt = f"""You are a baby nutrition assistant helping new parents.

Answer the user's question using the information below.
Avoid meals that include ingredients the user is allergic to.

User question:
{user_query}

Context:
{context}
"""


In [16]:
# Conversation sent to the model: a fixed system persona plus the assembled prompt.
chat_messages = [
    {"role": "system", "content": "You are a helpful baby nutrition assistant."},
    {"role": "user", "content": prompt},
]

# Low temperature keeps the answer close to the supplied context.
response = openai.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=chat_messages,
    temperature=0.3,
)

# Show what the model was given, then what it answered.
print(
    "--- Prompt Context ---",
    context,
    "\n--- GPT Answer ---",
    response.choices[0].message.content,
    sep="\n",
)

--- Prompt Context ---
baby's diet should consist of a variety of the following: - fruit and vegetables, including ones with bitter flavours, such as broccoli, cauliflower, spinach and cabbage - potatoes, bread, rice, pasta and other starchy foods - beans, pulses, fish, eggs, meat and other non-dairy sources of protein - pasteurised full-fat dairy products, such as plain yoghurt and cheese (choose lower salt options) As your baby becomes a more confident eater, remember to offer them more mashed, lumpy and finger foods. Providing finger foods as part of each meal helps encourage infants to feed themselves, develop hand and eye co-ordination, and learn to bite off, chew and swallow pieces of soft food. Remember, babies do not need salt or sugar added to their food (or cooking water). Feeding your baby: from 10 to 12 months From about 10 months, your baby should now be having 3 meals a day (breakfast, lunch

enjoying a wide range of tastes and textures. They should be able to manage a wi

### The LLM works well, but it generated its own recipes: it used the NHS guidelines for safety advice but not the retrieved recipes

- Trying to use some filtering
- Also, blend the sources: pick roughly 2 chunks from NHS and 3 from recipes (note: the cells below actually take up to 3 NHS chunks and 2 recipe chunks)

In [18]:
def filter_chunks_by_allergen(chunks, excluded_allergens):
    """Return the chunks whose ingredient list mentions none of the allergens.

    Matching is a case-insensitive substring test against the chunk's
    "ingredients" field only; the free-text "chunk" field is not inspected.
    Chunks with no ingredients (key missing, None, or empty) are therefore
    always kept.

    NOTE: substring matching is deliberately loose ("egg" also matches "eggs"),
    which means it can over-match unrelated words such as "eggplant".

    Args:
        chunks: iterable of chunk dicts. "ingredients", when present, may be a
            list of strings, a single string, or None.
        excluded_allergens: a single allergen string or an iterable of them.
            Blank entries are ignored.

    Returns:
        A new list holding the same dict objects (not copies) that passed the
        filter, in their original order.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(excluded_allergens, str):
        excluded_allergens = [excluded_allergens]

    # Normalise once. Blank allergens are dropped because "" is a substring of
    # every string and would filter out every chunk.
    needles = [a.strip().lower() for a in excluded_allergens if a and a.strip()]

    filtered = []
    for chunk in chunks:
        # `or []` covers both a missing key and an explicit None value.
        ingredients = chunk.get("ingredients") or []
        if isinstance(ingredients, str):
            # Joining a bare string would interleave spaces between its letters.
            ingredients = [ingredients]
        joined = " ".join(str(item) for item in ingredients).lower()
        if not any(needle in joined for needle in needles):
            filtered.append(chunk)
    return filtered

# Only the "ingredients" field is checked, so chunks without ingredients
# (e.g. general guidance text that merely mentions eggs) pass through unchanged.
filtered_chunks = filter_chunks_by_allergen(top_chunks, ["egg"])


In [19]:
filtered_chunks

[{'chunk': "baby's diet should consist of a variety of the following: - fruit and vegetables, including ones with bitter flavours, such as broccoli, cauliflower, spinach and cabbage - potatoes, bread, rice, pasta and other starchy foods - beans, pulses, fish, eggs, meat and other non-dairy sources of protein - pasteurised full-fat dairy products, such as plain yoghurt and cheese (choose lower salt options) As your baby becomes a more confident eater, remember to offer them more mashed, lumpy and finger foods. Providing finger foods as part of each meal helps encourage infants to feed themselves, develop hand and eye co-ordination, and learn to bite off, chew and swallow pieces of soft food. Remember, babies do not need salt or sugar added to their food (or cooking water). Feeding your baby: from 10 to 12 months From about 10 months, your baby should now be having 3 meals a day (breakfast, lunch",
  'section': 'Baby-led weaning',
  'source_url': 'https://www.nhs.uk/conditions/baby/weani

In [20]:
# Split the allergen-filtered chunks by origin. `.get` is used (matching the
# end-to-end cell further down) so a chunk without a "source_type" key is left
# out of both lists instead of raising KeyError.
nhs_chunks = [c for c in filtered_chunks if c.get("source_type") == "nhs"]
recipe_chunks = [c for c in filtered_chunks if c.get("source_type") == "recipe"]


In [23]:
recipe_chunks

[{'chunk': 'serving. Freeze any leftovers in small pots or ice cube trays. Add any left over fruit to cereal or porridge for a tasty breakfast. Weaning tip Try mashed and finger foods as soon as your baby is ready, to help them learn to chew and swallow solid foods.',
  'chunk_id': 'fruity_stew_1',
  'title': 'Fruity Stew',
  'description': 'First tastes don’t have to be hard work! These warm stewed apples are easy to whip up and a great introduction to fruit for your baby.',
  'source_url': 'https://www.nhs.uk/start-for-life/baby/recipes-and-meal-ideas/fruity-stew/',
  'scraped_at': '2025-04-04T07:45:17.167592',
  'tokens': 62,
  'age_range': '6 months or older',
  'prep_time': '5 mins',
  'cook_time': '10 mins',
  'portions': 6,
  'ingredients': ['1 large apple, peeled, cored and chopped', '75ml water'],
  'weaning_tip': None,
  'meal_type': 'unknown',
  'source_type': 'recipe'}]

In [21]:
# Blend sources: at most 3 NHS chunks followed by at most 2 recipe chunks
# (slicing is safe when a list holds fewer items).
context_chunks = nhs_chunks[:3] + recipe_chunks[:2]
# Chunks are separated by a blank line in the prompt context.
context = "\n\n".join(chunk["chunk"] for chunk in context_chunks)


In [22]:
# Rebinds `user_query`; later prompt cells reuse this value.
user_query = "Meal ideas for 10-month-old with egg allergy"

# Stricter prompt than the first attempt: the model is told to rely on the
# provided context only and to avoid egg explicitly.
prompt = f"""You are a helpful baby nutrition assistant.

Answer the user’s question using only the provided context.
Avoid any suggestions that include egg or unsafe foods for a 10-month-old.

User question:
{user_query}

Context:
{context}
"""

# Same model and settings as the earlier call, so only the prompt and context differ.
response = openai.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are a helpful baby nutrition assistant."},
        {"role": "user", "content": prompt}
    ],
    temperature=0.3
)

print(response.choices[0].message.content)


For a 10-month-old with an egg allergy, you can consider offering a variety of fruits, vegetables, starchy foods like potatoes and rice, beans, pulses, fish, and meat as non-dairy sources of protein. You can also include pasteurized full-fat dairy products like plain yogurt and cheese (choose lower salt options). Remember to avoid adding salt or sugar to their food. As your baby becomes more confident with eating, offer mashed, lumpy, and finger foods to encourage self-feeding and develop hand-eye coordination. Be patient and keep offering a variety of fruits and vegetables, including those with bitter flavors like broccoli, cauliflower, spinach, and cabbage. You can mix chopped or mashed vegetables with other foods like rice, mashed potatoes, or meat sauces. Additionally, consider offering fruit-based desserts or mixing fruit with cereal or porridge for breakfast.


### Observation
- It picked up NHS guideline and some recipes. Recipes clearly avoided eggs and general NHS guideline is fine with eggs
- It created a very strong general answer, but not really a concrete meal
- So I am going to try a few more filters for now, accepting that this may over-optimize for one kind of query

In [24]:
# Re-derive the recipe subset from the filtered chunks. `.get` (as in the
# end-to-end cell further down) avoids a KeyError on chunks lacking "source_type".
recipe_chunks = [c for c in filtered_chunks if c.get("source_type") == "recipe"]

In [25]:
recipe_chunks

[{'chunk': 'serving. Freeze any leftovers in small pots or ice cube trays. Add any left over fruit to cereal or porridge for a tasty breakfast. Weaning tip Try mashed and finger foods as soon as your baby is ready, to help them learn to chew and swallow solid foods.',
  'chunk_id': 'fruity_stew_1',
  'title': 'Fruity Stew',
  'description': 'First tastes don’t have to be hard work! These warm stewed apples are easy to whip up and a great introduction to fruit for your baby.',
  'source_url': 'https://www.nhs.uk/start-for-life/baby/recipes-and-meal-ideas/fruity-stew/',
  'scraped_at': '2025-04-04T07:45:17.167592',
  'tokens': 62,
  'age_range': '6 months or older',
  'prep_time': '5 mins',
  'cook_time': '10 mins',
  'portions': 6,
  'ingredients': ['1 large apple, peeled, cored and chopped', '75ml water'],
  'weaning_tip': None,
  'meal_type': 'unknown',
  'source_type': 'recipe'}]

In [28]:
# Same blend as before: up to 3 NHS chunks followed by up to 2 recipe chunks.
context_chunks = nhs_chunks[:3] + recipe_chunks[:2] 
# Blank-line separated chunk texts form the prompt context.
context = "\n\n".join(chunk["chunk"] for chunk in context_chunks)


In [29]:
# Prompt variant that additionally asks for a specific meal or recipe from the
# context. Reuses `user_query` and `context` from the cells above.
prompt = f"""You are a helpful baby nutrition assistant.

Answer the user’s question using only the provided context.
Avoid meals with egg. If suitable, suggest a specific meal or recipe mentioned.

User question:
{user_query}

Context:
{context}
"""


In [30]:
# Fixed persona message plus the prompt assembled in the previous cell.
system_message = {"role": "system", "content": "You are a helpful baby nutrition assistant."}
user_message = {"role": "user", "content": prompt}

# Same model and temperature as the other experiments so answers stay comparable.
response = openai.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[system_message, user_message],
    temperature=0.3,
)

# Only the first choice is used.
answer = response.choices[0].message.content
print(answer)


Meal ideas for a 10-month-old with an egg allergy could include mashed vegetables with rice, mashed potatoes, or meat sauces. You can also try adding vegetables to dishes like cottage or shepherd's pie, spaghetti bolognese, or casseroles. For a snack, consider offering carrot sticks, cucumber sticks, or slices of pepper with hummus. Remember to avoid meals with egg and continue offering a variety of fruits and vegetables in every meal.


In [31]:
# Reformat recipe chunks to include recipe title as part of the chunk text,
# so the LLM can refer to the recipe by name.
#
# This builds modified *copies* instead of editing the dicts in place: the
# entries in `recipe_chunks` are the very same objects stored in
# `chunk_metadata`, so an in-place edit would leak into every later retrieval.
# The startswith guard keeps the cell idempotent -- re-running it does not
# stack a second "Recipe: ..." prefix.
recipe_chunks = [
    c
    if c["chunk"].startswith(f"Recipe: {c['title']}\n")
    else {**c, "chunk": f"Recipe: {c['title']}\n{c['chunk']}"}
    for c in recipe_chunks
]


In [32]:
# Rebuild the context so it picks up the current `recipe_chunks`
# (up to 3 NHS chunks followed by up to 2 recipe chunks).
context_chunks = nhs_chunks[:3] + recipe_chunks[:2]
context = "\n\n".join(chunk["chunk"] for chunk in context_chunks)


In [43]:
# Most detailed prompt variant: asks for the full named recipe, an explanation
# of why it is safe for the allergy, and allergen substitutes, while forbidding
# invented content.
# NOTE(review): the persona sentence appears twice at the top of the prompt
# (and again in the system message) -- probably a leftover from editing; confirm
# before relying on this wording.
prompt = f"""You are a helpful baby nutrition assistant.

You are a baby nutrition assistant helping new parents.

Answer the user's question using only the provided context.
If the context includes a named recipe, describe it in full (ingredients + steps), and mention why it is safe for the user's allergy.
Also suggest what ingredients can replace the allergen (e.g. other protein sources if eggs are avoided), and how they can be added to meals to keep them balanced and nutritious.

Only use information from the context. Do not invent recipes or ingredients that are not present in the context.

User question:
{user_query}

Context:
{context}
"""


In [44]:
# Send the current `prompt` with the same model and temperature used in the
# earlier experiments.
response = openai.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are a helpful baby nutrition assistant."},
        {"role": "user", "content": prompt}
    ],
    temperature=0.3
)

# Print the text of the first (only requested) completion.
print(response.choices[0].message.content)


For a 10-month-old with an egg allergy, you can consider the following meal ideas:

1. **Fruity Stew**: This recipe is a great option for your baby and is safe for an egg allergy. It contains a variety of fruits and is easy to prepare. Here's how you can make it:
   - Ingredients: Assorted fruits (such as apples, pears, peaches, and berries), water
   - Steps: 
     1. Wash and peel the fruits, removing any seeds or pits.
     2. Chop the fruits into small, baby-friendly pieces.
     3. Place the fruits in a saucepan with a little water.
     4. Cook the fruits over low heat until they are soft and can be easily mashed.
     5. Mash or puree the fruits to the desired consistency.
     6. Let it cool before serving to your baby.

To replace the protein from eggs in your baby's diet, you can include other non-dairy sources of protein such as beans, pulses, fish, and meat. For example, you can add mashed beans or lentils to pureed vegetables, mix flaked fish into mashed potatoes, or offer

### Tried many prompts and the answers were:
- 1st only the list of egg replacement 
- 2nd only the recipe
- 3rd answer had both
- now going to try other questions with the final prompt
    - What should I do if my baby refuses solid food?
    - Can I give salt to my 9-month-old baby?
    - What should a typical day of meals look like for a 10-month-old?
    - Can I give my baby Kombucha?
    - Meal ideas for 10-month-old with egg allergy

In [64]:
# End-to-end retrieval + generation for a single question:
# embed -> FAISS search -> (optional) allergen filter -> blend sources -> prompt -> LLM.

# 🔄 CHANGE THIS LINE for each test
user_query = "When should I introduce peanuts?"

# 1. Embed the query (one-element batch)
query_vector = model.encode([user_query])

# 2. Search FAISS
D, I = index.search(np.array(query_vector), k=6)
# FAISS pads the id row with -1 when fewer than k neighbours exist. Skip those:
# chunk_metadata[-1] would otherwise silently return the *last* chunk.
retrieved_chunks = [chunk_metadata[i] for i in I[0] if i >= 0]

# 3. (Optional) Allergen filter — disabled here
# filtered_chunks = filter_chunks_by_allergen(retrieved_chunks, ["egg"])
filtered_chunks = retrieved_chunks  # For general queries

# 4. Categorize by source
nhs_chunks = [c for c in filtered_chunks if c.get("source_type") == "nhs"]
recipe_chunks = [c for c in filtered_chunks if c.get("source_type") == "recipe"]

# 5. Inject recipe title into chunk if needed.
# Build copies rather than mutating: these dicts are the same objects held in
# `chunk_metadata`, so editing them in place would stack another
# "Recipe: <title>" prefix every time this cell (or an earlier title-injecting
# cell) runs on the same recipe.
recipe_chunks = [
    c
    if c["chunk"].startswith(f"Recipe: {c['title']}\n")
    else {**c, "chunk": f"Recipe: {c['title']}\n{c['chunk']}"}
    for c in recipe_chunks
]

# 6. Blend context (adjust counts as needed)
context_chunks = nhs_chunks[:4] + recipe_chunks[:1]
context = "\n\n".join(c["chunk"] for c in context_chunks)

# 7. Build the prompt
prompt = f"""You are a helpful baby nutrition assistant.

Answer the user’s question using only the information in the context below.
If the context contains any named recipes, you may mention them only if relevant.
Always prioritize safe advice. If you're unsure, say you don't know.

User question:
{user_query}

Context:
{context}
"""

# 8. Call GPT (low temperature keeps the answer close to the context)
response = openai.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are a helpful baby nutrition assistant."},
        {"role": "user", "content": prompt}
    ],
    temperature=0.3
)

# 9. Show response
print("GPT Answer:\n")
print(response.choices[0].message.content)


GPT Answer:

You can introduce nuts and peanuts to your baby from around 6 months old, as long as they are crushed, ground, or in the form of a smooth nut or peanut butter. If there is a history of food allergies in your family, it's best to consult with your GP or health visitor before introducing nuts and peanuts.
