In [1]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install -U bitsandbytes --quiet
!pip install -U faiss-cpu sentence-transformers transformers accelerate --quiet

In [None]:
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import numpy as np

In [None]:
recipes = pd.read_csv("/content/drive/My Drive/SP25/ECE 285/Project/Food Ingredients and Recipe Dataset with Image Name Mapping.csv")
supertracker = pd.read_excel("/content/drive/My Drive/SP25/ECE 285/Project/supertrackerfooddatabase.xlsx", sheet_name=None)
nutrients = supertracker["Nutrients"]

In [None]:
def format_nutrition(row):
    return f"{row['foodname']}: {row['_203 Protein (g)']}g protein, {row['_204 Total Fat (g)']}g fat, {row['_208 Energy (kcal)']} kcal"

def format_recipe(row):
    return f"Recipe: {row['Title']} — Ingredients: {row['Cleaned_Ingredients']}"

nutritional_docs = nutrients.apply(format_nutrition, axis=1).dropna().tolist()
recipe_docs = recipes.apply(format_recipe, axis=1).dropna().tolist()
kb_docs = nutritional_docs + recipe_docs

embedder = SentenceTransformer("all-MiniLM-L6-v2")
kb_embeddings = embedder.encode(kb_docs, show_progress_bar=True)
dimension = kb_embeddings[0].shape[0]

index = faiss.IndexFlatL2(dimension)
index.add(np.array(kb_embeddings))

Batches:   0%|          | 0/632 [00:00<?, ?it/s]

In [None]:
model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# llm = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True)

from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_4bit=True)

llm = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config
)

generator = pipeline("text-generation", model=llm, tokenizer=tokenizer)



model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
def generate_meal_plan_prompt(user_profile: str, retrieved_chunks: list) -> str:
    context = "\n".join(retrieved_chunks)

    prompt = f"""
You are a certified AI nutritionist specializing in preventive care and chronic condition management.

Below is background medical information about a patient:
{user_profile}

You also have access to medically grounded nutrition data and recipe information:
{context}

Based on the profile and the retrieved information:
- Suggest a complete daily meal plan (breakfast, lunch, dinner, snacks).
- Ensure it supports management of the patient’s condition (e.g., hypertension, obesity).
- Prioritize foods and meal types shown in the context.

Format:
Meal Plan:
- Breakfast: ...
- Snack: ...
- Lunch: ...
- Snack: ...
- Dinner: ...

Respond only with the meal plan. Do not repeat the context or user profile.
"""
    return prompt.strip()


In [None]:
def retrieve_relevant_chunks(query, top_k=5):
    q_emb = embedder.encode([query])
    scores, indices = index.search(np.array(q_emb), top_k)
    return [kb_docs[i] for i in indices[0]]

def generate_rag_response(user_profile, top_k=5, max_tokens=300):
    retrieved = retrieve_relevant_chunks(user_profile, top_k)
    prompt = generate_meal_plan_prompt(user_profile, retrieved)
    output = generator(prompt, max_new_tokens=max_tokens)[0]["generated_text"]
    return output


In [None]:
query = "User profile: Age: 31, Diagnosis: Essential Hypertension (Stage 1), Duration of Hypertension: 5 years, Comorbidities: None (or list: diabetes, obesity, etc.), Blood Pressure Readings: Average 162/88 mm Hg (recent readings), Symptoms: Occasional headaches, mild shortness of breath; no chest pain, BMI: 31 kg/m² (Obese category), Relevant Laboratory Tests:, Electrocardiogram (ECG): Normal, Lipid Profile: Mildly elevated cholesterol, Kidney Function: Normal, Blood Glucose: Normal."
response = generate_rag_response(query)
print(response)

You are a certified AI nutritionist specializing in preventive care and chronic condition management.

Below is background medical information about a patient:
User profile: Age: 31, Diagnosis: Essential Hypertension (Stage 1), Duration of Hypertension: 5 years, Comorbidities: None (or list: diabetes, obesity, etc.), Blood Pressure Readings: Average 162/88 mm Hg (recent readings), Symptoms: Occasional headaches, mild shortness of breath; no chest pain, BMI: 31 kg/m² (Obese category), Relevant Laboratory Tests:, Electrocardiogram (ECG): Normal, Lipid Profile: Mildly elevated cholesterol, Kidney Function: Normal, Blood Glucose: Normal.

You also have access to medically grounded nutrition data and recipe information:
Palm hearts, cooked, no fat added: 2.7g protein, 0.2g fat, 115.0 kcal
Heart, cooked: 28.2398g protein, 4.69g fat, 164.0 kcal
Bacardi cocktail: 0.06g protein, 0.08g fat, 186.0 kcal
Mint julep: 0.0g protein, 0.0g fat, 240.0 kcal
No Fear energy drink: 0.42g protein, 0.0g fat, 6

User Query → Embedding → FAISS Search → Retrieved Context
→ Inject into LLM Prompt → Generate Grounded Response


In [None]:
import json
with open("/content/drive/My Drive/SP25/ECE 285/Project/input_profiles.json", "r") as f:
    profile_dict = json.load(f)


filtered_profiles = profile_dict["Hypertension"]

In [None]:
results = []

In [None]:
for user_profile in filtered_profiles[:10]:
    response = generate_rag_response(user_profile)
    results.append({"profile": user_profile, "meal_plan": response})

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [None]:
pd.DataFrame(results).to_csv("/content/drive/My Drive/SP25/ECE 285/Project/rag_meal_plans_hypertension.csv", index=False)