ChromaDB Setup

In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Fail fast: stop here with a clear error when the Groq key is missing or empty.
groq_key_present = bool(os.getenv("GROQ_API_KEY"))
if not groq_key_present:
    raise RuntimeError("GROQ_API_KEY not found in environment")

In [2]:
import chromadb
from chromadb.config import Settings

# Use an on-disk client so the documents added to `food_info` are written
# under ./chroma_db. `chromadb.Client(Settings(persist_directory=...))` does not
# set `is_persistent`, so on current chromadb releases it presumably yields an
# in-memory client whose data is invisible to anything that later opens
# ./chroma_db (TODO confirm against the installed chromadb version).
client = chromadb.PersistentClient(path="./chroma_db")

# Fetch the collection if it already exists, otherwise create it (safe to re-run).
collection = client.get_or_create_collection(name="food_info")

In [3]:
# Build the HuggingFace sentence-embedding model used to vectorise the documents.
from langchain.embeddings import HuggingFaceEmbeddings

EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


Sample data

In [4]:
# Sample nutrition facts as (food key, document text) pairs. The three parallel
# lists below (texts, metadata dicts, record ids) are derived from this table
# so they always stay aligned.
sample_foods = [
    ("apple", "Apple: 95 kcal, 0.3 g fat, 0.5 g protein"),
    ("banana", "Banana: 105 kcal, 0.4 g fat, 1.3 g protein"),
]

docs = [text for _, text in sample_foods]
metas = [{"food": food} for food, _ in sample_foods]
ids = [f"{food}_1" for food, _ in sample_foods]

Adding the documents to Chroma

In [6]:
# Embed every sample document with the HuggingFace embedder.
embs = embedder.embed_documents(docs)

# Write the texts, vectors, metadata and ids to the collection.
# `upsert` (instead of `add`) keeps this cell idempotent: the ids are fixed, so
# re-running the cell overwrites the same two records rather than colliding
# with ids that are already stored.
collection.upsert(
    ids=ids,
    documents=docs,
    embeddings=embs,
    metadatas=metas,
)

Using LangChain to connect to the Chroma vector store

In [7]:
from langchain.vectorstores import Chroma as LCChroma

# Open the "food_info" collection under ./chroma_db through LangChain's Chroma
# wrapper. The same `embedder` is passed in, presumably so queries are embedded
# with the model that produced the stored vectors.
vectorstore_options = dict(
    collection_name="food_info",
    persist_directory="./chroma_db",
    embedding_function=embedder,
)
vectorstore = LCChroma(**vectorstore_options)

# k=1: ask the retriever for only the single best-matching document per query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))

  vectorstore = LCChroma(


In [16]:
from langchain.llms import OpenAI
from langchain.chat_models import init_chat_model

# Chat model requested from the "groq" provider.
# NOTE(review): presumably authenticated via the GROQ_API_KEY checked in the
# first cell — confirm.
llm_sampling = {
    "temperature": 0.2,  # low temperature for mostly deterministic answers
    "max_tokens": 256,   # cap on the length of each completion
}
llm = init_chat_model(model="llama3-8b-8192", model_provider="groq", **llm_sampling)


Creating the RetrievalQA chain

In [13]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Prompt that restricts the model to the retrieved context. `{context}` and
# `{question}` are the two placeholders of the template.
QA_PROMPT_TEXT = """
Use only the following context to answer the question as briefly and factually as possible.
If the answer is numerical or specific, quote it exactly.

Context:
{context}

Question: {question}
Answer:
"""
custom_prompt = PromptTemplate.from_template(QA_PROMPT_TEXT)

# Wire the retriever and the LLM together. chain_type="stuff" is the default
# and is only spelled out for clarity.
qa_chain = RetrievalQA.from_chain_type(
    chain_type="stuff",
    retriever=retriever,
    llm=llm,
    chain_type_kwargs=dict(prompt=custom_prompt),
)


In [17]:
query = "How many kcal are there in bananas?"

# `Chain.run` is deprecated in LangChain; `invoke` is its replacement.
# RetrievalQA takes the question under the "query" key and returns a dict
# whose "result" entry holds the answer text.
answer = qa_chain.invoke({"query": query})["result"]
print("🧠 Answer:", answer)

🧠 Answer: According to the United States Department of Agriculture (USDA), one medium-sized banana (approximately 100g) contains 105 calories (kcal).


## Creating vector embeddings of the text descriptions of our 100 foods

In [1]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import os

# Load the dataset (one row per food, with its macro-nutrient columns).
data_path = "data/food_macros_100_realnames.csv"
df = pd.read_csv(data_path)

# Load the CLIP ViT-B/32 checkpoint through sentence-transformers; it is used
# below to embed the text description of each food.
model = SentenceTransformer("clip-ViT-B-32")

# Open (or create) the on-disk Chroma database and the target collection.
client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_or_create_collection(name="food_macros_clip")

# Build the ids, description texts and metadata for every row first, so the
# encoder and the database are each called once instead of once per row.
food_ids = []
food_descriptions = []
food_metadatas = []
for i, row in df.iterrows():
    food_ids.append(f"food_{i}")
    food_descriptions.append(
        f"{row['Food']} with {row['Protein (g)']}g protein, {row['Carbs (g)']}g carbs, {row['Fat (g)']}g fat and {row['Calories']} calories"
    )
    food_metadatas.append({
        "food": row['Food'],
        "calories": row['Calories'],
        "protein": row['Protein (g)'],
        "carbs": row['Carbs (g)'],
        "fat": row['Fat (g)']
    })

# An empty CSV is skipped, matching the original loop, which did nothing for zero rows.
if food_ids:
    # One batched forward pass over all descriptions; `.tolist()` converts the
    # returned array into plain nested lists for Chroma.
    food_embeddings = model.encode(food_descriptions).tolist()

    # `upsert` keeps the cell idempotent: the store is persistent and the ids
    # are derived from the row index, so a re-run overwrites the same records
    # instead of clashing with ids written by a previous run.
    collection.upsert(
        ids=food_ids,
        documents=food_descriptions,
        metadatas=food_metadatas,
        embeddings=food_embeddings,
    )


  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [2]:
# Sanity check: show how many records the collection currently holds
# (expected to equal the number of rows ingested from the CSV).
collection.count()

100