# Create a Vector Database with ChromaDB for Moroccan Recipes and Meals

In [14]:
import pandas as pd
import chromadb
from langchain_ollama import OllamaEmbeddings
from tqdm import tqdm

## Read the Moroccan Recipes and Meals from CSV

In [9]:
# Read the recipes CSV into a DataFrame, print its (rows, columns) shape,
# and show the first rows as the cell output.
RECIPES_CSV = 'moroccan_recipes_dataset.csv'
df = pd.read_csv(RECIPES_CSV)
print(df.shape)
df.head()

(4627, 2)


Unnamed: 0,prompt,completion
0,Recipe Name : Duck and prune tagine\n Descript...,['Pour the oil into a heavy-based pan or casse...
1,Recipe Name : Potato tagine with baked halloum...,['Heat the oven to 180°C fan/gas 6. Heat the o...
2,Recipe Name : Merguez sausages with crispy chi...,['Heat the oven to 180°C fan/gas 6. Put the ch...
3,Recipe Name : Chicken tagine casserole\n Descr...,['Heat the oven to 180ºC fan/gas 6. Slash the ...
4,Recipe Name : Preserved lemons\n Description :...,['Juice 4 of the lemons and set the juice asid...


### The `prompt` column contains: recipe name, description, ingredients, and steps for preparation

In [4]:
# Show the full 'prompt' text of the row labelled 0.
df.loc[0, 'prompt']

"Recipe Name : Duck and prune tagine\n Description : Duck legs are the ultimate choice for low-and-slow cooking, as they become fall-apart tender with ease. What’s more they’re flavoursome enough to stand up to lots of spices. It’s also a beautiful meat to match with fruit, which this tagine effortlessly proves. This recipe can be made ahead and frozen – see the tips below. Discover more brilliant batch cooking recipes.\n Ingredients : ['2 tbsp olive oil', '\\xa02 cinnamon sticks', '3 red onions, finely sliced', '5 garlic cloves, finely sliced', '2 tbsp harissa', '4 tbsp ras el hanout', '2 tsp ground ginger', '4 large duck legs', '1 litre chicken stock', '2 x 400g tins chopped tomatoes', '2 preserved lemons', '400g tin chickpeas, drained', '150g pitted kalamata olives', '200g soft pitted prunes']\n Steps for preparation : ['Pour the oil into a heavy-based pan or casserole over a medium heat, then add the cinnamon sticks followed by the onions, garlic and a pinch of salt. Cook for about

## Transform Data

## Load Data to ChromaDB

In [10]:
# Embedding backend: the "mxbai-embed-large:latest" model, accessed through
# the OllamaEmbeddings wrapper from langchain_ollama.
embedding_model = OllamaEmbeddings(
    model="mxbai-embed-large:latest",
)

# Open the Chroma client at the given path, then fetch the 'recipes'
# collection, creating it if it does not exist yet.
client = chromadb.PersistentClient(
    path='Moroccan_Recipes_ChromaDB',
)
collection = client.get_or_create_collection(
    name='recipes',
)

In [11]:
def generate_embeddings(text: str):
    """Return the embedding of ``text`` as produced by ``embedding_model.embed_query``."""
    vector = embedding_model.embed_query(text)
    return vector

In [17]:
def load_to_chroma(document: dict):
    """Embed one document and store it in the ``collection`` Chroma collection.

    Args:
        document: Mapping with a ``"content"`` string (the text that is
            embedded and stored) and an optional ``"metadata"`` dict.
            ``metadata["chunk_id"]``, when present, is used as the record id.

    Raises:
        KeyError: If ``document`` has no ``"content"`` key.

    The record is written with ``upsert`` so that loading the same id again
    (for example when re-running after an interrupted load) replaces the
    stored record instead of attempting a duplicate insert.
    """
    import hashlib  # local import: only needed for the fallback id below

    content = document["content"]
    metadata = document.get("metadata") or {}

    # Without a chunk_id, derive a stable id from the text itself. A fixed
    # placeholder id would make every such document collide on one record.
    doc_id = metadata.get("chunk_id")
    if doc_id is None:
        doc_id = hashlib.sha256(str(content).encode("utf-8")).hexdigest()

    embedding = generate_embeddings(content)

    collection.upsert(
        documents=[content],
        embeddings=[embedding],
        # Omit metadata entirely when there is none rather than sending an
        # empty dict. NOTE(review): some Chroma versions reject empty
        # metadata dicts - confirm against the installed version.
        metadatas=[metadata] if metadata else None,
        ids=[str(doc_id)],
    )

In [16]:
# Load every recipe prompt into Chroma, one record per DataFrame row.
# Embedding takes seconds per row, so rows whose id is already stored are
# skipped: an interrupted run can be resumed simply by re-running this cell.
row_ids = [f"row_text_{i}" for i in range(df.shape[0])]

# Look up already-stored ids in modest batches to keep each query small.
ID_LOOKUP_BATCH = 500
already_loaded = set()
for start in range(0, len(row_ids), ID_LOOKUP_BATCH):
    found = collection.get(ids=row_ids[start:start + ID_LOOKUP_BATCH], include=[])
    already_loaded.update(found["ids"])

pending_rows = [i for i, row_id in enumerate(row_ids) if row_id not in already_loaded]

for i in tqdm(pending_rows):
    # Positional lookup so the id always matches the row position, even if
    # the DataFrame index is not the default 0..n-1 range.
    row_text = df['prompt'].iloc[i]

    document = {
        "content": row_text,
        "metadata": {
            "chunk_id": row_ids[i],
            "chunk_index": i,
        },
    }

    load_to_chroma(document)

  0%|▎                                                                             | 22/4627 [01:18<4:34:33,  3.58s/it]


KeyboardInterrupt: 