In [15]:
import chromadb
import ollama

def add_documents_to_chromadb(documents, db_path):
    client = chromadb.PersistentClient(path=db_path)

    try:
        collection = client.get_collection(name="docs")
    except:
        collection = client.create_collection(name="docs")

    for i, doc in enumerate(documents):
        content = doc["content"]
        response = ollama.embeddings(model="mxbai-embed-large", prompt=content)
        embedding = response["embedding"]

        collection.add(
            ids=[str(i)],
            embeddings=[embedding],
            documents=[content],
            metadatas=[{"dept": doc["dept"]}]
        )

def search_department_in_chromadb(db_path, department, query, n_results):
    client = chromadb.PersistentClient(path=db_path)
    collection = client.get_collection("docs")
    
    response = ollama.embeddings(model="mxbai-embed-large", prompt=query)
    query_embedding = response["embedding"]
    
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=n_results,
        where={"dept": department}
    )
    
    return results


In [16]:
documents = [
    {"dept": "HR", "content": "HR is responsible for recruiting new employees."},
    {"dept": "HR", "content": "HR handles employee benefits and payroll."},
    {"dept": "IT", "content": "IT manages the company’s internal network."},
    {"dept": "IT", "content": "IT provides technical support to employees."},
    {"dept": "Finance", "content": "Finance manages the company’s budget."},
    {"dept": "Finance", "content": "Finance oversees all financial transactions."},
    {"dept": "Marketing", "content": "Marketing handles the promotion of products."},
    {"dept": "Marketing", "content": "Marketing analyzes market trends and competitors."}
]

db_path = "docs/"


In [17]:
add_documents_to_chromadb(documents, db_path)


Insert of existing embedding ID: 0
Add of existing embedding ID: 0
Insert of existing embedding ID: 1
Add of existing embedding ID: 1
Insert of existing embedding ID: 2
Add of existing embedding ID: 2
Insert of existing embedding ID: 3
Add of existing embedding ID: 3
Insert of existing embedding ID: 4
Add of existing embedding ID: 4
Insert of existing embedding ID: 5
Add of existing embedding ID: 5
Insert of existing embedding ID: 6
Add of existing embedding ID: 6
Insert of existing embedding ID: 7
Add of existing embedding ID: 7


In [22]:
department = "Marketing"
query = "Products"
n_results = 3
search_results = search_department_in_chromadb(db_path, department, query,n_results)
from rich import print
print(search_results)


In [19]:
search_results

{'ids': [['2', '3']],
 'distances': [[181.65177816464146, 267.6069437780134]],
 'metadatas': [[{'dept': 'IT'}, {'dept': 'IT'}]],
 'embeddings': None,
 'documents': [['IT manages the company’s internal network.',
   'IT provides technical support to employees.']],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents', 'distances']}