In [1]:
import chromadb
from IPython.display import display, Markdown

In [2]:
# Create a Persistent ChromaDB client (stores data locally)
client = chromadb.PersistentClient(path="./chroma_db")

In [3]:
# Create or get a collection
collection = client.get_or_create_collection(name="documents")

What is a Collection?
A collection in ChromaDB is similar to a table in a database. It stores vectors along with metadata (like text).

Each collection has:
✅ A unique name (in this case, "documents")
✅ Embeddings (numerical vectors representing data)
✅ Metadata (extra details like text descriptions)

In [4]:
# Add some example vectors (each with an ID and metadata)
collection.add(
    ids=["1", "2", "3"],
    embeddings=[
        [0.1, 0.2, 0.3],  # Vector 1
        [0.4, 0.5, 0.6],  # Vector 2
        [0.7, 0.8, 0.9]   # Vector 3
    ],
    metadatas=[
        {"text": "Hello world"},
        {"text": "ChromaDB is cool"},
        {"text": "Vector search example"}
    ]
)

Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Insert of existing embedding ID: 1
Insert of existing embedding ID: 2
Insert of existing embedding ID: 3
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3


In [5]:
# Function to find closest match
def find_closest_match(query_vector, top_n=1):
    results = collection.query(
        query_embeddings=[query_vector],
        n_results=top_n
    )

    # Extract relevant details
    closest_match_text = results["metadatas"][0][0]["text"]
    distance_score = results["distances"][0][0]

    # Format output
    output_md = f"""
### 🔍 Query Vector: `{query_vector}`
✅ **Closest Match:** `{closest_match_text}`  
📏 **Distance Score:** `{distance_score:.6f}` _(Lower = Better Match)_
    """
    
    display(Markdown(output_md))  # Render formatted output in Jupyter Notebook

# Example Query
query_vector = [0.45, 0.55, 0.65]
find_closest_match(query_vector)


### 🔍 Query Vector: `[0.45, 0.55, 0.65]`
✅ **Closest Match:** `ChromaDB is cool`  
📏 **Distance Score:** `0.007500` _(Lower = Better Match)_
    

In [6]:
query_vector = [1.45, 0.55, 2.65]
find_closest_match(query_vector)


### 🔍 Query Vector: `[1.45, 0.55, 2.65]`
✅ **Closest Match:** `Vector search example`  
📏 **Distance Score:** `3.687501` _(Lower = Better Match)_
    

How collection.query() Works Internally in ChromaDB

In [7]:
from PIL import Image

def load_image_pil(image_path):
    image = Image.open(image_path)
    image.show()  # Opens the image in the default viewer
    return image

# Example call
image = load_image_pil("eucledian_distance.png")