In [6]:
import chromadb
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Any, Optional

# Configuration: the values a reader is most likely to tune, kept in one place
# instead of being buried inside the constructor calls below.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
CHROMA_PATH = "./chromadb"  # relative to the notebook's working directory
COLLECTION_NAME = "video_description"

# Load the embedding model once at module level so every search reuses it.
# NOTE(review): presumably this must be the same model that produced the
# vectors already stored in the collection - confirm against the ingestion code.
EMBEDDING_MODEL = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Open the on-disk ChromaDB store and fetch the collection, creating it if it
# does not exist yet.
chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
collection = chroma_client.get_or_create_collection(name=COLLECTION_NAME)


def hybrid_search(
    collection,
    query: str,
    n_results: int = 5,
    filters: Optional[Dict[str, Any]] = None,
    embedding_model=None
) -> Dict[str, Any]:
    """
    Performs a hybrid search on ChromaDB using vector similarity and metadata filters.

    The query text is embedded with ``embedding_model`` and the resulting vector
    is sent to ``collection.query`` together with the optional metadata filter.

    Args:
        collection: The ChromaDB collection to search. Any object exposing a
            compatible ``query(query_embeddings=..., n_results=..., where=...)``
            method works.
        query (str): The search query.
        n_results (int): Number of results to retrieve.
        filters (Optional[Dict[str, Any]]): Metadata filter forwarded as the
            ``where`` argument. ``None`` and an empty dict both mean
            "no filtering".
        embedding_model: Object whose ``encode(query)`` returns a value with a
            ``tolist()`` method. Defaults to the module-level
            ``EMBEDDING_MODEL``, looked up when the function is called.

    Returns:
        Dict[str, Any]: Whatever ``collection.query`` returns, unmodified.
    """
    # Resolve the default at call time rather than at definition time: a
    # default of `EMBEDDING_MODEL` in the signature would capture whichever
    # object existed when this cell ran, so reloading the model in another
    # cell would silently leave searches on the stale one.
    if embedding_model is None:
        embedding_model = EMBEDDING_MODEL

    query_vector = embedding_model.encode(query).tolist()

    # An empty filter dict is normalised to None so that "no filter" is always
    # passed to the collection the same way.
    return collection.query(
        query_embeddings=[query_vector],
        n_results=n_results,
        where=filters or None,
    )





In [9]:
# Example usage: run one similarity lookup against the collection and print
# the raw result.
# NOTE(review): the trailing comma is part of the text that gets embedded -
# presumably unintentional; confirm before relying on the ranking.
search_query = "Zurich, Switzerland,"
# Disabled example of a metadata filter:
# filter_criteria = {"category": "documentary", "tags": {"$contains": "ocean"}}
# NOTE(review): as far as I know Chroma's `where` expects a single top-level
# key (multiple conditions wrapped in "$and") - verify this example against
# the installed chromadb version before enabling it.
# An empty dict means "no filter": hybrid_search passes where=None for it.
filter_criteria = {}

search_results = hybrid_search(collection, search_query, n_results=1, filters=filter_criteria)

# Prints the entire result dict, including the full metadata of every hit.
print("Search Results:", search_results)

Search Results: {'ids': [['Switz_IMG_0280']], 'embeddings': None, 'documents': [[None]], 'uris': None, 'data': None, 'metadatas': [[{'created': '2024-09-19T08:03:09Z', 'duration': 4, 'file_name': 'IMG_0280', 'folder_name': 'Switz', 'id': 'Switz_IMG_0280', 'location': '21, Limmatstrasse, Gewerbeschule, Industriequartier, Zurich, District Zurich, Zurich, 8005, Switzerland', 'video_description': 'The video begins with a serene scene of a white and blue tramcar on a quiet street in Zurich, Switzerland, with a crane visible in the background, setting the tone for a tranquil urban landscape. As the video progresses, the tram is shown traveling to the right, displaying destinations such as Frankenviad and Frankenstein, with buildings, cars, and a red crane in the background, conveying a sense of daytime activity. The scene then shifts to a blue and white street car traversing a city street with tram tracks, showcasing two large buildings and a tree on the right side, emphasizing urban life an

In [7]:
# Sanity check that the collection is not empty before trusting search results.
print(collection.count())  # Number of items currently stored in the collection


1


In [10]:
# Inspect one stored record to see which fields are populated. In the recorded
# output `documents` is [None] and the description text sits in the metadata
# under the 'video_description' key.
sample = collection.get(limit=1)  # Fetch at most one stored item
print(sample)


{'ids': ['Switz_IMG_0280'], 'embeddings': None, 'documents': [None], 'uris': None, 'data': None, 'metadatas': [{'created': '2024-09-19T08:03:09Z', 'duration': 4, 'file_name': 'IMG_0280', 'folder_name': 'Switz', 'id': 'Switz_IMG_0280', 'location': '21, Limmatstrasse, Gewerbeschule, Industriequartier, Zurich, District Zurich, Zurich, 8005, Switzerland', 'video_description': 'The video begins with a serene scene of a white and blue tramcar on a quiet street in Zurich, Switzerland, with a crane visible in the background, setting the tone for a tranquil urban landscape. As the video progresses, the tram is shown traveling to the right, displaying destinations such as Frankenviad and Frankenstein, with buildings, cars, and a red crane in the background, conveying a sense of daytime activity. The scene then shifts to a blue and white street car traversing a city street with tram tracks, showcasing two large buildings and a tree on the right side, emphasizing urban life and transportation. The