diff --git a/examples/image_search/README.md b/examples/image_search/README.md
index 3299ab45..0aa2a7c9 100644
--- a/examples/image_search/README.md
+++ b/examples/image_search/README.md
@@ -2,23 +2,22 @@
 
 ![image](https://github.com/user-attachments/assets/3a696344-c9b4-46e8-9413-6229dbb8672a)
 
-- QDrant for Vector Storage
+- Qdrant for Vector Storage
 - Ollama Gemma3 (Image to Text)
 - CLIP ViT-L/14 - Embeddings Model
 - Live Update
 
 ## Make sure Postgres and Qdrant are running
 ```
-docker run -d --name qdrant -p 6334:6334 qdrant/qdrant:latest
+docker run -d -p 6334:6334 -p 6333:6333 qdrant/qdrant
 export COCOINDEX_DATABASE_URL="postgres://cocoindex:cocoindex@localhost/cocoindex"
 ```
 
-## Create QDrant Collection
+## Create Qdrant Collection
 ```
-curl -X PUT
-  'http://localhost:6333/collections/image_search' \
-  --header 'Content-Type: application/json' \
-  --data-raw '{
+curl -X PUT 'http://localhost:6333/collections/image_search' \
+  -H 'Content-Type: application/json' \
+  -d '{
     "vectors": {
       "embedding": {
         "size": 768,
@@ -26,7 +25,6 @@ curl -X PUT
       }
     }
   }'
-
 ```
 
 ## Run Ollama
@@ -35,29 +33,28 @@ ollama pull gemma3
 ollama serve
 ```
 
-## Create virtual environment and install dependencies
-```
-python -m venv .venv
-source .venv/bin/activate
-pip install -r requirements.txt
-```
-
 ### Place your images in the `img` directory.
 
 - No need to update manually. CocoIndex will automatically update the index as new images are added to the directory.
 
 ## Run Backend
-```
-cocoindex setup main.py
-uvicorn main:app --reload --host 0.0.0.0 --port 8000
-```
-
-## Run Frontend
-```
-cd frontend
-npm install
-npm run dev
-```
+- Install dependencies:
+  ```
+  pip install -e .
+  ```
+
+- Run Backend
+  ```
+  cocoindex setup main.py
+  uvicorn main:app --reload --host 0.0.0.0 --port 8000
+  ```
+
+- Run Frontend
+  ```
+  cd frontend
+  npm install
+  npm run dev
+  ```
 
 Go to `http://localhost:5174` to search.
 
diff --git a/examples/image_search/main.py b/examples/image_search/main.py
index 7ea2e9eb..abbc7443 100644
--- a/examples/image_search/main.py
+++ b/examples/image_search/main.py
@@ -7,9 +7,11 @@
 from fastapi import FastAPI, Query
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
+from qdrant_client import QdrantClient
 
 OLLAMA_URL = "http://localhost:11434/api/generate"
 OLLAMA_MODEL = "gemma3"
+QDRANT_GRPC_URL = os.getenv("QDRANT_GRPC_URL", "http://localhost:6334/")
 
 # 1. Extract caption from image using Ollama vision model
 @cocoindex.op.function(cache=True, behavior_version=1)
@@ -42,7 +44,12 @@ def get_image_caption(img_bytes: bytes) -> str:
 
 
 # 2. Embed the caption string
-def caption_to_embedding(caption: cocoindex.DataSlice) -> cocoindex.DataSlice:
+@cocoindex.transform_flow()
+def caption_to_embedding(caption: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]:
+    """
+    Embed the caption using a CLIP model.
+    This is shared logic between indexing and querying.
+    """
     return caption.transform(
         cocoindex.functions.SentenceTransformerEmbed(
             model="clip-ViT-L-14",
@@ -70,7 +77,7 @@ def image_object_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope:
         "img_embeddings",
         cocoindex.storages.Qdrant(
             collection_name="image_search",
-            grpc_url=os.getenv("QDRANT_GRPC_URL", "http://localhost:6334/"),
+            grpc_url=QDRANT_GRPC_URL,
         ),
         primary_key_fields=["id"],
         setup_by_user=True,
@@ -93,26 +100,31 @@ def image_object_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope:
 def startup_event():
     load_dotenv()
     cocoindex.init()
-    app.state.query_handler = cocoindex.query.SimpleSemanticsQueryHandler(
-        name="ImageObjectSearch",
-        flow=image_object_embedding_flow,
-        target_name="img_embeddings",
-        query_transform_flow=caption_to_embedding,
-        default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
+    # Initialize Qdrant client
+    app.state.qdrant_client = QdrantClient(
+        url=QDRANT_GRPC_URL,
+        prefer_grpc=True
     )
     app.state.live_updater = cocoindex.FlowLiveUpdater(image_object_embedding_flow)
     app.state.live_updater.start()
 
 @app.get("/search")
 def search(q: str = Query(..., description="Search query"), limit: int = Query(5, description="Number of results")):
-    query_handler = app.state.query_handler
-    results, _ = query_handler.search(q, limit, "embedding")
+    # Get the embedding for the query
+    query_embedding = caption_to_embedding.eval(q)
+
+    # Search in Qdrant
+    search_results = app.state.qdrant_client.search(
+        collection_name="image_search",
+        query_vector=("embedding", query_embedding),
+        limit=limit
+    )
+
+    # Format results
     out = []
-    for result in results:
-        row = dict(result.data)
-        # Only include filename and score
+    for result in search_results:
         out.append({
-            "filename": row["filename"],
+            "filename": result.payload["filename"],
             "score": result.score
         })
     return {"results": out}
diff --git a/examples/image_search/pyproject.toml b/examples/image_search/pyproject.toml
new file mode 100644
index 00000000..ac010d53
--- /dev/null
+++ b/examples/image_search/pyproject.toml
@@ -0,0 +1,9 @@
+[project]
+name = "image-search"
+version = "0.1.0"
+description = "Simple example for cocoindex: build embedding index based on images."
+requires-python = ">=3.11"
+dependencies = ["cocoindex>=0.1.42", "python-dotenv>=1.0.1", "fastapi>=0.100.0"]
+
+[tool.setuptools]
+packages = []