Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUG]: Adding validation check for "chroma:document" in metadata. #1718

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions chromadb/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

# Re-export types from chromadb.types
__all__ = ["Metadata", "Where", "WhereDocument", "UpdateCollectionMetadata"]

META_KEY_CHROMA_DOCUMENT = "chroma:document"
T = TypeVar("T")
OneOrMany = Union[T, List[T]]

Expand Down Expand Up @@ -265,6 +265,10 @@ def validate_metadata(metadata: Metadata) -> Metadata:
if len(metadata) == 0:
raise ValueError(f"Expected metadata to be a non-empty dict, got {metadata}")
for key, value in metadata.items():
if key == META_KEY_CHROMA_DOCUMENT:
raise ValueError(
f"Expected metadata to not contain the reserved key {META_KEY_CHROMA_DOCUMENT}"
)
if not isinstance(key, str):
raise TypeError(
f"Expected metadata key to be a str, got {key} which is a {type(key)}"
Expand Down Expand Up @@ -476,7 +480,7 @@ def validate_embeddings(embeddings: Embeddings) -> Embeddings:
raise ValueError(
f"Expected each embedding in the embeddings to be a list, got {embeddings}"
)
for i,embedding in enumerate(embeddings):
for i, embedding in enumerate(embeddings):
if len(embedding) == 0:
raise ValueError(
f"Expected each embedding in the embeddings to be a non-empty list, got empty embedding at pos {i}"
Expand Down
9 changes: 9 additions & 0 deletions chromadb/test/segment/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import tempfile
import pytest
from typing import Generator, List, Callable, Iterator, Dict, Optional, Union, Sequence

from chromadb.api.types import validate_metadata
from chromadb.config import System, Settings
from chromadb.db.base import ParameterValue, get_sql
from chromadb.db.impl.sqlite import SqliteDB
Expand Down Expand Up @@ -677,3 +679,10 @@ def test_delete_segment(
res = cur.execute(sql, params)
# assert that all FTS rows are gone
assert len(res.fetchall()) == 0


def test_metadata_validation_forbidden_key() -> None:
with pytest.raises(ValueError, match="chroma:document"):
validate_metadata(
{"chroma:document": "this is not the document you are looking for"}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lol

)
Loading