Skip to content

Commit

Permalink
[BUG]: Adding validation check for "chroma:document" in metadata. (#1718
Browse files Browse the repository at this point in the history
)

Closes: #1717

## Description of changes

*Summarize the changes made by this PR.*
 - Improvements & Bug fixes
- Fixed an issue where if the metadata key is set to `chroma:document`
it is either ignore when inserting or overrides the actual document when
updating records by `id`

## Test plan
*How are these changes tested?*

- [x] Tests pass locally with `pytest` for python, `yarn test` for js

## Documentation Changes
N/A
  • Loading branch information
tazarov committed Feb 14, 2024
1 parent 4b656d9 commit da68516
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
8 changes: 6 additions & 2 deletions chromadb/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

# Re-export types from chromadb.types
__all__ = ["Metadata", "Where", "WhereDocument", "UpdateCollectionMetadata"]

META_KEY_CHROMA_DOCUMENT = "chroma:document"
T = TypeVar("T")
OneOrMany = Union[T, List[T]]

Expand Down Expand Up @@ -265,6 +265,10 @@ def validate_metadata(metadata: Metadata) -> Metadata:
if len(metadata) == 0:
raise ValueError(f"Expected metadata to be a non-empty dict, got {metadata}")
for key, value in metadata.items():
if key == META_KEY_CHROMA_DOCUMENT:
raise ValueError(
f"Expected metadata to not contain the reserved key {META_KEY_CHROMA_DOCUMENT}"
)
if not isinstance(key, str):
raise TypeError(
f"Expected metadata key to be a str, got {key} which is a {type(key)}"
Expand Down Expand Up @@ -476,7 +480,7 @@ def validate_embeddings(embeddings: Embeddings) -> Embeddings:
raise ValueError(
f"Expected each embedding in the embeddings to be a list, got {embeddings}"
)
for i,embedding in enumerate(embeddings):
for i, embedding in enumerate(embeddings):
if len(embedding) == 0:
raise ValueError(
f"Expected each embedding in the embeddings to be a non-empty list, got empty embedding at pos {i}"
Expand Down
9 changes: 9 additions & 0 deletions chromadb/test/segment/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import tempfile
import pytest
from typing import Generator, List, Callable, Iterator, Dict, Optional, Union, Sequence

from chromadb.api.types import validate_metadata
from chromadb.config import System, Settings
from chromadb.db.base import ParameterValue, get_sql
from chromadb.db.impl.sqlite import SqliteDB
Expand Down Expand Up @@ -677,3 +679,10 @@ def test_delete_segment(
res = cur.execute(sql, params)
# assert that all FTS rows are gone
assert len(res.fetchall()) == 0


def test_metadata_validation_forbidden_key() -> None:
with pytest.raises(ValueError, match="chroma:document"):
validate_metadata(
{"chroma:document": "this is not the document you are looking for"}
)

0 comments on commit da68516

Please sign in to comment.