Skip to content

Commit

Permalink
[BUG]: Deleting FTS entries upon deletion of individual records (chro…
Browse files Browse the repository at this point in the history
…ma-core#1689)

Refs: chroma-core#1664

## Description of changes

*Summarize the changes made by this PR.*
 - Improvements & Bug fixes
	 - Follow up on chroma-core#1664 

## Test plan
*How are these changes tested?*

- [x] Tests pass locally with `pytest` for python

## Documentation Changes

N/A
  • Loading branch information
tazarov authored and atroyn committed Apr 3, 2024
1 parent dcf5030 commit 0a7e7a6
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 0 deletions.
19 changes: 19 additions & 0 deletions chromadb/segment/impl/metadata/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,13 +404,31 @@ def insert_into_fulltext_search() -> None:
def _delete_record(self, cur: Cursor, record: EmbeddingRecord) -> None:
"""Delete a single EmbeddingRecord from the DB"""
t = Table("embeddings")
fts_t = Table("embedding_fulltext_search")
q = (
self._db.querybuilder()
.from_(t)
.where(t.segment_id == ParameterValue(self._db.uuid_to_db(self._id)))
.where(t.embedding_id == ParameterValue(record["id"]))
.delete()
)
q_fts = (
self._db.querybuilder()
.from_(fts_t)
.delete()
.where(
fts_t.rowid.isin(
self._db.querybuilder()
.from_(t)
.select(t.id)
.where(
t.segment_id == ParameterValue(self._db.uuid_to_db(self._id))
)
.where(t.embedding_id == ParameterValue(record["id"]))
)
)
)
cur.execute(*get_sql(q_fts))
sql, params = get_sql(q)
sql = sql + " RETURNING id"
result = cur.execute(sql, params).fetchone()
Expand All @@ -422,6 +440,7 @@ def _delete_record(self, cur: Cursor, record: EmbeddingRecord) -> None:
# Manually delete metadata; cannot use cascade because
# that triggers on replace
metadata_t = Table("embedding_metadata")

q = (
self._db.querybuilder()
.from_(metadata_t)
Expand Down
59 changes: 59 additions & 0 deletions chromadb/test/segment/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,65 @@ def test_delete_segment(
assert len(res.fetchall()) == 0


def test_delete_single_fts_record(
system: System,
sample_embeddings: Iterator[SubmitEmbeddingRecord],
produce_fns: ProducerFn,
) -> None:
producer = system.instance(Producer)
system.reset_state()
topic = str(segment_definition["topic"])

segment = SqliteMetadataSegment(system, segment_definition)
segment.start()

embeddings, seq_ids = produce_fns(producer, topic, sample_embeddings, 10)
max_id = seq_ids[-1]

sync(segment, max_id)

assert segment.count() == 10
results = segment.get_metadata(ids=["embedding_0"])
assert_equiv_records(embeddings[:1], results)
_id = segment._id
_db = system.instance(SqliteDB)
# Delete by ID
delete_embedding = SubmitEmbeddingRecord(
id="embedding_0",
embedding=None,
encoding=None,
metadata=None,
operation=Operation.DELETE,
collection_id=uuid.UUID(int=0),
)
max_id = produce_fns(producer, topic, (delete_embedding for _ in range(1)), 1)[1][
-1
]
t = Table("embeddings")

sync(segment, max_id)
fts_t = Table("embedding_fulltext_search")
q_fts = (
_db.querybuilder()
.from_(fts_t)
.select()
.where(
fts_t.rowid.isin(
_db.querybuilder()
.from_(t)
.select(t.id)
.where(t.segment_id == ParameterValue(_db.uuid_to_db(_id)))
.where(t.embedding_id == ParameterValue(delete_embedding["id"]))
)
)
)
sql, params = get_sql(q_fts)
with _db.tx() as cur:
res = cur.execute(sql, params)
# assert that the ids that are deleted from the segment are also gone from the fts table
assert len(res.fetchall()) == 0


def test_metadata_validation_forbidden_key() -> None:
with pytest.raises(ValueError, match="chroma:document"):
validate_metadata(
Expand Down

0 comments on commit 0a7e7a6

Please sign in to comment.