Skip to content

Commit

Permalink
Add document normalization in Chroma. (#3640)
Browse files Browse the repository at this point in the history
  • Loading branch information
showmecodett committed Apr 15, 2024
1 parent 6d128f2 commit 2f2221c
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions server/knowledge_base/kb_service/chromadb_kb_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,12 @@ def do_search(self, query: str, top_k: int, score_threshold: float = SCORE_THRES

# Embed the page content of each document in `docs`, add every document to
# `self.collection` under a freshly generated uuid1 string id, and return one
# {"id": ..., "metadata": ...} dict per document.
# NOTE(review): this is a flattened diff view -- indentation and the +/- markers
# are missing, so the old and new versions of the body appear back to back.
def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]:
doc_infos = []
# Presumably the 3 deleted lines (pre-change version): texts, embeddings and
# metadatas all come from the dict returned by self._docs_to_embeddings(docs).
data = self._docs_to_embeddings(docs)
ids = [str(uuid.uuid1()) for _ in range(len(data["texts"]))]
for _id, text, embedding, metadata in zip(ids, data["texts"], data["embeddings"], data["metadatas"]):
# Presumably the 6 added lines (post-change version): texts and metadatas are
# read straight from the documents, and embeddings are computed through
# EmbeddingsFunAdapter(self.embed_model).embed_documents.
# NOTE(review): per the commit title the adapter is what adds normalization --
# confirm against EmbeddingsFunAdapter, which is not shown here.
embed_func = EmbeddingsFunAdapter(self.embed_model)
texts = [doc.page_content for doc in docs]
metadatas = [doc.metadata for doc in docs]
embeddings = embed_func.embed_documents(texts=texts)
# One id per text; ids, texts, embeddings and metadatas are consumed in lockstep below.
ids = [str(uuid.uuid1()) for _ in range(len(texts))]
for _id, text, embedding, metadata in zip(ids, texts, embeddings, metadatas):
# Loop body (unchanged context lines): one collection.add call per document,
# then record the id/metadata pair that is returned to the caller.
self.collection.add(ids=_id, embeddings=embedding, metadatas=metadata, documents=text)
doc_infos.append({"id": _id, "metadata": metadata})
return doc_infos
Expand Down

0 comments on commit 2f2221c

Please sign in to comment.