### Chromadb (Vector Store)
- 기본 코드 정리
- Document 문서를 꼭 참고하기 바람

### 참고
- [튜토리얼 가이드](https://www.datacamp.com/tutorial/chromadb-tutorial-step-by-step-guide#rdl)
- [langchain 가이드](https://python.langchain.com/docs/integrations/vectorstores/chroma/)
- [Document](https://docs.trychroma.com/guides)

In [21]:
import os
import chromadb
import shutil
from chromadb.db.base import UniqueConstraintError
from chromadb.utils import embedding_functions

from config.settings import DB_DIR

In [None]:
# Open the on-disk Chroma database located at DB_DIR.
client = chromadb.PersistentClient(path=DB_DIR)

# Embedding function backed by a SentenceTransformer model loaded from a local path.
em = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="/home/0_models/BAAI_bge-m3"
)

# Collection: the place where embeddings, documents, and extra metadata are stored.
# get_or_create_collection returns the existing "Test" collection when there is
# one and creates it otherwise. This keeps the cell re-runnable without relying
# on which exception type create_collection raises for a duplicate name (that
# type has differed between Chroma releases).
collection = client.get_or_create_collection(
    name="Test",
    metadata={"hnsw:space": "cosine"},
    embedding_function=em,
)

#### Distance function
- `hnsw:space`: `"l2"`, `"ip"`, `"cosine"` 중 하나
- 기본값(default)은 `"l2"`

In [None]:
# Save: add text documents to the collection, each with a metadata dict and an id.
collection.add(
    ids=["naver1", "samsung1", "Test"],
    metadatas=[{"Doc Title": title_no} for title_no in (1, 2, 3)],
    documents=[
        "NAVER Corporation Earnings Surprise",
        "Samgsung Corporation Earnings Surprise",
        "Test",
    ],
)

# peek(): show the first items stored in the collection (10 by default per the Chroma docs).
print(collection.peek())

# count(): show how many items the collection currently holds.
print(collection.count())

# To rename the collection:
# collection.modify(name="new_name")

In [None]:
# Query: ask for the 2 closest stored entries to the text "SamSam" and print
# the raw result object.
results = collection.query(
    n_results=2,
    query_texts="SamSam",
)
print(results)

In [None]:
# Provide embedding vectors directly, alongside the documents, metadata, and ids.
# NOTE(review): this cell is a template, not runnable as written. Every `...`
# below is a literal Python Ellipsis object placed inside the list as a stand-in
# for "more entries"; replace or remove them before executing.
# NOTE(review): the 3-element vectors look illustrative. Presumably each vector
# must match the dimensionality of the embeddings already in the collection
# (those come from the model configured for it) — confirm before use.
collection.add(
    documents=["doc1", "doc2", "doc3", ...],
    embeddings=[[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...],
    metadatas=[
        {"chapter": "3", "verse": "16"},
        {"chapter": "3", "verse": "5"},
        {"chapter": "29", "verse": "11"},
        ...,
    ],
    ids=["id1", "id2", "id3", ...],
)

In [None]:
# Query by supplying embedding vectors directly instead of query texts.
# NOTE(review): template only. The trailing `...` in query_embeddings is a
# literal Python Ellipsis placeholder, and the `where` / `where_document` values
# are dummy filter examples; substitute real vectors and filters before running.
# The return value is not assigned or printed here; in a notebook it would only
# be displayed as the cell output.
collection.query(
    query_embeddings=[[11.1, 12.1, 13.1], [1.1, 2.3, 3.2], ...],
    n_results=10,  # number of results requested
    where={"metadata_field": "is_equal_to_this"},  # filter on a metadata field
    where_document={"$contains": "search_string"},  # filter on the document text
)

In [None]:
# Delete the three sample records by id.
# A metadata filter can be passed as well, e.g. where={"chapter": "20"}.
collection.delete(ids=["naver1", "samsung1", "Test"])