In [1]:
# https://python.langchain.com/docs/integrations/vectorstores/qdrant/#initialization

#### Initialization

In [2]:
from config import settings

In [3]:
import os
# Export the OpenAI key held in the project settings as OPENAI_API_KEY.
# NOTE(review): presumably OpenAIEmbeddings picks the key up from this
# environment variable — confirm; the export is kept ahead of the import.
os.environ["OPENAI_API_KEY"] = settings.openai_api_key

from langchain_openai import OpenAIEmbeddings
# Dense embedding model passed as `embedding=` to the vector stores below.
# NOTE(review): the collections in this notebook are created with size=3072,
# presumably this model's output dimension — re-check if the model changes.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [4]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# Qdrant client opened on the special ":memory:" location.
client = QdrantClient(":memory:")

# One unnamed dense vector per point: 3072 dimensions, cosine distance.
client.create_collection(
    vectors_config=VectorParams(distance=Distance.COSINE, size=3072),
    collection_name="demo_collection",
)

# LangChain store bound to the collection created just above.
vector_store = QdrantVectorStore(
    embedding=embeddings,
    collection_name="demo_collection",
    client=client,
)

#### Manage vector store

In [5]:
from uuid import uuid4
from langchain_core.documents import Document

# Three sample documents that differ only in their text; each one gets its own
# metadata dict with the same "source" value.
document_1, document_2, document_3 = (
    Document(page_content=text, metadata={"source": "my brain"})
    for text in (
        "I love ice cream.",
        "I like to eat candy.",
        "I have to eat sweets.",
    )
)

documents = [document_1, document_2, document_3]

# One random UUID string per document, in the same order as `documents`.
uuids = [str(uuid4()) for _ in documents]

In [None]:
# Store the sample documents under the pre-generated UUIDs. This call is the
# last expression of the cell, so its return value is shown as the cell output.
vector_store.add_documents(documents=documents, ids=uuids)
# Optional (disabled): delete the document whose id is the last entry of `uuids`.
# vector_store.delete(ids=[uuids[-1]])

#### Query directly

In [None]:
# Ask the store for up to three matches and list each hit with its metadata.
results = vector_store.similarity_search(query="Don't you like sweets?", k=3)
for res in results:
    # print() joins its arguments with a single space, giving "* <text> [<metadata>]".
    print(f"* {res.page_content}", f"[{res.metadata}]")

#### Dense Vector Search

In [None]:
from langchain_qdrant import QdrantVectorStore, RetrievalMode
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# Dense-only store over the existing "demo_collection", reusing `client`.
qdrant = QdrantVectorStore(
    retrieval_mode=RetrievalMode.DENSE,
    embedding=embeddings,
    collection_name="demo_collection",
    client=client,
)

# Same documents and ids as the ones defined earlier in the notebook.
qdrant.add_documents(ids=uuids, documents=documents)

query = "Don't you like sweets?"
found_docs = qdrant.similarity_search(query=query, k=3)
found_docs

#### Sparse Vector Search

In [None]:
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import Distance, SparseVectorParams, VectorParams

# Sparse embedding model ("Qdrant/bm25"); the hybrid-search cell reuses it.
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

# `client` outlives this cell, so an unconditional create_collection fails with
# "already exists" when the cell is run a second time. Create the collection
# only when it is missing so the cell can be re-run safely.
if not client.collection_exists(collection_name="sparse_collection"):
    client.create_collection(
        collection_name="sparse_collection",
        # Named dense vector slot, declared alongside the sparse one.
        vectors_config={
            "dense": VectorParams(
                size=3072,
                distance=Distance.COSINE,
            ),
        },
        # Named sparse vector slot; its index is configured with on_disk=False.
        sparse_vectors_config={
            "sparse": SparseVectorParams(
                index=models.SparseIndexParams(
                    on_disk=False
                )
            )
        },
    )

# Sparse-only store: no dense `embedding` is passed, only the sparse model and
# the name of the sparse vector slot declared above.
qdrant = QdrantVectorStore(
    client=client,
    collection_name="sparse_collection",
    sparse_embedding=sparse_embeddings,
    retrieval_mode=RetrievalMode.SPARSE,
    sparse_vector_name="sparse",
)

# Ids are fixed UUIDs generated earlier in the notebook.
# NOTE(review): re-adding under the same ids presumably overwrites rather than
# duplicates — confirm against the Qdrant upsert semantics.
qdrant.add_documents(documents=documents, ids=uuids)

query = "Don't you like sweets?"
found_docs = qdrant.similarity_search(query, k=3)
found_docs

#### Hybrid Vector Search

In [None]:
# This cell relies on `models`, `SparseVectorParams`, `RetrievalMode` and
# `sparse_embeddings` from the sparse-search cell above.
#
# `client` outlives this cell, so an unconditional create_collection fails with
# "already exists" when the cell is run a second time. Create the collection
# only when it is missing so the cell can be re-run safely.
if not client.collection_exists(collection_name="hybrid_collection"):
    client.create_collection(
        collection_name="hybrid_collection",
        # Named dense vector slot.
        vectors_config={
            "dense": VectorParams(size=3072, distance=Distance.COSINE),
        },
        # Named sparse vector slot; its index is configured with on_disk=False.
        sparse_vectors_config={
            "sparse": SparseVectorParams(
                index=models.SparseIndexParams(
                    on_disk=False
                )
            )
        },
    )

# Hybrid store: both embedding models are supplied, each mapped to the vector
# slot of the same name declared in the collection.
qdrant = QdrantVectorStore(
    client=client,
    collection_name="hybrid_collection",
    embedding=embeddings,
    sparse_embedding=sparse_embeddings,
    retrieval_mode=RetrievalMode.HYBRID,
    vector_name="dense",
    sparse_vector_name="sparse",
)

# Ids are fixed UUIDs generated earlier in the notebook.
# NOTE(review): re-adding under the same ids presumably overwrites rather than
# duplicates — confirm against the Qdrant upsert semantics.
qdrant.add_documents(documents=documents, ids=uuids)

query = "Don't you like sweets?"
found_docs = qdrant.similarity_search(query, k=3)
found_docs

In [None]:
# Build a fresh hybrid store on "hybrid_collection" (populated by the previous
# cell); nothing is added here, the collection is only queried.
qdrant = QdrantVectorStore(
    retrieval_mode=RetrievalMode.HYBRID,
    sparse_embedding=sparse_embeddings,
    sparse_vector_name="sparse",
    embedding=embeddings,
    vector_name="dense",
    collection_name="hybrid_collection",
    client=client,
)

query = "Don't you like sweets?"
found_docs = qdrant.similarity_search(query=query, k=3)
found_docs

# LlamaIndex example (kept for reference):
#   https://docs.llamaindex.ai/en/stable/examples/vector_stores/QdrantIndexDemo/
#
# vector_store = QdrantVectorStore(
#     client=client,
#     aclient=aclient,
#     collection_name="paul_graham_hybrid",
#     enable_hybrid=True,
#     fastembed_sparse_model="Qdrant/bm25",
# )

# index = VectorStoreIndex.from_documents(
#     documents,
#     storage_context=StorageContext.from_defaults(vector_store=vector_store),
# )

# # retrieve 2 sparse, 2 dense, and filter down to 3 total hybrid results
# query_engine = index.as_query_engine(
#     vector_store_query_mode="hybrid",
#     sparse_top_k=2,
#     similarity_top_k=2,
#     hybrid_top_k=3,
# )

In [None]:
# Run the query against `vector_store` and get a (document, score) pair per hit.
results = vector_store.similarity_search_with_score(
    query="Don't you like sweets?", k=3
)
for doc, score in results:
    # ".3f" prints three decimal places. The previous "3f" only set a minimum
    # field width of 3, so the score was shown with the default six decimals.
    print(f"* [SIM={score:.3f}] {doc.page_content} [{doc.metadata}]")

#### Metadata filtering

In [None]:
from qdrant_client import models

query = "Don't you like sweets?"

# The search is restricted by a Qdrant filter whose `must_not` clause is a text
# match for "sweets" on the "page_content" field.
#
# Alternative clause (disabled) that could be passed to models.Filter instead:
#     should=[
#         models.FieldCondition(
#             key="page_content",
#             match=models.MatchText(text="candy"),
#         )
#     ],
results = vector_store.similarity_search(
    query=query,
    k=3,
    filter=models.Filter(
        must_not=[
            models.FieldCondition(
                key="page_content",
                match=models.MatchText(text="sweets"),
            ),
        ],
    ),
)
for doc in results:
    # print() joins its arguments with a single space, giving "* <text> [<metadata>]".
    print(f"* {doc.page_content}", f"[{doc.metadata}]")

#### Query by turning into retriever

In [None]:
# Wrap the store as a retriever using the "mmr" search type.
# NOTE(review): presumably k=1 is the number of results returned and fetch_k=3
# the size of the candidate pool they are picked from — confirm.
retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 1, "fetch_k": 3},
)
retriever.invoke("Stealing from the bank is a crime")