In [23]:
# Imports
import os,sys
import qdrant_client
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams

In [24]:
# Read the Qdrant endpoint and API key from the environment, then connect.
# For a free-tier Qdrant cluster, both values are shown in your Qdrant.io account.
# A missing variable raises KeyError right here instead of failing later on first use.
qdrant_url, qdrant_key = (os.environ[name] for name in ("QDRANT_URL", "QDRANT_KEY"))

client = QdrantClient(url=qdrant_url, api_key=qdrant_key)


In [None]:
# Create the Qdrant collection, but only if it is not on the cluster yet.
# create_collection() is rejected when "test_collection" already exists, so the
# membership guard keeps this cell safe to re-run (Restart Kernel -> Run All).
existing_collections = {c.name for c in client.get_collections().collections}

if "test_collection" not in existing_collections:
    client.create_collection(
        collection_name="test_collection",
        # 4-component vectors, compared with the dot-product metric
        vectors_config=VectorParams(size=4, distance=Distance.DOT),
    )


In [25]:
# Upsert six sample points into Qdrant
from qdrant_client.models import PointStruct

# One row per point: (id, 4-component vector, city, country)
sample_rows = [
    (1, [0.05, 0.61, 0.76, 0.74], "Berlin", "Germany"),
    (2, [0.19, 0.81, 0.75, 0.11], "London", "England"),
    (3, [0.36, 0.55, 0.47, 0.94], "Moscow", "Russia"),
    (4, [0.18, 0.01, 0.85, 0.80], "New York", "USA"),
    (5, [0.24, 0.18, 0.22, 0.44], "Beijing", "China"),
    (6, [0.35, 0.08, 0.11, 0.44], "Mumbai", "India"),
]

# Turn each row into a PointStruct carrying a city/country payload
sample_points = [
    PointStruct(id=point_id, vector=vector, payload={"city": city, "country": country})
    for point_id, vector, city, country in sample_rows
]

operation_info = client.upsert(
    collection_name="test_collection",
    wait=True,
    points=sample_points,
)

print(operation_info)


operation_id=9 status=<UpdateStatus.COMPLETED: 'completed'>


In [None]:
# NOTE: This cell won't run as-is...it is an example only.
# It relies on names that are not defined in the cells shown here:
# filter1, filter2, datetime, vector_db, text_chunks and id_list.
#
# If loading data via LangChain:
# from_documents() clears and replaces the data in the vector DB on each call...probably not what you want.
# Use add_texts() (for raw text) or add_documents() (for document objects) instead.
# Either of these will upsert: data sent with an existing point ID overwrites that point in Qdrant.
# You can use an integer ID, or generate a UUID from the md5 hash of the payload content to de-duplicate.

# NOTE(review): this list holds a single dict; presumably add_texts() expects one
# metadata dict per entry in text_chunks -- confirm before reusing with several chunks.
metadata = [  # LangChain automatically nests the filter payload under metadata.*...see the LangChain retriever example
    {"filter1": filter1} | {"filter2": filter2} | {"last_update": datetime.now()}
]  # Note: the vertical bar | merges the three dicts into one (demonstrated in the next cell)

# Example upsert...new IDs are added with their payload, existing IDs get an updated payload
upserted_ids = vector_db.add_texts(
    texts = text_chunks,  # List of text content that is already chunked
    metadatas = metadata, # List of dictionaries (see the review note on `metadata`)
    ids = id_list # List of ints, or UUIDs generated from the md5 hash of each text chunk
)

print(upserted_ids)

In [26]:
# The vertical bar | (used in the metadata example above) merges two dicts into
# a single new dict; this operator form is available from Python 3.9 onwards.
city_part = {'city': "New York"}
country_part = {'country': "USA"}
print("Example1:", city_part | country_part)

Example1: {'city': 'New York', 'country': 'USA'}


In [27]:
# Plain vector search: ask for the 3 top-scoring points (k=3) for one query vector
query_vector = [0.2, 0.1, 0.9, 0.7]

search_result = client.search(
    collection_name="test_collection",
    query_vector=query_vector,
    limit=3,
)

search_result

[ScoredPoint(id=4, version=9, score=1.362, payload={'city': 'New York', 'country': 'USA'}, vector=None, shard_key=None),
 ScoredPoint(id=1, version=9, score=1.273, payload={'city': 'Berlin', 'country': 'Germany'}, vector=None, shard_key=None),
 ScoredPoint(id=3, version=9, score=1.208, payload={'city': 'Moscow', 'country': 'Russia'}, vector=None, shard_key=None)]

In [28]:
# Filtered search: restrict candidates by payload, then run the vector search
from qdrant_client.models import Filter, FieldCondition, MatchValue, MatchAny

country_value = 'USA'
city_list = ['New York', 'Los Angeles', 'London']

# Both conditions go under `must`, so a point has to satisfy each of them:
# country equals country_value AND city is one of city_list.
country_condition = FieldCondition(key="country", match=MatchValue(value=country_value))
city_condition = FieldCondition(key="city", match=MatchAny(any=city_list))
payload_filter = Filter(must=[country_condition, city_condition])

search_result = client.search(
    collection_name="test_collection",
    query_vector=[0.2, 0.1, 0.9, 0.7],
    query_filter=payload_filter,
    with_payload=True,
    limit=3,
)

search_result


[ScoredPoint(id=4, version=9, score=1.362, payload={'city': 'New York', 'country': 'USA'}, vector=None, shard_key=None)]

In [None]:
# NOTE: This cell won't run as-is, it's just an example.
# It relies on names that are not defined in the cells shown here:
# vector_db, llm, retrievalQAWithSourcesChain and StreamingStdOutCallbackHandler.
#
# For LangChain...you can use the same kind of filter definition in a retriever.
# This is a more complex search with multiple filter conditions; the keys use the
# metadata.xxx prefix because LangChain nests the payload under "metadata".
from qdrant_client import models

# Both conditions are listed under `must`, so a point has to satisfy each one
search_filter = models.Filter(
    must=[
        # metadata.caseID must equal 123
        models.FieldCondition(
            key="metadata.caseID", match=models.MatchValue(value=123)
        ),
        # metadata.doclist must match at least one of the listed values
        models.FieldCondition(
            key="metadata.doclist", match=models.MatchAny(any=["abc", "def", "ghi"])
        ),
    ]
)

# Hand the Qdrant filter to the retriever through search_kwargs
retriever = vector_db.as_retriever(search_kwargs={"filter": search_filter})
# NOTE(review): LangChain's class is spelled `RetrievalQAWithSourcesChain` (capital R);
# the lowercase name below looks like a typo -- confirm against the actual import.
chain = retrievalQAWithSourcesChain.from_chain_type(llm, retriever=retriever)
question = 'Some question here'
# NOTE(review): presumably the callback handler streams tokens to stdout -- confirm
result = chain.invoke({"question": question}, callbacks=[StreamingStdOutCallbackHandler()])
print(result)
