In [1]:
from qdrant_client import QdrantClient
# Create the Qdrant client; ":memory:" is passed as the client's location.
client = QdrantClient(
    location=":memory:",
    prefer_grpc=True,
)

In [2]:
import numpy as np
from tqdm import tqdm

In [3]:
from qdrant_client.models import Distance, VectorParams, PointStruct

In [4]:
from qdrant_client.models import Filter, FieldCondition, Range, OptimizersConfigDiff

In [5]:
# (Re)create the collection used throughout this notebook:
# 512-dimensional vectors compared with cosine distance.
clip_vector_params = VectorParams(size=512, distance=Distance.COSINE)

client.recreate_collection(
    collection_name="clip_image_product",
    vectors_config=clip_vector_params,
    # Disabled experiment, kept for reference (presumably meant to push the
    # indexing threshold just below the 1e6 points inserted later; confirm):
    # optimizers_config=OptimizersConfigDiff(
    #     indexing_threshold=int(1e6-10),
    # ),
)

True

In [6]:
# Sanity check: the collection must hold no points before ingestion starts.
initial_point_count = client.count(collection_name="clip_image_product").count
assert initial_point_count == 0

In [7]:
import numpy as np
from qdrant_client.models import PointStruct


# Fill the collection with random data: 10000 batches of 100 vectors each
# (1,000,000 points in total), every vector having 512 components.
for chunk in tqdm(range(10000)):
    vectors = np.random.rand(100, 512)

    # Ids are contiguous across batches: batch `chunk` owns
    # chunk*100 .. chunk*100 + 99.
    first_id = chunk * 100

    # Payload values depend only on the position inside the batch:
    # "color" cycles through three values, "rand_number" lies in 0..9.
    batch = [
        PointStruct(
            id=position + first_id,
            vector=embedding.tolist(),
            payload={
                "color": ["red", "yellow", "blue"][position % 3],
                "rand_number": position % 10,
            },
        )
        for position, embedding in enumerate(vectors)
    ]

    client.upsert(collection_name="clip_image_product", points=batch)

100%|██████████| 10000/10000 [23:55<00:00,  6.96it/s]


In [9]:
# Display how many points the collection holds after ingestion.
count_result = client.count(collection_name="clip_image_product")
count_result.count

1000000

In [10]:
import psutil

# function to convert bytes to a more human-readable format
def convert_bytes(num):
    """Format a byte count as a string with two decimals and a binary unit.

    The value is divided by 1024 until it drops below 1024 or the largest
    supported unit (TB) is reached.

    Args:
        num: Size in bytes (int or float).

    Returns:
        A string such as ``"512.00 bytes"``, ``"1.50 KB"`` or ``"3.48 GB"``.
        Values of 1024 TB or more are still reported in TB (for example
        ``"2048.00 TB"``) instead of falling off the end of the loop and
        returning ``None``.
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB']
    # Only the units below the largest one can be "outgrown"; the last unit
    # is the catch-all so the function always returns a string.
    for x in units[:-1]:
        if num < 1024.0:
            return f"{num:.2f} {x}"
        num /= 1024.0
    return f"{num:.2f} {units[-1]}"

# Handle to the process running this notebook kernel.
process = psutil.Process()

# Take a memory snapshot and keep only its `rss` field
# (presumably the resident set size in bytes; see psutil's documentation).
process_memory = process.memory_info()
memory_info = process_memory.rss

# Report the figure using the human-readable formatter defined above.
print(f"Current memory usage: {convert_bytes(memory_info)}")

Current memory usage: 3.48 GB


In [13]:
# Unfiltered search: query the whole collection with one random
# 512-component vector.
query_vector = np.random.rand(512)

# No query_filter is passed here; the variant restricted to
# `rand_number` >= 3 lives in its own cell.
hits = client.search(
    collection_name="clip_image_product",
    query_vector=query_vector,
    limit=1000,  # Return the 1000 closest points
)

In [15]:
# Filtered search: same kind of random 512-component query, restricted by payload.
query_vector = np.random.rand(512)

# Condition on the `rand_number` payload field: keep only points whose
# value is >= 3.
rand_number_at_least_3 = FieldCondition(
    key='rand_number',
    range=Range(gte=3),
)

hits = client.search(
    collection_name="clip_image_product",
    query_vector=query_vector,
    # Every condition listed under `must` is required for a point to match.
    query_filter=Filter(must=[rand_number_at_least_3]),
    limit=1000,  # Return the 1000 closest points
)