In [1]:
from qdrant_client import QdrantClient
client = QdrantClient(url="http://localhost:6333")
client_grpc = QdrantClient(url="http://localhost:6334", prefer_grpc=True)

In [6]:
import numpy as np
from tqdm import tqdm

In [2]:
from qdrant_client.models import Distance, VectorParams, PointStruct

In [4]:
from qdrant_client.models import Filter, FieldCondition, Range, OptimizersConfigDiff

In [12]:
client.recreate_collection(
    collection_name="clip_image_product",
    vectors_config=VectorParams(size=512, distance=Distance.COSINE),
    hnsw_config=OptimizersConfigDiff(
        indexing_threshold=int(1e8 - 1e3),
    )
)
assert client.count(collection_name="clip_image_product").count == 0
id_start = 0
payloads = [{"color": "red", "rand_number": idx % 10} for idx in range(int(1e3))]
for i in tqdm(range(int(1e5))):
    ids = list(range(id_start, id_start + int(1e3)))
    vectors = np.random.rand(int(1e3), 512)
    client_grpc.upload_collection(
        collection_name="clip_image_product",
        vectors=vectors,
        payload=payloads,
        ids=ids
    )
    id_start += int(1e3)

100%|██████████| 100000/100000 [7:17:57<00:00,  3.81it/s]  


In [17]:
%%timeit
query_vector = np.random.rand(512)
hits = client_grpc.search(
    collection_name="clip_image_product",
    query_vector=query_vector,
    query_filter=Filter(
        must=[  # These conditions are required for search results
            FieldCondition(
                key='rand_number',  # Condition based on values of `rand_number` field.
                range=Range(
                    gte=3  # Select only those results where `rand_number` >= 3
                )
            )
        ]
    ),
    limit=5  # Return 5 closest points
)

158 ms ± 3.13 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [25]:
%%timeit
query_vector = np.random.rand(512)
hits = client_grpc.search(
    collection_name="clip_image_product",
    query_vector=query_vector,
    query_filter=Filter(
        must=[  # These conditions are required for search results
            FieldCondition(
                key='rand_number',  # Condition based on values of `rand_number` field.
                range=Range(
                    gte=3  # Select only those results where `rand_number` >= 3
                )
            )
        ]
    ),
    limit=1000  # Return 5 closest points
)

180 ms ± 2.44 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [19]:
%%timeit
query_vector = np.random.rand(512)
hits = client_grpc.search(
    collection_name="clip_image_product",
    query_vector=query_vector,
    query_filter=Filter(
        must=[  # These conditions are required for search results
            FieldCondition(
                key='rand_number',  # Condition based on values of `rand_number` field.
                range=Range(
                    gte=3  # Select only those results where `rand_number` >= 3
                )
            )
        ]
    ),
    limit=5000  # Return 5 closest points
)

396 ms ± 8.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
%%timeit
query_vector = np.random.rand(512)
hits = client_grpc.search(
    collection_name="clip_image_product",
    limit=5  # Return 5 closest points
)

8.81 ms ± 1.25 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [21]:
%%timeit
query_vector = np.random.rand(512)
hits = client_grpc.search(
    collection_name="clip_image_product",
    query_vector=query_vector,
    limit=1000  # Return 5 closest points
)

30.4 ms ± 190 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [20]:
%%timeit
query_vector = np.random.rand(512)
hits = client_grpc.search(
    collection_name="clip_image_product",
    query_vector=query_vector,
    limit=5000  # Return 5 closest points
)

151 ms ± 5.18 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [26]:
import psutil

# function to convert bytes to a more human-readable format
def convert_bytes(num):
    for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
        if num < 1024.0:
            return f"{num:.2f} {x}"
        num /= 1024.0

# get the current process's memory usage
process = psutil.Process()
memory_info = process.memory_info().rss

# print the memory usage in a human-readable format
print(f"Current memory usage: {convert_bytes(memory_info)}")

Current memory usage: 140.05 MB


In [31]:
!docker ps

CONTAINER ID   IMAGE           COMMAND      CREATED        STATUS        PORTS                              NAMES
4251ba871d4e   qdrant/qdrant   "./qdrant"   16 hours ago   Up 16 hours   0.0.0.0:6333-6334->6333-6334/tcp   bold_mccarthy


In [32]:
!docker stats

[2J[HCONTAINER ID   NAME            CPU %      MEM USAGE / LIMIT   MEM %     NET I/O         BLOCK I/O      PIDS
4251ba871d4e   bold_mccarthy   1217.29%   235GiB / 1.475TiB   15.56%    211GB / 591MB   12.6MB / 6TB   902
[2J[HCONTAINER ID   NAME            CPU %      MEM USAGE / LIMIT   MEM %     NET I/O         BLOCK I/O      PIDS
4251ba871d4e   bold_mccarthy   1217.29%   235GiB / 1.475TiB   15.56%    211GB / 591MB   12.6MB / 6TB   902
[2J[HCONTAINER ID   NAME            CPU %     MEM USAGE / LIMIT   MEM %     NET I/O         BLOCK I/O      PIDS
4251ba871d4e   bold_mccarthy   2.63%     235GiB / 1.475TiB   15.56%    211GB / 591MB   12.6MB / 6TB   902
[2J[HCONTAINER ID   NAME            CPU %     MEM USAGE / LIMIT   MEM %     NET I/O         BLOCK I/O      PIDS
4251ba871d4e   bold_mccarthy   2.63%     235GiB / 1.475TiB   15.56%    211GB / 591MB   12.6MB / 6TB   902
[2J[HCONTAINER ID   NAME            CPU %     MEM USAGE / LIMIT   MEM %     NET I/O         BLOCK I/O      PIDS
42