In [8]:
from pymilvus import Collection, connections, FieldSchema, CollectionSchema, DataType, utility
import pandas as pd
import numpy as np
import os
import time

In [39]:
# Connect to the local Milvus server and open a handle to the
# already-existing collection (nothing is created here).
connections.connect(alias='default', host='localhost', port='19530')

collection_name = 'text_embeddings'
collection = Collection(collection_name)

## Query without index

In [None]:
# Remove the existing index so the next query runs against an un-indexed collection.
# A loaded collection must be released before its index can be dropped.
collection.release()
collection.drop_index()
# NOTE(review): recent Milvus releases may refuse load() when the vector field
# has no index -- confirm this cell works on the server version in use.
collection.load()

In [133]:
# Time a filtered query that returns only the company_name field.
expr = "company_name == 'TheHersheyCompany'"  # plain literal: nothing to interpolate
# perf_counter() is a monotonic, high-resolution clock meant for measuring elapsed time
t0 = time.perf_counter()
results = collection.query(expr=expr, output_fields=["company_name"])
print(time.perf_counter() - t0)

0.009450912475585938


## Query with index

In [None]:
# Build an IVF_FLAT index (L2 metric, nlist=128) on the "embedding" field,
# then load the collection again so it can be queried and searched.
collection.release()  # release before changing the index
index_params = {
    "metric_type": "L2",
    "index_type": "IVF_FLAT",
    "params": {"nlist": 128},
}
# index_name is a keyword argument of create_index(), not an entry of index_params
collection.create_index(
    field_name="embedding",
    index_params=index_params,
    index_name="embedding_index",
)
collection.load()

In [146]:
# Time the same filtered query, this time fetching the stored vectors;
# the search cells below reuse query_results as their query vectors.
# NOTE(review): the index created above is on "embedding" while this filter is
# on company_name, and the returned field differs from the un-indexed run
# ("embedding" vs "company_name"), so the two timings are not directly comparable.
expr = "company_name == 'TheHersheyCompany'"  # plain literal: nothing to interpolate
# perf_counter() is a monotonic, high-resolution clock meant for measuring elapsed time
t0 = time.perf_counter()
query_results = collection.query(expr=expr, output_fields=["embedding"])
print(time.perf_counter() - t0)

0.17076706886291504


## Vector search (single, bulk, and range)

In [124]:
# single-vector search: the first queried embedding is the only query vector
query_vectors = [query_results[0]['embedding']]
t0 = time.perf_counter()  # monotonic clock, suited to measuring elapsed time
results = collection.search(
    data=query_vectors,
    limit=3,  # top-3 nearest neighbours
    anns_field="embedding",
    param={"metric_type": "L2", "params": {}},
)
print(time.perf_counter() - t0)
# one entry in results per query vector
for result in results:
    print(result)

0.009879827499389648
['id: 449090663332412588, distance: 0.0, entity: {}', 'id: 449090663332676917, distance: 1.270500094019411e-10, entity: {}', 'id: 449090663332412878, distance: 113.849853515625, entity: {}']


In [113]:
# bulk-vector search: send two query vectors in a single search call
query_vectors = [query_results[0]['embedding'], query_results[1]['embedding']]
t0 = time.perf_counter()  # monotonic clock, suited to measuring elapsed time
results = collection.search(
    data=query_vectors,
    limit=3,  # top-3 nearest neighbours for each query vector
    anns_field="embedding",
    param={"metric_type": "L2", "params": {}},
)
print(time.perf_counter() - t0)
# one entry in results per query vector
for result in results:
    print(result)

0.011841058731079102
['id: 449090663332412588, distance: 0.0, entity: {}', 'id: 449090663332676917, distance: 1.270500094019411e-10, entity: {}', 'id: 449090663332412878, distance: 113.849853515625, entity: {}']
['id: 449090663332412589, distance: 0.0, entity: {}', 'id: 449090663332412590, distance: 0.0, entity: {}', 'id: 449090663332676918, distance: 2.620709482759054e-10, entity: {}']


In [159]:
# range search: restrict hits by distance to the query vector, not just by count
search_params = {
    "metric_type": "L2",
    "params": {
        "radius": 0.8  # keep only hits within this L2 distance of the query vector
    }
}
query_vectors = [query_results[1]['embedding']]
t0 = time.perf_counter()  # monotonic clock, suited to measuring elapsed time
results = collection.search(
    data=query_vectors,
    limit=5,  # upper bound on the number of hits returned
    anns_field="embedding",
    param=search_params,
)
print(time.perf_counter() - t0)
for result in results:
    print(result)

0.006422996520996094
['id: 449090663332412589, distance: 0.0, entity: {}', 'id: 449090663332412590, distance: 0.0, entity: {}', 'id: 449090663332676918, distance: 2.620709482759054e-10, entity: {}', 'id: 449090663332676919, distance: 2.620709482759054e-10, entity: {}']
