In [1]:
# through tuning the parameter sof index, we can get different vectorDB retrival performance
# we select two basic index as the object, IVF, and HNSW.

# we use the dataset
# - Glove-25-angular, dimension 25 train set 1,183,514 test set 10,000
# download ref: http://ann-benchmarks.com/glove-25-angular.hdf5

import time
import h5py
import numpy as np
from pymilvus import MilvusClient, DataType

In [2]:
fmt = "\n=== {:30} ===\n"
search_latency_fmt = "search latency = {:.4f}s"

In [3]:
f = h5py.File("./glove-25-angular.hdf5", "r")
num_entities, dim = f['train'].shape
distance = f.attrs['distance']

In [4]:
# ------------------  1. Connect to Milvus Server 
HOST = '10.10.10.250'
PORT = 19530
DB_NAME = 'testdb'
URL="http://"+HOST+':'+str(PORT)
# if you deployed the standalone milvus, and connect to the database server
# if you deployed in your local machine, use "http://localhost:19530"
client = MilvusClient("http://10.10.10.247:19530")

if DB_NAME not in client.list_databases():
    client.create_database(DB_NAME)
client.using_database(DB_NAME)
client.list_databases()

2025-02-13 06:51:56,861 [ERROR][_create_connection]: Failed to create new connection using: 6c68e7cdc5be4b74ba7091fc7d994593 (milvus_client.py:849)


MilvusException: <MilvusException: (code=2, message=Fail connecting to server on 10.10.10.247:19530, illegal connection params or server unavailable)>

In [None]:
# ------------------ 2. Create a collection with customized schema
# we are going to create a collection with 2 fields
# +-+------------+------------+------------------+------------------------------+
# | | field name | field type | other attributes |       field description      |
# +-+------------+------------+------------------+------------------------------+
# |1|    "pk"    |   VarChar  |  is_primary=True |      "primary field"         |
# +-+------------+------------+------------------+------------------------------+
# |2|"embeddings"| FloatVector|     dim=dim     |"float vector with specific dim"|
# +-+------------+------------+------------------+------------------------------+
COLLECTION_NAME = "glove_25_anugular"
schema = MilvusClient.create_schema(
    auto_id = False,
    enable_dynamic_field = True,
)

schema.add_field(field_name = 'pk', datatype=DataType.INT64, is_primary=True, auto_id = False)
schema.add_field(field_name = 'embeddings', datatype=DataType.FLOAT_VECTOR, dim=dim)

if client.has_collection(COLLECTION_NAME):
    client.drop_collection(COLLECTION_NAME)
res = client.create_collection(collection_name = COLLECTION_NAME, schema = schema)
res

In [None]:
# ------------------ 3. Insert data into the collection
data = [
    {"pk":i[0], "embeddings":i[1]}
    for i in zip(range(num_entities), f['train'][:])
]

In [None]:
batch_size = 100_000
for batch in range(0, num_entities, batch_size):
    res = client.insert(collection_name = COLLECTION_NAME, data = data[batch:batch+batch_size])
    print(f"inserted {batch+batch_size}/{num_entities}, result = {res['insert_count']}")

inserted 100000/1183514, result = 100000
inserted 200000/1183514, result = 100000
inserted 300000/1183514, result = 100000
inserted 400000/1183514, result = 100000
inserted 500000/1183514, result = 100000
inserted 600000/1183514, result = 100000
inserted 700000/1183514, result = 100000
inserted 800000/1183514, result = 100000
inserted 900000/1183514, result = 100000
inserted 1000000/1183514, result = 100000
inserted 1100000/1183514, result = 100000
inserted 1200000/1183514, result = 83514


In [None]:
# flush and check the number of entities
client.flush(collection_name=COLLECTION_NAME)
client.get_collection_stats(COLLECTION_NAME)

{'row_count': 1183514}

In [None]:
# ------------------ 4[IVF_FLAT]. Create IVF_FLAT index
# in this tutorial, we will deep into the index and tuning the parameters

# IVF_FLAT - Inverted File FLAT Index
# which aims to improve the search performance of the basic FLAT index 
# by implementing approximate nearest neighbors (ANNs) algorithm instead of the native KNN (FLAT). 
# recommend to read this blog: https://zilliz.com/learn/how-to-pick-a-vector-index-in-milvus-visual-guide

# IVF-FLAT provides two hyperparameters we can tune:

# - nlist: the number of partitions to create using the k-means algorithm. default-128
# we determine the nlist when building the IVF-FLAT Index
# generally, nlist impacts both indexing time and query performance.
# adjuting nlist is relatively complex and expensive

# - nprobe: the number of partitions to consider during the search for candidate
# we can tune nprobe when executing each query request
# generally, given a nlist, nprobe tuning is more intuitive and simple

In [None]:
# first, we fix nlist, tune nprobe to check the query performance (latency and recall)
nlist = 1024
index_params = MilvusClient.prepare_index_params()
index_params.add_index(
    field_name="embeddings",
    metric_type = "COSINE",
    # related distance metric to angular is CONSINE
    index_type = "IVF_FLAT",
    index_name = "vector_index",
    params = {
        "nlist":nlist
    }    
)
start_time = time.time()
client.create_index(collection_name = COLLECTION_NAME, 
                   index_params = index_params,
                   sync = True)
end_time = time.time()
print(f"create index time: {end_time-start_time:.4f}s")

create index time: 17.5975s


In [None]:
client.load_collection(collection_name=COLLECTION_NAME)
res = client.get_load_state(collection_name=COLLECTION_NAME)
print(fmt.format("Load collection into memory"))


=== Load collection into memory    ===



In [None]:
# query_index
neighbors_num = f['neighbors'].shape[1]
neighbors = f['neighbors'][:]
query_embedding = f['test'][:]

In [None]:
# ------------------ 4[IVF_FLAT]. Search with different nprobe, check the performance
for nprobe in [1,16, 64, 256, 1024]:
    start_time = time.time()
    res = client.search(
        collection_name=COLLECTION_NAME,
        data=query_embedding,
        limit = 100,
        search_params={
            "params" : {"nprobe":nprobe}
        }
    )
    end_time = time.time()
    print(fmt.format(f"----------------- nprobe={nprobe} search --------------------"))
    print(search_latency_fmt.format(end_time-start_time))
    
    # calculate the Mean Average Recall.
    # Recall@K = (# of true positive in top K) / (# of true positive)
    # MAR (Mean Average Recall) = 1/C * sum(Recall@K). C is the number of queries
    mar_ = []
    for i, candidate_res in enumerate(res):
        y = neighbors[i]
        y_ = [j['id'] for j in candidate_res]
        y, y_ = set(y), set(y_)
        mar_.append(1.0 * len(y & y_) / len(y))
    mar = np.mean(mar_)
    print(f"Mean Average Recall = {mar:.4f}")
        


=== ----------------- nprobe=1 search -------------------- ===

search latency = 3.8174s
Mean Average Recall = 0.3681

=== ----------------- nprobe=16 search -------------------- ===

search latency = 4.2593s
Mean Average Recall = 0.8873

=== ----------------- nprobe=64 search -------------------- ===

search latency = 6.7913s
Mean Average Recall = 0.9804

=== ----------------- nprobe=256 search -------------------- ===

search latency = 13.6901s
Mean Average Recall = 0.9994

=== ----------------- nprobe=1024 search -------------------- ===

search latency = 43.0186s
Mean Average Recall = 1.0000


In [None]:
# from the result, we can easily find that
# with the increase of nprobe, the search latency increase while the recall degrade
# nprobe represents the number of partitions to consider during the search candidate
# the value is in the range of [1, nlist].
# When it is 1, only one partion is considered, which is the fastest but with loss of recall
# When it is nlist, all partition is considered, equal to the FLAT index.

In [None]:
# drop index and change nlist to 256 
client.release_collection(
    collection_name=COLLECTION_NAME
)
client.drop_index(
    collection_name=COLLECTION_NAME,
    index_name="vector_index"
)

In [None]:
# ------------------- 4[IVF_FLAT]. Create IVF_FLAG index with nlist=256
nlist = 256
index_params = MilvusClient.prepare_index_params()
index_params.add_index(
    field_name="embeddings",
    metric_type = "COSINE",
    # related distance metric to angular is CONSINE
    index_type = "IVF_FLAT",
    index_name = "vector_index",
    params = {
        "nlist":nlist
    }    
)
start_time = time.time()
client.create_index(collection_name = COLLECTION_NAME, 
                   index_params = index_params,
                   sync = True)
end_time = time.time()
print(f"create index time: {end_time-start_time:.4f}s")

create index time: 3.5281s


In [None]:
# decrease the nlist, we spend less time on indexing building
# load it into memory
client.load_collection(collection_name=COLLECTION_NAME)
res = client.get_load_state(collection_name=COLLECTION_NAME)
print(fmt.format("Load collection into memory"))


=== Load collection into memory    ===



In [None]:
# ------------------- 4[IVF_FLAT]. Search with different nprobe, check the performance
for nprobe in [1, 16, 64, 256]:
    start_time = time.time()
    res = client.search(
        collection_name=COLLECTION_NAME,
        data=query_embedding,
        limit = 100,
        search_params={
            "params" : {"nprobe":nprobe}
        }
    )
    end_time = time.time()
    print(fmt.format(f"----------------- nprobe={nprobe} search --------------------"))
    print(search_latency_fmt.format(end_time-start_time))
    
    # calculate the Mean Average Recall.
    # Recall@K = (# of true positive in top K) / (# of true positive)
    # MAR (Mean Average Recall) = 1/C * sum(Recall@K). C is the number of queries
    mar_ = []
    for i, candidate_res in enumerate(res):
        y = neighbors[i]
        y_ = [j['id'] for j in candidate_res]
        y, y_ = set(y), set(y_)
        mar_.append(1.0 * len(y & y_) / len(y))
    mar = np.mean(mar_)
    print(f"Mean Average Recall = {mar:.4f}")


=== ----------------- nprobe=1 search -------------------- ===

search latency = 3.2711s
Mean Average Recall = 0.4697

=== ----------------- nprobe=16 search -------------------- ===

search latency = 5.8951s
Mean Average Recall = 0.9495

=== ----------------- nprobe=64 search -------------------- ===

search latency = 12.3161s
Mean Average Recall = 0.9971

=== ----------------- nprobe=256 search -------------------- ===

search latency = 38.0888s
Mean Average Recall = 1.0000


In [None]:
# when nlist decrease, given the same nprobe, more candidates cause the mar increase.
# basically, nlist is a probe to control the granularity of the search space.
# also increasing nlist makes the index building process slow
# generally, there are several situations to tune nlist:
# - the dataset is large, the nlist should be large
# - low recall during search, the nprobe is fixed, decrease nlist
# - the search latency is high, the nprobe is fixed, increase nlist

In [None]:
# drop index 
client.release_collection(
    collection_name=COLLECTION_NAME
)
client.drop_index(
    collection_name=COLLECTION_NAME,
    index_name="vector_index"
)

In [None]:
# ------------------- 5.[HNSW]. Create HNSW index

# HNSW: Hierarchical Navigable Small World
# algorithm detail refer to paper https://arxiv.org/pdf/1603.09320

# HNSW provides three hyperparameters we can tune:

# When building index:
# - M: the maximum number of connections for each node in the graph. 
# Higher M makes the graph more connected, which increase memory suage and indexing building time.
# but with higher search quality.

# - efConstruction: the size of the dynamic cnadidate list which controls index seach speed/build speed tradeoff.
# Higher efConstruction makes the index building slower but with higher search quality.

# When searching:
# - ef: the size of the dynamic candidate list during search.
# Higher ef makes the search slower and memory usage higher but with higher search quality.

In [None]:
M = 32
ef_construct = 500

index_params = MilvusClient.prepare_index_params()
index_params.add_index(
    field_name="embeddings",
    metric_type = "COSINE",
    # related distance metric to angular is CONSINE
    index_type = "HNSW",
    index_name = "vector_index",
    params = {
        "M":M,
        "efConstruction":ef_construct
    }    
)
start_time = time.time()
client.create_index(collection_name = COLLECTION_NAME, 
                   index_params = index_params,
                   sync = True)
end_time = time.time()
print(f"create index time: {end_time-start_time:.4f}s")

create index time: 89.4777s


In [None]:
client.load_collection(collection_name=COLLECTION_NAME)
res = client.get_load_state(collection_name=COLLECTION_NAME)
print(fmt.format("Load collection into memory"))


=== Load collection into memory    ===



In [None]:
# ------------------- 5.[HNSW]. Search with different ef, check the performance
for ef in [100, 200, 400, 800]:
    start_time = time.time()
    res = client.search(
        collection_name=COLLECTION_NAME,
        data=query_embedding,
        limit = 100,
        search_params={
            "params" : {"ef":ef}
        }
    )
    end_time = time.time()
    print(fmt.format(f"----------------- ef={ef} search --------------------"))
    print(search_latency_fmt.format(end_time-start_time))
    
    # calculate the Mean Average Recall.
    # Recall@K = (# of true positive in top K) / (# of true positive)
    # MAR (Mean Average Recall) = 1/C * sum(Recall@K). C is the number of queries
    mar_ = []
    for i, candidate_res in enumerate(res):
        y = neighbors[i]
        y_ = [j['id'] for j in candidate_res]
        y, y_ = set(y), set(y_)
        mar_.append(1.0 * len(y & y_) / len(y))
    mar = np.mean(mar_)
    print(f"Mean Average Recall = {mar:.4f}")


=== ----------------- ef=100 search -------------------- ===

search latency = 4.0822s
Mean Average Recall = 0.9758

=== ----------------- ef=200 search -------------------- ===

search latency = 4.2695s
Mean Average Recall = 0.9949

=== ----------------- ef=400 search -------------------- ===

search latency = 5.1274s
Mean Average Recall = 0.9993

=== ----------------- ef=800 search -------------------- ===

search latency = 7.0809s
Mean Average Recall = 0.9999


In [None]:
# from the result, we can easily find that
# with the increase of ef, the search latency increase while the recall degrade

In [None]:
# drop index 
client.release_collection(
    collection_name=COLLECTION_NAME
)
client.drop_index(
    collection_name=COLLECTION_NAME,
    index_name="vector_index"
)

In [None]:
# ------------------- 5.[HNSW]. Create HNSW index - Decrease M
M = 8
ef_construct = 500

index_params = MilvusClient.prepare_index_params()
index_params.add_index(
    field_name="embeddings",
    metric_type = "COSINE",
    # related distance metric to angular is CONSINE
    index_type = "HNSW",
    index_name = "vector_index",
    params = {
        "M":M,
        "efConstruction":ef_construct
    }    
)
start_time = time.time()
client.create_index(collection_name = COLLECTION_NAME, 
                   index_params = index_params,
                   sync = True)
end_time = time.time()
print(f"create index time: {end_time-start_time:.4f}s")

create index time: 39.2173s


In [None]:
# index building time decrease with the decrease of M.
# index building time 89s -> 39s

In [None]:
client.load_collection(collection_name=COLLECTION_NAME)
res = client.get_load_state(collection_name=COLLECTION_NAME)
print(fmt.format("Load collection into memory"))

# ------------------- 5.[HNSW]. Search with different ef, check the performance
for ef in [100, 200, 400, 800]:
    start_time = time.time()
    res = client.search(
        collection_name=COLLECTION_NAME,
        data=query_embedding,
        limit = 100,
        search_params={
            "params" : {"ef":ef}
        }
    )
    end_time = time.time()
    print(fmt.format(f"----------------- ef={ef} search --------------------"))
    print(search_latency_fmt.format(end_time-start_time))
    
    # calculate the Mean Average Recall.
    # Recall@K = (# of true positive in top K) / (# of true positive)
    # MAR (Mean Average Recall) = 1/C * sum(Recall@K). C is the number of queries
    mar_ = []
    for i, candidate_res in enumerate(res):
        y = neighbors[i]
        y_ = [j['id'] for j in candidate_res]
        y, y_ = set(y), set(y_)
        mar_.append(1.0 * len(y & y_) / len(y))
    mar = np.mean(mar_)
    print(f"Mean Average Recall = {mar:.4f}")


=== Load collection into memory    ===


=== ----------------- ef=100 search -------------------- ===

search latency = 3.1490s
Mean Average Recall = 0.8430

=== ----------------- ef=200 search -------------------- ===

search latency = 4.0087s
Mean Average Recall = 0.9208

=== ----------------- ef=400 search -------------------- ===

search latency = 4.1213s
Mean Average Recall = 0.9674

=== ----------------- ef=800 search -------------------- ===

search latency = 5.3935s
Mean Average Recall = 0.9891


In [None]:
# given the ef and efConstruction, 
# decrease M makes the search quality decrease but speed up the search process

In [None]:
# drop index 
client.release_collection(
    collection_name=COLLECTION_NAME
)
client.drop_index(
    collection_name=COLLECTION_NAME,
    index_name="vector_index"
)

In [None]:
# ------------------- 5.[HNSW]. Create HNSW index - Decrease efConstruction
M = 32
ef_construct = 100

index_params = MilvusClient.prepare_index_params()
index_params.add_index(
    field_name="embeddings",
    metric_type = "COSINE",
    # related distance metric to angular is CONSINE
    index_type = "HNSW",
    index_name = "vector_index",
    params = {
        "M":M,
        "efConstruction":ef_construct
    }    
)
start_time = time.time()
client.create_index(collection_name = COLLECTION_NAME, 
                   index_params = index_params,
                   sync = True)
end_time = time.time()
print(f"create index time: {end_time-start_time:.4f}s")

create index time: 22.1257s


In [None]:
# index building time decrease with the decrease of efConstruction.
# index building time 89s -> 22s

In [None]:
client.load_collection(collection_name=COLLECTION_NAME)
res = client.get_load_state(collection_name=COLLECTION_NAME)
print(fmt.format("Load collection into memory"))

# ------------------- 5.[HNSW]. Search with different ef, check the performance
for ef in [100, 200, 400, 800]:
    start_time = time.time()
    res = client.search(
        collection_name=COLLECTION_NAME,
        data=query_embedding,
        limit = 100,
        search_params={
            "params" : {"ef":ef}
        }
    )
    end_time = time.time()
    print(fmt.format(f"----------------- ef={ef} search --------------------"))
    print(search_latency_fmt.format(end_time-start_time))
    
    # calculate the Mean Average Recall.
    # Recall@K = (# of true positive in top K) / (# of true positive)
    # MAR (Mean Average Recall) = 1/C * sum(Recall@K). C is the number of queries
    mar_ = []
    for i, candidate_res in enumerate(res):
        y = neighbors[i]
        y_ = [j['id'] for j in candidate_res]
        y, y_ = set(y), set(y_)
        mar_.append(1.0 * len(y & y_) / len(y))
    mar = np.mean(mar_)
    print(f"Mean Average Recall = {mar:.4f}")


=== Load collection into memory    ===


=== ----------------- ef=100 search -------------------- ===

search latency = 3.5012s
Mean Average Recall = 0.9574

=== ----------------- ef=200 search -------------------- ===

search latency = 4.5053s
Mean Average Recall = 0.9862

=== ----------------- ef=400 search -------------------- ===

search latency = 4.4190s
Mean Average Recall = 0.9963

=== ----------------- ef=800 search -------------------- ===

search latency = 6.8445s
Mean Average Recall = 0.9993


In [None]:
# given the ef and M, 
# decrease efConstruction makes the search quality decrease but speed up the search process