In [None]:
from pymilvus import (
    connections,
    utility,
    FieldSchema, CollectionSchema, DataType,
    Collection,
    AnnSearchRequest, RRFRanker, WeightedRanker,
    db
)
import random
import numpy as np
import time

conn = connections.connect(host="127.0.0.1", port=19530)

# database = db.create_database("test_db")
db.using_database("test_db")
print(db.list_database())
# db.drop_database("test_db")

# 列出所有集合
collections = utility.list_collections()
print(collections)
# 删除指定集合（例如名为 "your_collection_name" 的集合）
collection_name = "hybrid_search_collection"
if utility.has_collection(collection_name):
    utility.drop_collection(collection_name)
    print(f"集合 {collection_name} 已删除")

['default', 'test_db']
['hybrid_search_collection']
集合 hybrid_search_collection 已删除


In [None]:
dim = 128
collection_name = "test_collection"

fields = [
    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=512),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim)
]
schema = CollectionSchema(fields, "test_schema")
test_collection = Collection(collection_name, schema)

has = utility.has_collection(collection_name)
print(f"Does collection {collection_name} exist in Milvus: {has}")

rng = np.random.default_rng(seed=19530)
entities = [
    ['文本'+str(i) for i in range(100)],
    rng.random((100, dim)),    # field embeddings, supports numpy.ndarray and list
]

insert_res = test_collection.insert(entities)
print(insert_res)
print(test_collection.num_entities)

test_collection.flush()

(insert count: 100, delete count: 0, upsert count: 0, timestamp: 456343895500652548, success count: 100, err count: 0
0


In [18]:
print("Start Creating index IVF_FLAT")
index = {
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {"nlist": 128},
}

test_collection.create_index("embeddings", index)
print("Index created")
print(utility.index_building_progress(collection_name))

Start Creating index IVF_FLAT
Index created
{'total_rows': 100, 'indexed_rows': 100, 'pending_index_rows': 0, 'state': 'Finished'}


In [None]:

print("Start loading")
test_collection.load()
field_names = ["embeddings"]

search_params = {
    "metric_type": "L2",
    "params": {"nprobe": 10},
}

result = test_collection.search(
    data=[rng.random(dim)],
    anns_field="embeddings",
    param=search_params,
    limit=10,
    expr="text in ['文本15']",# "text in ['文本1', '文本2']",
    output_fields=["text"],
)

Start loading
data: ['["pk: 456329487459038530, distance: 15.325583457946777, entity: {\'text\': \'文本15\'}"]']
pk: 456329487459038530, distance: 15.325583457946777, entity: {'text': '文本15'}
