In [1]:
import random
from pymilvus import connections, CollectionSchema, FieldSchema, DataType, Collection, utility

connections.connect(host='127.0.0.1', port='19530')

In [2]:
book_id = FieldSchema(
    name="book_id",
    dtype=DataType.INT64,
    is_primary=True,
)
book_name = FieldSchema(
    name="book_name",
    dtype=DataType.VARCHAR,
    max_length=200,
)
word_count = FieldSchema(
    name="word_count",
    dtype=DataType.INT64,
)
book_intro = FieldSchema(
    name="book_intro",
    dtype=DataType.FLOAT_VECTOR,
    dim=2
)
schema = CollectionSchema(
    fields=[book_id, book_name, word_count, book_intro],
    description="Test book search",
)

collection = Collection(
    name="book_table",
    schema=schema,
    using='default'
)

collection.create_index(
    field_name="book_intro",
    index_params={
        "metric_type": "L2",
        "index_type": "IVF_FLAT",
        "params": {"nlist": 1024}
    }
)

collection.create_index(
    field_name="book_name",
    index_name="scalar_index",
)

collection.load()

collection.insert([[i for i in range(2000)],
                   [str(i) for i in range(2000)],
                   [i for i in range(10000, 12000)],
                   [[random.random() for _ in range(2)] for _ in range(2000)]])

(insert count: 2000, delete count: 0, upsert count: 0, timestamp: 442749860659855361, success count: 2000, err count: 0)

官方参考文档:https://milvus.io/docs/search.md

In [3]:
search_params = {
    "metric_type": "L2",
    "params": {"nprobe": 10,
               "offset": 5}}

results = collection.search(
    # Vectors to search with.
    data=[[0.1, 0.2]],
    # Name of the field to search on.
    anns_field="book_intro",
    param=search_params,
    # Number of the most similar results to return. The sum of this value and `offset` should be less than 16384.
    limit=10,
    # Boolean expression used to filter attribute. See Boolean Expression Rules for more information.
    expr=None,
    # Name of the field to return. Vector field is not supported in current release.
    output_fields=['book_id', 'book_name', 'word_count'],
)

In [4]:
# get the IDs of all returned hits
print(results[0].ids)

# get the distances to the query vector from all returned hits
print(results[0].distances)

[1107, 1050, 1263, 201, 598, 597, 833, 1805, 828, 1589]
[1.1048490705434233e-05, 1.2806008271581959e-05, 1.860082738858182e-05, 2.0475592464208603e-05, 2.5180794182233512e-05, 2.699909600778483e-05, 2.749182931438554e-05, 3.0744013201911e-05, 4.841619738726877e-05, 5.490558032761328e-05]


In [5]:
for i in range(10):
    hit = results[0][i]
    print(hit)

id: 1107, distance: 1.1048490705434233e-05, entity: {'book_id': 1107, 'book_name': '1107', 'word_count': 11107}
id: 1050, distance: 1.2806008271581959e-05, entity: {'book_id': 1050, 'book_name': '1050', 'word_count': 11050}
id: 1263, distance: 1.860082738858182e-05, entity: {'book_id': 1263, 'book_name': '1263', 'word_count': 11263}
id: 201, distance: 2.0475592464208603e-05, entity: {'book_id': 201, 'book_name': '201', 'word_count': 10201}
id: 598, distance: 2.5180794182233512e-05, entity: {'book_id': 598, 'book_name': '598', 'word_count': 10598}
id: 597, distance: 2.699909600778483e-05, entity: {'book_id': 597, 'book_name': '597', 'word_count': 10597}
id: 833, distance: 2.749182931438554e-05, entity: {'book_id': 833, 'book_name': '833', 'word_count': 10833}
id: 1805, distance: 3.0744013201911e-05, entity: {'book_id': 1805, 'book_name': '1805', 'word_count': 11805}
id: 828, distance: 4.841619738726877e-05, entity: {'book_id': 828, 'book_name': '828', 'word_count': 10828}
id: 1589, dist

In [6]:
collection.release()