In [115]:
import random
from pymilvus import connections, CollectionSchema, FieldSchema, DataType, Collection, utility

connections.connect(host='127.0.0.1', port='19530')

In [116]:
book_id = FieldSchema(
    name="book_id",
    dtype=DataType.INT64,
    is_primary=True,
)
book_name = FieldSchema(
    name="book_name",
    dtype=DataType.VARCHAR,
    max_length=200,
)
word_count = FieldSchema(
    name="word_count",
    dtype=DataType.INT64,
)
book_intro = FieldSchema(
    name="book_intro",
    dtype=DataType.FLOAT_VECTOR,
    dim=2
)
schema = CollectionSchema(
    fields=[book_id, book_name, word_count, book_intro],
    description="Test book search",
)

collection = Collection(
    name="book_table",
    schema=schema,
    using='default'
)

collection.create_index(
    field_name="book_intro",
    index_params={
        "metric_type": "L2",
        "index_type": "IVF_FLAT",
        "params": {"nlist": 1024}
    }
)

collection.create_index(
    field_name="book_name",
    index_name="scalar_index",
)

collection.insert([[i for i in range(2000)],
                   [str(i) for i in range(2000)],
                   [i for i in range(10000, 12000)],
                   [[random.random() for _ in range(2)] for _ in range(2000)]])

(insert count: 2000, delete count: 0, upsert count: 0, timestamp: 442739402773364739, success count: 2000, err count: 0)

In [117]:
# All search and query operations within Milvus are executed in memory. Load the collection to memory before conducting a vector similarity search.
collection.load()

官方参考文档:https://milvus.io/docs/search.md

In [118]:
search_params = {
    "metric_type": "L2",
    "params": {"nprobe": 10,
               "offset": 5}}

results = collection.search(
    # Vectors to search with.
    data=[[0.1, 0.2]],
    # Name of the field to search on.
    anns_field="book_intro",
    param=search_params,
    # Number of the most similar results to return. The sum of this value and `offset` should be less than 16384.
    limit=10,
    # Boolean expression used to filter attribute. See Boolean Expression Rules for more information.
    expr=None,
    # Name of the field to return. Vector field is not supported in current release.
    output_fields=['book_id', 'book_name', 'word_count'],
)

In [119]:
# get the IDs of all returned hits
print(results[0].ids)

# get the distances to the query vector from all returned hits
print(results[0].distances)

[1107, 433, 1489, 825, 140, 768, 174, 1275, 1083, 404]
[1.1048490705434233e-05, 6.296827632468194e-05, 6.588322867173702e-05, 8.589174831286073e-05, 0.0001263414742425084, 0.00013537416816689074, 0.00020955209038220346, 0.0002286857779836282, 0.00024863710859790444, 0.00026549637550488114]


In [120]:
for i in range(10):
    hit = results[0][i]
    print(hit)

id: 1107, distance: 1.1048490705434233e-05, entity: {'book_id': 1107, 'book_name': '1107', 'word_count': 11107}
id: 433, distance: 6.296827632468194e-05, entity: {'book_id': 433, 'book_name': '433', 'word_count': 10433}
id: 1489, distance: 6.588322867173702e-05, entity: {'book_id': 1489, 'book_name': '1489', 'word_count': 11489}
id: 825, distance: 8.589174831286073e-05, entity: {'book_id': 825, 'book_name': '825', 'word_count': 10825}
id: 140, distance: 0.0001263414742425084, entity: {'book_id': 140, 'book_name': '140', 'word_count': 10140}
id: 768, distance: 0.00013537416816689074, entity: {'book_id': 768, 'book_name': '768', 'word_count': 10768}
id: 174, distance: 0.00020955209038220346, entity: {'book_id': 174, 'book_name': '174', 'word_count': 10174}
id: 1275, distance: 0.0002286857779836282, entity: {'book_id': 1275, 'book_name': '1275', 'word_count': 11275}
id: 1083, distance: 0.00024863710859790444, entity: {'book_id': 1083, 'book_name': '1083', 'word_count': 11083}
id: 404, dis

In [121]:
# Releases the collection data from memory.
collection.release()