In [1]:
import random
from pymilvus import connections, CollectionSchema, FieldSchema, DataType, Collection, utility

# Connect to the Milvus server listening on the loopback address; the
# collection created below refers to this connection via using='default'.
connections.connect(
    host='127.0.0.1',
    port='19530',
)

In [2]:
# Build the "book_table" demo collection: schema, indexes, and synthetic rows.
COLLECTION_NAME = "book_table"
NUM_BOOKS = 2000   # number of synthetic entities inserted below
VECTOR_DIM = 2     # dimension of the book_intro vector; shared by schema and data
SEED = 42          # fixed seed so the generated vectors are the same on every run

# Milvus insert() does not de-duplicate primary keys, so re-running this cell
# against an already-populated collection would store every book_id twice and
# the query further down would return duplicate rows. Start from a clean
# collection instead.
# NOTE: this deletes any existing data stored under "book_table".
if utility.has_collection(COLLECTION_NAME, using='default'):
    utility.drop_collection(COLLECTION_NAME, using='default')

# --- Schema: one primary key, two scalar fields, one float-vector field ---
book_id = FieldSchema(
    name="book_id",
    dtype=DataType.INT64,
    is_primary=True,
)
book_name = FieldSchema(
    name="book_name",
    dtype=DataType.VARCHAR,
    max_length=200,
)
word_count = FieldSchema(
    name="word_count",
    dtype=DataType.INT64,
)
book_intro = FieldSchema(
    name="book_intro",
    dtype=DataType.FLOAT_VECTOR,
    dim=VECTOR_DIM,
)
schema = CollectionSchema(
    fields=[book_id, book_name, word_count, book_intro],
    description="Test book search",
)

collection = Collection(
    name=COLLECTION_NAME,
    schema=schema,
    using='default',
)

# Vector index on the embedding field (IVF_FLAT with the L2 metric).
collection.create_index(
    field_name="book_intro",
    index_params={
        "metric_type": "L2",
        "index_type": "IVF_FLAT",
        "params": {"nlist": 1024},
    },
)

# Named scalar index on book_name; no index_params are passed, so the index
# type is left to the server's default.
collection.create_index(
    field_name="book_name",
    index_name="scalar_index",
)

collection.load()

# A private, seeded generator keeps the vectors reproducible without touching
# the global `random` state.
rng = random.Random(SEED)

# Column-oriented payload: one list per field, in schema order.
# Kept as the cell's last expression so the MutationResult is displayed.
collection.insert([
    list(range(NUM_BOOKS)),                         # book_id
    [str(i) for i in range(NUM_BOOKS)],             # book_name
    list(range(10000, 10000 + NUM_BOOKS)),          # word_count
    [[rng.random() for _ in range(VECTOR_DIM)]      # book_intro
     for _ in range(NUM_BOOKS)],
])

(insert count: 2000, delete count: 0, upsert count: 0, timestamp: 442749871145353217, success count: 2000, err count: 0)

In [3]:
# A query, unlike a vector similarity search, selects entities purely by a
# boolean filter expression over scalar fields; no distance ranking is involved.
res = collection.query(
    expr="book_id in [3, 4]",
    output_fields=["book_id", "book_intro"],
    # Maximum number of matching entities to return.
    # offset + limit should be less than 16384.
    limit=100,
    # Number of matching entities to skip before returning results.
    # Only available when limit is specified.
    offset=0,
)

In [4]:
# Display the query result: a list with one dict per matching entity,
# holding the requested output fields.
res

[{'book_id': 3, 'book_intro': [0.6489556, 0.48023546]},
 {'book_id': 4, 'book_intro': [0.18178387, 0.9374216]}]

In [5]:
# Counterpart of the collection.load() call in the setup cell: release the
# collection once the query is done.
collection.release()