In [13]:
from pymilvus import (
    connections,
    utility,
    FieldSchema,
    CollectionSchema,
    DataType,
    Collection,
)
COLLECTION_NAME = "batch_123"

In [14]:
# docker-compose up -d to run Milvus service
# docker-compose down to stop Milvus service
# docker-compose ps to check Milvus service status
connections.connect(host="localhost", port="19530")

In [15]:
# Make data collection
fields = [
    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=768),
    FieldSchema(name="filepath", dtype=DataType.VARCHAR, max_length=200),
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=512)
]
schema = CollectionSchema(fields, "a basic schema for a batch of embeddings")
batch_123 = Collection(COLLECTION_NAME, schema)

In [16]:
# make dummy data
import random
entities = [
    [i for i in range(3000)],  # field pk
    [[random.random() for _ in range(768)] for _ in range(3000)],  # field embeddings
    [f"filepath_{i}" for i in range(3000)],  # field filepath
    [f"text_{i}" for i in range(3000)],  # field text
]
insert_result = batch_123.insert(entities)

In [17]:
# build index
index = {
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {"nlist": 128},
}
batch_123.create_index("embeddings", index)

Status(code=0, message='')

In [18]:
batch_123.load()
vectors_to_search = entities[1][:1]
print(len(vectors_to_search[0]))
search_params = {
    "metric_type": "L2",
    "params": {"nprobe": 10},
}
result = batch_123.search(vectors_to_search, "embeddings", search_params, limit=3, output_fields=["text"])
print(result)

768
["['(distance: 111.05941772460938, id: 823)', '(distance: 111.55264282226562, id: 1248)', '(distance: 112.07041931152344, id: 673)']"]
