In [None]:
# pip install pymilvus

# 导入必要的库
from pymilvus import MilvusClient
import numpy as np

# ===================== Connect to Milvus =====================
# Milvus Lite: the URI is a local file path.
client = MilvusClient(uri="./milvus_demo.db")

# Other deployments: connect to a remote server over HTTP instead.
# client = MilvusClient(uri="http://localhost:19530", token="root:Milvus")


# ===================== Create the collection =====================
# Drop the collection first if it already exists.
if client.has_collection("demo_collection"):
    client.drop_collection("demo_collection")

# Create a new collection whose vectors have 10 dimensions.
client.create_collection("demo_collection", dimension=10)

# ===================== Prepare the data =====================
# Sample text documents.
docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

# Random vectors stand in for real text embeddings: one vector per document,
# each component drawn uniformly from [-1, 1). A seeded generator makes the
# vectors -- and therefore any search distances computed from them --
# reproducible across kernel restarts.
SEED = 42
VECTOR_DIM = 10  # must match the dimension the collection was created with
rng = np.random.default_rng(SEED)
vectors = rng.uniform(-1, 1, size=(len(docs), VECTOR_DIM)).tolist()

# Combine id, vector, text and subject into one record per document.
data = [
    {"id": i, "vector": vector, "text": doc, "subject": "history"}
    for i, (vector, doc) in enumerate(zip(vectors, docs))
]

# ===================== Insert the data =====================
# Insert the prepared records into the collection; `res` keeps the client's response.
res = client.insert("demo_collection", data)

# ===================== Vector similarity search =====================
# Use the first vector as the query and look up the 2 most similar records.
res = client.search(
    "demo_collection",
    [vectors[0]],                       # list holding the single query vector
    filter="subject == 'history'",      # filter expression
    limit=2,                            # maximum number of results returned
    output_fields=["text", "subject"],  # fields included in each result
)
# Header line followed by the raw result on the next line.
print("搜索结果:", res, sep="\n")

# ===================== Attribute-based query =====================
# Fetch every record whose subject is 'history'.
res = client.query(
    "demo_collection",
    filter="subject == 'history'",
    output_fields=["text", "subject"],
)
# Blank line, header line, then the raw result on the next line.
print("\n查询结果:", res, sep="\n")

# ===================== 删除数据（已注释） =====================
# res = client.delete(
#     collection_name="demo_collection",
#     filter="subject == 'history'",
# )
# print(res)


data: ["[{'id': 0, 'distance': 0.9999999403953552, 'entity': {'text': 'Artificial intelligence was founded as an academic discipline in 1956.', 'subject': 'history'}}, {'id': 2, 'distance': 0.3660986125469208, 'entity': {'text': 'Born in Maida Vale, London, Turing was raised in southern England.', 'subject': 'history'}}]"]
data: ["{'id': 0, 'text': 'Artificial intelligence was founded as an academic discipline in 1956.', 'subject': 'history'}", "{'id': 1, 'text': 'Alan Turing was the first person to conduct substantial research in AI.', 'subject': 'history'}", "{'id': 2, 'text': 'Born in Maida Vale, London, Turing was raised in southern England.', 'subject': 'history'}"]


In [None]:
# Query the collection once more for all records whose subject is 'history'.
# NOTE(review): the saved output of this cell is an empty list, while the same
# query in the previous cell returned 3 rows -- presumably it was captured after
# the commented-out delete had been run. Confirm with Restart & Run All.
res = client.query(
    "demo_collection",
    filter="subject == 'history'",
    output_fields=["text", "subject"],
)
# Header line followed by the raw result on the next line.
print("查询结果:", res, sep="\n")

data: []


In [None]:
# Fetch the collection's statistics, such as its row count.
client.get_collection_stats(collection_name="demo_collection")

{'row_count': 3}

In [None]:
# Check whether the collection exists.
client.has_collection(collection_name="demo_collection")

True

In [None]:
# Show the collection's detailed description: its fields and other settings.
client.describe_collection(collection_name="demo_collection")

{'collection_name': 'demo_collection',
 'auto_id': False,
 'num_shards': 0,
 'description': '',
 'fields': [{'field_id': 100,
   'name': 'id',
   'description': '',
   'type': <DataType.INT64: 5>,
   'params': {},
   'is_primary': True},
  {'field_id': 101,
   'name': 'vector',
   'description': '',
   'type': <DataType.FLOAT_VECTOR: 101>,
   'params': {'dim': 10}}],
 'functions': [],
 'aliases': [],
 'collection_id': 0,
 'consistency_level': 0,
 'properties': {},
 'num_partitions': 0,
 'enable_dynamic_field': True}

In [None]:
# List every collection that currently exists.
client.list_collections()

['demo_collection']

In [None]:
# 这是一个空白单元格，可以用于后续扩展