In [10]:
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")

In [11]:
if client.has_collection(collection_name="demo_collection"):
    client.drop_collection(collection_name="demo_collection")
client.create_collection(
    collection_name="demo_collection",
    dimension=768,  # The vectors we will use in this demo has 768 dimensions
)

In [12]:
print(client.list_collections())

['demo_collection']


In [13]:
print(client.describe_collection(collection_name="demo_collection"))


{'collection_name': 'demo_collection', 'auto_id': False, 'num_shards': 1, 'description': '', 'fields': [{'field_id': 100, 'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'params': {}, 'is_primary': True}, {'field_id': 101, 'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 768}}], 'functions': [], 'aliases': [], 'collection_id': 457995467641724500, 'consistency_level': 2, 'properties': {}, 'num_partitions': 1, 'enable_dynamic_field': True, 'created_timestamp': 457998651139817476}


In [14]:
from pymilvus import model

embedding_fn = model.DefaultEmbeddingFunction()

docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

vectors = embedding_fn.encode_documents(docs)

print("Dim:", embedding_fn.dim, vectors[0].shape)

data = [
    {"id": i, "vector": vectors[i], "text": docs[i], "subject": "history"}
    for i in range(len(vectors))
]

print("Data has", len(data), "entities, each with fields: ", data[0].keys())
print("Vector dim:", len(data[0]["vector"]))


Dim: 768 (768,)
Data has 3 entities, each with fields:  dict_keys(['id', 'vector', 'text', 'subject'])
Vector dim: 768


In [15]:
res = client.insert(collection_name="demo_collection", data=data)
print(res)

{'insert_count': 3, 'ids': [0, 1, 2]}


In [16]:
results = client.query(
    collection_name="demo_collection",
    filter="subject == 'history'",
    output_fields=["id", "text", "subject"]
)

for doc in results:
    print(doc)

In [None]:
query_vectors = embedding_fn.encode_queries(["Who is Alan Turing?"])

res = client.search(
    collection_name="demo_collection",  # target collection
    data=query_vectors,  # query vectors
    limit=2,  # number of returned entities
    output_fields=["text", "subject"],  # specifies fields to be returned
)
    
print(res)

data: [[]]


In [19]:
docs = [
    "Machine learning has been used for drug design.",
    "Computational synthesis with AI algorithms predicts molecular properties.",
    "DDR1 is involved in cancers and fibrosis.",
]

vectors = embedding_fn.encode_documents(docs)

data = [
    {"id": 3 + i, "vector": vectors[i], "text": docs[i], "subject": "biology"}
    for i in range(len(vectors))
]

res = client.insert(collection_name="demo_collection", data=data)
print(res)





{'insert_count': 3, 'ids': [3, 4, 5]}


In [21]:
res = client.search(
    collection_name="demo_collection",
    data=embedding_fn.encode_queries(["tell me AI related information"]),
    filter="subject == 'biology'",
    limit=2,
    output_fields=["text", "subject"],
)

print(res)

data: [[{'id': 4, 'distance': 0.2703055739402771, 'entity': {'text': 'Computational synthesis with AI algorithms predicts molecular properties.', 'subject': 'biology'}}]]


In [23]:
res = client.query(
    collection_name="demo_collection",
    filter="subject == 'history'",
    output_fields=["text", "subject"],
)

print(res)

data: ["{'id': 0, 'text': 'Artificial intelligence was founded as an academic discipline in 1956.', 'subject': 'history'}", "{'id': 1, 'text': 'Alan Turing was the first person to conduct substantial research in AI.', 'subject': 'history'}", "{'id': 2, 'text': 'Born in Maida Vale, London, Turing was raised in southern England.', 'subject': 'history'}"]


In [27]:
res = client.query(
    collection_name="demo_collection",
    ids=[0, 2],
    output_fields=["vector", "text", "subject"],
)

print(res)

data: ["{'vector': [np.float32(0.01072784), np.float32(-0.03589515), np.float32(0.018749738), np.float32(0.016348766), np.float32(0.0365169), np.float32(0.0035881721), np.float32(-0.0004004701), np.float32(0.028529387), np.float32(0.002274549), np.float32(0.0018362501), np.float32(0.00422585), np.float32(0.027173959), np.float32(-0.0036843463), np.float32(0.030791564), np.float32(0.0045054313), np.float32(0.04422815), np.float32(0.010503838), np.float32(-0.029494528), np.float32(-0.0670734), np.float32(-0.02052644), np.float32(0.015322757), np.float32(-0.0060049477), np.float32(-0.06228548), np.float32(-0.039614733), np.float32(0.014206295), np.float32(0.03270766), np.float32(-0.020834588), np.float32(-0.044174295), np.float32(-0.028339865), np.float32(0.029424466), np.float32(-0.028087215), np.float32(-0.02080904), np.float32(0.017159743), np.float32(0.0021116557), np.float32(0.021823762), np.float32(-0.0015776267), np.float32(-0.037696723), np.float32(0.041460745), np.float32(-0.0250

In [28]:
res = client.delete(collection_name="demo_collection", ids=[0, 2])
print(res)

res = client.delete(
    collection_name="demo_collection",
    filter="subject == 'biology'",
)

print(res)

{'delete_count': 2}
{'delete_count': 3}


In [29]:
if client.has_collection(collection_name="demo2_collection"):
    client.drop_collection(collection_name="demo2_collection")
client.create_collection(
    collection_name="demo2_collection",
    dimension=768,  # The vectors we will use in this demo has 768 dimensions
)

In [30]:
print(client.list_collections())

['demo_collection', 'demo2_collection']


In [31]:
text = [
    "The Milky Way is the galaxy that contains our Solar System.",
    "Saturn is the sixth planet from the Sun and is known for its prominent rings.",
    "Neil Armstrong was the first person to walk on the Moon."
]

vectors = embedding_fn.encode_documents(text)

data = [
    {"id": i, "vector": vectors[i], "text": text[i], "subject": "astronomy"}
    for i in range(len(docs))
]

response = client.insert(collection_name="demo_collection", data=data)
print("Inserted:", response)

Inserted: {'insert_count': 3, 'ids': [0, 1, 2]}


In [32]:
client.drop_collection(collection_name="demo_collection")

In [33]:
print(client.list_collections())

['demo2_collection']
