# Milvus Demo

In [1]:
from pymilvus import MilvusClient

### Create a Collection

In [2]:
# Open a client on the "milvus_demo.db" path and print the collections it
# currently holds.
client = MilvusClient("milvus_demo.db")
print(client.list_collections())

  from pkg_resources import DistributionNotFound, get_distribution


[]


In [3]:
# Start from a clean slate: if "demo_collection" is already present, drop it
# before creating it again below.
if client.has_collection(collection_name="demo_collection"):
    print("Collection already exists.")
    client.drop_collection(collection_name="demo_collection")

client.create_collection(
    collection_name="demo_collection",
    dimension=768,  # The vectors we will use in this demo have 768 dimensions
)

### Represent text with vectors

In [4]:
from pymilvus import model

In [5]:
# Build the default embedding function. This will download a small embedding
# model, "paraphrase-albert-small-v2" (~50MB).
embedding_fn = model.DefaultEmbeddingFunction()

tokenizer_config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/827 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/245 [00:00<?, ?B/s]

model.onnx:   0%|          | 0.00/46.9M [00:00<?, ?B/s]

In [6]:
# Text strings to search over; each one is embedded and stored as an entity
# further down.
docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

In [7]:
# Embed every document with the embedding function.
vectors = embedding_fn.encode_documents(docs)
# The collection was created with a hard-coded dimension of 768. Check that the
# embedding function really produces vectors of that size so a mismatch
# surfaces here instead of further down the notebook.
assert embedding_fn.dim == 768, f"expected 768-dim embeddings, got {embedding_fn.dim}"
# The output vector has 768 dimensions, matching the collection that we just created.
print("Dim:", embedding_fn.dim, vectors[0].shape)  # Dim: 768 (768,)

Dim: 768 (768,)


In [8]:
# Build one entity per embedded document: a sequential id, the embedding, the
# raw text, and a subject label that is used later to demo metadata filtering.
data = [
    {"id": idx, "vector": embedding, "text": docs[idx], "subject": "history"}
    for idx, embedding in enumerate(vectors)
]

print(
    "Data has",
    len(data),
    "entities, each with fields: ",
    data[0].keys(),
)
print("Vector dim:", len(data[0]["vector"]))

Data has 3 entities, each with fields:  dict_keys(['id', 'vector', 'text', 'subject'])
Vector dim: 768


### Insert Data

In [9]:
# Insert the prepared entities into the collection and print the result
# returned by the client.
res = client.insert(collection_name="demo_collection", data=data)

print(res)

{'insert_count': 3, 'ids': [0, 1, 2]}


### Semantic Search

In [10]:
# Embed the question with the same embedding function used for the documents.
query_vectors = embedding_fn.encode_queries(["Who is Alan Turing?"])

In [11]:
# Sanity check: display the shape of the first query embedding.
query_vectors[0].shape

(768,)

In [12]:
# Run a vector search with the query embedding and print the top matches
# together with their "text" and "subject" fields.
res = client.search(
    collection_name="demo_collection",  # target collection
    data=query_vectors,  # query vectors
    limit=2,  # number of returned entities
    output_fields=["text", "subject"],  # specifies fields to be returned
)
print(res)

data: [[{'id': 2, 'distance': 0.5859943628311157, 'entity': {'text': 'Born in Maida Vale, London, Turing was raised in southern England.', 'subject': 'history'}}, {'id': 1, 'distance': 0.511825442314148, 'entity': {'text': 'Alan Turing was the first person to conduct substantial research in AI.', 'subject': 'history'}}]]


### Vector Search with Metadata Filtering

In [13]:
# Add a second batch of documents under a different subject ("biology").
# Their ids continue at 3, after the three "history" entities inserted earlier.
docs = [
    "Machine learning has been used for drug design.",
    "Computational synthesis with AI algorithms predicts molecular properties.",
    "DDR1 is involved in cancers and fibrosis.",
]
vectors = embedding_fn.encode_documents(docs)
data = [
    {
        "id": 3 + offset,
        "vector": embedding,
        "text": docs[offset],
        "subject": "biology",
    }
    for offset, embedding in enumerate(vectors)
]

client.insert(collection_name="demo_collection", data=data)

{'insert_count': 3, 'ids': [3, 4, 5], 'cost': 0}

In [14]:
# The filter restricts results to the "biology" subject, so any text in the
# "history" subject is excluded even if it is close to the query vector.
res = client.search(
    collection_name="demo_collection",
    data=embedding_fn.encode_queries(["tell me AI related information"]),
    filter="subject == 'biology'",
    limit=2,
    output_fields=["text", "subject"],
)

print(res)

data: [[{'id': 4, 'distance': 0.2703055143356323, 'entity': {'text': 'Computational synthesis with AI algorithms predicts molecular properties.', 'subject': 'biology'}}, {'id': 3, 'distance': 0.1642589271068573, 'entity': {'text': 'Machine learning has been used for drug design.', 'subject': 'biology'}}]]


### Query

In [15]:
# Query by scalar filter alone (no query vector is passed): fetch the entities
# whose subject is "history" and print their "text" and "subject" fields.
res = client.query(
    collection_name="demo_collection",
    filter="subject == 'history'",
    output_fields=["text", "subject"],
)
print(res)

data: ["{'id': 0, 'text': 'Artificial intelligence was founded as an academic discipline in 1956.', 'subject': 'history'}", "{'id': 1, 'text': 'Alan Turing was the first person to conduct substantial research in AI.', 'subject': 'history'}", "{'id': 2, 'text': 'Born in Maida Vale, London, Turing was raised in southern England.', 'subject': 'history'}"], extra_info: {}


In [16]:
# Query by explicit ids, this time also requesting the stored "vector" field.
# NOTE(review): printing the full vectors produces very long output; consider
# showing only a short slice of each vector when sharing this notebook.
res = client.query(
    collection_name="demo_collection",
    ids=[0, 2],
    output_fields=["vector", "text", "subject"],
)
print(res)

data: ["{'id': 0, 'vector': [0.010727827437222004, -0.035895150154829025, 0.018749775364995003, 0.01634875126183033, 0.036516912281513214, 0.0035881735384464264, -0.0004005097725894302, 0.028529400005936623, 0.0022746133618056774, 0.0018362737027928233, 0.004225848242640495, 0.027173971757292747, -0.0036844350397586823, 0.03079148940742016, 0.004505416378378868, 0.044228143990039825, 0.01050381176173687, -0.029494544491171837, -0.06707343459129333, -0.020526446402072906, 0.015322724357247353, -0.006004977971315384, -0.06228544935584068, -0.039614804089069366, 0.014206250198185444, 0.03270765766501427, -0.02083454094827175, -0.044174302369356155, -0.028339840471744537, 0.029424462467432022, -0.028087247163057327, -0.020808996632695198, 0.017159733921289444, 0.002111649140715599, 0.021823620423674583, -0.0015776895452290773, -0.037696775048971176, 0.041460759937763214, -0.02505636401474476, 0.08333666622638702, -0.015979262068867683, 0.009813846088945866, -0.026605399325489998, 0.0006190

### Delete Entities

In [17]:
# Delete entities in two ways and print the result of each call:
# first by explicit ids ...
res = client.delete(collection_name="demo_collection", ids=[0, 2])

print(res)

# ... then by a filter expression matching the "biology" subject.
res = client.delete(
    collection_name="demo_collection",
    filter="subject == 'biology'",
)

print(res)

[0, 2]
[3, 4, 5]


### Drop the collection

In [18]:
# Drop the demo collection that was created earlier in this notebook.
client.drop_collection(collection_name="demo_collection")