In [5]:
# Print the directory the notebook is running from (shell command via IPython's `!` escape).
!pwd

/Users/amiteshsinha/Training/10_2025_genai_lab/Session_05_Docker_Milvus_VectorDB


In [4]:
# List every entry in the working directory, hidden ones included, in long format.
!ls -al

total 168
drwxr-xr-x@  3 amiteshsinha  staff     96 Nov  8 07:18 __pycache__
drwxr-xr-x@  7 amiteshsinha  staff    224 Nov  8 14:22 .
drwxr-xr-x  15 amiteshsinha  staff    480 Nov 11 16:27 ..
-rw-r--r--@  1 amiteshsinha  staff    935 Nov  8 07:17 llm_utlity.py
-rw-r--r--@  1 amiteshsinha  staff  12023 Nov  8 16:39 milvus_db_intro.ipynb
-rw-r--r--@  1 amiteshsinha  staff  58302 Nov  8 16:49 milvus_rag_search.ipynb
-rw-r--r--@  1 amiteshsinha  staff   5952 Nov  8 07:20 milvus_rag.ipynb


In [7]:
import shutil

# Remove the scratch directory "test_dir" from the working directory.
# shutil.rmtree replaces the POSIX-only `!rm -r`, so the cell works on any OS;
# ignore_errors=True turns it into a no-op when the directory is already gone,
# which keeps the cell safe to re-run.
shutil.rmtree("test_dir", ignore_errors=True)

In [None]:
%pip install pymilvus # Virtual environment installation

# !pip install pymilvus - Operating system installation
# pip install pymilvus # .py script installation


[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.


### Connect to Milvus

In [11]:
from pymilvus import connections

# Address of the Milvus server used throughout this notebook.
MILVUS_HOST = "127.0.0.1"
MILVUS_PORT = "19530"

# Open a connection and register it under the alias "default".
connections.connect(alias="default", host=MILVUS_HOST, port=MILVUS_PORT)

### Create a Collection (like a table)

In [12]:
from pymilvus import FieldSchema, CollectionSchema, DataType, Collection

# One FieldSchema per column:
#   id        - INT64 primary key, generated automatically (auto_id=True)
#   title     - VARCHAR of at most 200 characters
#   embedding - float vector with 4 dimensions
id_field = FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True)
title_field = FieldSchema(name="title", dtype=DataType.VARCHAR, max_length=200)
embedding_field = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=4)

fields = [id_field, title_field, embedding_field]
schema = CollectionSchema(fields, description="Simple demo collection")

# Build the collection handle for "demo_collection" from the schema above.
collection = Collection(name="demo_collection", schema=schema)


In [13]:

from pymilvus import utility

# Fetch the collection names from the connected server and print them.
collection_names = utility.list_collections()
print(collection_names)

['employee_policies', 'demo_collection']


In [17]:
# Print whether "demo_collection" exists.
print(utility.has_collection("demo_collection"))

# Instantiate a handle by name only (no schema passed), then print its
# schema, entity count and description, in that order.
demo_collection = Collection("demo_collection")
for attribute_name in ("schema", "num_entities", "description"):
    print(getattr(demo_collection, attribute_name))

True
{'auto_id': True, 'description': 'Simple demo collection', 'fields': [{'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': True}, {'name': 'title', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'embedding', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 4}}], 'enable_dynamic_field': False}
3
Simple demo collection


### Insert Data (like INSERT INTO)

In [18]:
# Column-ordered payload: one inner list per field, following the schema order
# (titles first, then the matching 4-dimensional embeddings). The "id" field is
# left out because the schema declares it with auto_id=True.
demo_data = [
    ["Intro to AI", "Deep Learning", "Vector Databases"],
    [
        [0.1, 0.2, 0.3, 0.4],
        [0.2, 0.3, 0.4, 0.5],
        [0.3, 0.4, 0.5, 0.6],
    ],
]

# NOTE: ids are auto-generated, so re-running this cell appends the same three
# rows again rather than overwriting them (the entity count grows by 3 per run).
demo_collection.insert(demo_data)
demo_collection.flush()

# Read the count from the same handle the rows were inserted through, so this
# cell does not depend on the separate `collection` variable from the
# collection-creation cell being defined.
print(f"Number of entities after insertion: {demo_collection.num_entities}")


Number of entities after insertion: 6


### Create an Index (for fast vector search)

In [24]:
# Index settings for the "embedding" vector field.
index_params = dict(
    metric_type="L2",        # or "COSINE"
    index_type="IVF_FLAT",   # fast approximate search
    params={"nlist": 64},
)

# Kept as the cell's last expression so the returned status is displayed.
demo_collection.create_index(field_name="embedding", index_params=index_params)


Status(code=0, message=)

### Query Metadata (like SQL SELECT * WHERE)

In [25]:
# Load the collection, then run a scalar filter on the "title" field.
demo_collection.load()

# Fetch at most 10 entities whose title is exactly 'Deep Learning',
# returning both the title and the stored embedding.
title_filter = "title == 'Deep Learning'"
results = demo_collection.query(
    expr=title_filter,
    output_fields=["title", "embedding"],
    limit=10,
)

for row in results:
    print(row)


{'title': 'Deep Learning', 'embedding': [0.20000000298023224, 0.30000001192092896, 0.4000000059604645, 0.5], 'id': 462137331136081147}
{'title': 'Deep Learning', 'embedding': [0.20000000298023224, 0.30000001192092896, 0.4000000059604645, 0.5], 'id': 462137331136081150}


In [26]:
demo_collection.load()

# Filter on the title again, this time asking only for the "title" field.
query_kwargs = {
    "expr": "title == 'Deep Learning'",
    "output_fields": ["title"],
    "limit": 10,
}
results = demo_collection.query(**query_kwargs)

for record in results:
    print(record)

{'title': 'Deep Learning', 'id': 462137331136081147}
{'title': 'Deep Learning', 'id': 462137331136081150}


### Search by Vector (the heart of Milvus)

In [29]:
import numpy as np

# A single probe vector with 4 components, matching the "embedding" field's dim.
query_vector = [[0.15, 0.25, 0.35, 0.45]]
# Alternative probe: query_vector = [[0.2, 0.3, 0.4, 0.5]]

search_params = {"metric_type": "L2", "params": {"nprobe": 10}}

# Rough SQL analogy:
#   select title, embedding from demo_collection
#   where embedding is nearest to query_vector (top 2)
results = demo_collection.search(
    data=query_vector,
    anns_field="embedding",  # vector field used for the approximate nearest neighbor search
    param=search_params,
    limit=2,
    output_fields=["title", "embedding"],
)

# Only one probe vector was sent, so the hits of interest are in results[0].
first_query_hits = results[0]
for hit in first_query_hits:
    print(f"Matched: {hit.entity.get('title')}, Distance: {hit.distance}")


Matched: Intro to AI, Distance: 0.009999996051192284
Matched: Deep Learning, Distance: 0.01000000350177288


### List & Drop Collections

In [30]:
from pymilvus import utility

# List the collections, drop "demo_collection", then list again to confirm
# that it is gone.
collections_before = utility.list_collections()
print(f"List of collection BEFORE dropping the collection: {collections_before}")

utility.drop_collection("demo_collection")

collections_after = utility.list_collections()
print(f"List of collection after dropping the collection: {collections_after}")

List of collection BEFORE dropping the collection: ['employee_policies', 'demo_collection']
List of collection after dropping the collection: ['employee_policies']
