## Milvus Vector DB for RAG 

### Create a collection of legal docs

In [19]:
%pip install pymilvus


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [20]:
import os
from dotenv import load_dotenv
# from pymilvus import connections

# # If using Docker standalone Milvus
# connections.connect("default", host="127.0.0.1", port="19530")

from pymilvus import connections

load_dotenv(override=True, dotenv_path="../.env.local")

milvus_uri = os.getenv("MILVUS_URI")
milvus_token = os.getenv("MILVUS_API_KEY")


connections.connect(
    alias="default",
    uri=milvus_uri,
    token=milvus_token
)

print("Connected to Milvus on Zilliz Cloud")


Connected to Milvus on Zilliz Cloud


In [21]:
from pymilvus import db
from pymilvus import Collection, FieldSchema, CollectionSchema, DataType

# 1. Create a new database
# db.create_database("rag_db")

# 2. Switch to that database
db.using_database("rag_db")

# ----- Create schema -----
fields = [
    FieldSchema("doc_id", DataType.INT64, is_primary=True, auto_id=False),
    FieldSchema("title", DataType.VARCHAR, max_length=200),
    FieldSchema("domain", DataType.VARCHAR, max_length=100),
    FieldSchema("content", DataType.VARCHAR, max_length=2000),
    FieldSchema("embedding", DataType.FLOAT_VECTOR, dim=384) 
]

schema = CollectionSchema(fields, description="Policy documents with embeddings")
collection = Collection("policy_docs_collection", schema)

# ----- Create index -----
index_params = {
    "index_type": "IVF_FLAT",
    "metric_type": "COSINE",
    "params": {"nlist": 128},
}
collection.create_index(field_name="embedding", index_params=index_params)


Status(code=0, message=)

In [22]:

# ----- Example data -----
content_chunks = [
    {
        "doc_id": 1,
        "section": "Pay Policies",
        "title": "Employee Pay Policy",
        "domain": "Human Resources",
        "content": "Employees are paid bi-weekly via direct deposit."
    },
    {
        "doc_id": 1,
        "section": "Leave of Absence",
        "title": "Leave Request and Approval Process",
        "domain": "Human Resources",
        "content": "Employees must submit a leave request for approval."
    },
    {
        "doc_id": 1,
        "section": "Internet Use",
        "title": "Acceptable Use of Company Internet",
        "domain": "IT & Security",
        "content": "Company internet must be used for work-related tasks only."
    },
    {
        "doc_id": 2,
        "section": "Break at Work",
        "title": "Employee Break Policy",
        "domain": "Workplace Operations",
        "content": "Employees can take an hour break."
    },
    {
        "doc_id": 2,
        "section": "Harassment",
        "title": "Workplace Harassment Policy",
        "domain": "Compliance",
        "content": "Interact with each employee with respect."
    }
]


# content_chunks_list = []
# for chunk in content_chunks:
#     content_chunks_list.append(chunk["content"])
content_chunks_list = [chunk["content"] for chunk in content_chunks]
print(content_chunks_list)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("all-MiniLM-L6-v2")

doc_vectors = model.encode(content_chunks_list)
doc_vectors.shape



['Employees are paid bi-weekly via direct deposit.', 'Employees must submit a leave request for approval.', 'Company internet must be used for work-related tasks only.', 'Employees can take an hour break.', 'Interact with each employee with respect.']


(5, 384)

In [23]:
# ---- Build columnar data ----
doc_ids = [int(i + 1) for i in range(len(content_chunks))]             # INT64
titles = [str(doc["title"]) for doc in content_chunks]                 # VARCHAR
domains = [str(doc["domain"]) for doc in content_chunks]               # VARCHAR
content = [str(doc["content"]) for doc in content_chunks]               # VARCHAR
embeddings = [list(map(float, vec)) for vec in doc_vectors]       # FLOAT_VECTOR(768)


# ---- Insert column-wise ----
collection.insert([doc_ids, titles, domains, content, embeddings])
collection.flush()

print(f"Successfully inserted {len(doc_ids)} documents into Milvus.")


Successfully inserted 5 documents into Milvus.


In [24]:
#Load the collection before searching or querying
collection.load()
res = collection.query(expr="doc_id > 0", output_fields=["doc_id", "title", "domain", "content", "embedding"], limit=5)
print(res)


data: ["{'content': 'Employees are paid bi-weekly via direct deposit.', 'embedding': [0.0247251708060503, -0.009081469848752022, 0.03887130320072174, 0.032301537692546844, -0.058905500918626785, 0.03912460058927536, 0.026017993688583374, -0.04369090870022774, -0.01916009932756424, -1.1305048701615306e-06, 0.01365350279957056, 0.004326740279793739, -0.05378558859229088, 0.006868922617286444, 0.007638321723788977, -0.060696400701999664, -0.037602175027132034, 0.007140923757106066, 0.13387171924114227, 0.0017353961011394858, -0.003075605956837535, -0.07781299203634262, -0.08451439440250397, -0.0032163704745471478, 0.1338280439376831, -0.03643588349223137, 0.016922777518630028, 0.04861478507518768, -0.04028793051838875, 0.019676748663187027, -0.043712858110666275, 0.051511410623788834, -0.01629961095750332, -0.03905921056866646, -0.004453877452760935, 0.0004745072219520807, -0.031205086037516594, 0.04895620048046112, 0.012831666506826878, 0.015940086916089058, -0.04758507013320923, -0.0172

In [25]:
# print(utility.has_collection("demo_collection"))

# # Get details about a specific collection
# # Get collection details
# collection = Collection("demo_collection")  # instantiate the collection object
print(collection.schema)                    # show the schema
print(collection.num_entities)              # number of entities
print(collection.description)               # optional

{'auto_id': False, 'description': 'Policy documents with embeddings', 'fields': [{'name': 'doc_id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'title', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'domain', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 100}}, {'name': 'content', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 2000}}, {'name': 'embedding', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 384}}], 'enable_dynamic_field': False, 'enable_namespace': False}
20
Policy documents with embeddings


In [26]:
# Display results
for record in res:
    print(f"Doc ID: {record['doc_id']}")
    print(f"Title: {record['title']}")
    print(f"Domain: {record['domain']}")
    print(f"Content: {record['content']}")
    # Show only first 5 embedding values for readability
    print(f"Embedding (first 5): {record['embedding'][:5]}")
    print("-" * 80)


Doc ID: 1
Title: Employee Pay Policy
Domain: Human Resources
Content: Employees are paid bi-weekly via direct deposit.
Embedding (first 5): [0.0247251708060503, -0.009081469848752022, 0.03887130320072174, 0.032301537692546844, -0.058905500918626785]
--------------------------------------------------------------------------------
Doc ID: 2
Title: Leave Request and Approval Process
Domain: Human Resources
Content: Employees must submit a leave request for approval.
Embedding (first 5): [0.033155038952827454, 0.04853382706642151, 0.04736274480819702, 0.02527042292058468, 0.059312108904123306]
--------------------------------------------------------------------------------
Doc ID: 3
Title: Acceptable Use of Company Internet
Domain: IT & Security
Content: Company internet must be used for work-related tasks only.
Embedding (first 5): [-0.07135903835296631, -0.030664706602692604, 0.03183770552277565, -0.07750101387500763, -0.005032442044466734]
-----------------------------------------------

In [27]:
query = "What’s the leave policy?"
query_vector = model.encode([query])[0]
query_vector[:5]  # Show only first 5 values

array([0.0604582 , 0.03882854, 0.01172253, 0.0312459 , 0.1212422 ],
      dtype=float32)

In [28]:
# Search for closest match only in the 'Human Resources' domain
results = collection.search(
    data=[query_vector],
    anns_field="embedding",
    param={"metric_type": "COSINE", "params": {"nprobe": 10}},
    limit=3,
    expr='domain == "Human Resources"',
    output_fields=["doc_id", "title", "domain", "content"]
)

context_sring = ""
for res in results[0]:
    print(f"doc_id={res.entity.get('doc_id')}, "
          f"title={res.entity.get('title')}, "
          f"domain={res.entity.get('domain')}, "
          f"content={res.entity.get('content')}, "
          f"score={res.distance}")
    context_sring += f"\n -- \n {res.entity.get('content')} " # Append content to context string

print("\nContext String for RAG:\n", context_sring)    


doc_id=2, title=Leave Request and Approval Process, domain=Human Resources, content=Employees must submit a leave request for approval., score=0.6198837161064148
doc_id=1, title=Employee Pay Policy, domain=Human Resources, content=Employees are paid bi-weekly via direct deposit., score=0.23205110430717468

Context String for RAG:
 
 -- 
 Employees must submit a leave request for approval. 
 -- 
 Employees are paid bi-weekly via direct deposit. 


In [29]:
from llm_utlity import ask_question_open_ai 

query = "What’s the leave policy?"
response = ask_question_open_ai(query, context_sring)
response


'From the provided context, the leave policy requires employees to submit a leave request for approval. No other details (like types of leave, approval timelines, or accruals) are specified. If you need more specifics, please share additional context.'

In [30]:
print(f"User query: {query}")
print(f"Context: {context_sring}")

print(f"\n\nOpen AI Response: {response}")

User query: What’s the leave policy?
Context: 
 -- 
 Employees must submit a leave request for approval. 
 -- 
 Employees are paid bi-weekly via direct deposit. 


Open AI Response: From the provided context, the leave policy requires employees to submit a leave request for approval. No other details (like types of leave, approval timelines, or accruals) are specified. If you need more specifics, please share additional context.


In [31]:
# RAG Evaluation
# Context_Recall: Did the system retrieve all relevant documents?
# Context_Precision: What proportion of retrieved documents are relevant?

input = "What’s the leave policy?"
expected_context = "Employees are entitled to 20 days of paid leave annually."
actual_context = '''
 Employees must submit a leave request for approval. 
 -- 
 Employees are paid bi-weekly via direct deposit. 
 '''
#Context_Recall: 0%


input = "What’s the leave policy?"
expected_context = ["Employees are entitled to 20 days of paid leave annually."," They must submit a leave request for approval."]
actual_context = ["Employees must submit a leave request for approval. ", " Employees are paid bi-weekly via direct deposit. ", " Company internet must be used for work-related tasks only. "]
#Context_Recall: 1 out of 2 expected = 1/2 = 50%


input = "What’s the leave policy?"
expected_context = ["Employees are entitled to 20 days of paid leave annually."," They must submit a leave request for approval."]
actual_context = ["They are entitled to 20 days of paid leave in a year."," They must submit a leave request for approval."]
#Context_Recall: 2 out of 2 expected = 2/2 = 100%

In [32]:
# Context_Precision: What proportion of retrieved documents are relevant?
input = "What’s the leave policy?"
expected_context = ["Employees are entitled to 20 days of paid leave annually."," They must submit a leave request for approval."]
actual_context = ["Employees must submit a leave request for approval. ", " Employees are paid bi-weekly via direct deposit. ", " Company internet must be used for work-related tasks only. "]
#Context_Precision: 1 out of 3 retrieved = 1/3 = 33%


input = "What’s the leave policy?"
expected_context = ["Employees are entitled to 20 days of paid leave annually."," They must submit a leave request for approval."]
actual_context = ["They are entitled to 20 days of paid leave in a year."," They must submit a leave request for approval.", " Company internet must be used for work-related tasks only. "]
#Context_Precision: 2 out of 3 retrieved = 2/3 = 67%

In [33]:
##### RAG Evaluation
### Retrieval Metrics
# Context_Recall: Did the system retrieve all relevant documents?
# Context_Precision: What proportion of retrieved documents are relevant?

### Generative Metrics
# Faithfulness
# Accuracy


# F1 = 2 * (Context_Precision * Context_Recall) / (Context_Precision + Context_Recall) -- NOT USED

In [34]:
### Generative Metrics
# Accuracy

input = "What’s the leave policy?"
context = ["Employees are entitled to 20 days of paid leave annually."," They must submit a leave request for approval."]

#expected_llm_output = '''
#llm as a judge
groundtruth = ''' 
    Based on the provided context, the leave policy states that employees are entitled to 20 days of paid leave annually 
    and must submit a leave request for approval.'''

actual_llm_output = '''
    Based on the provided context, the leave policy states that employees must submit a leave request for approval. 
    No other details are given. If you have more of the policy, I can summarize that as well.'''

accuracy = 0.50


# Faithfulness - Is the generated output consistent with the provided context?
faithfulness = 0.50