# Build RAG with Milvus

## Preparation

### Models and data

```bash
$ just prepare-for-build-rag
```

In [13]:
from glob import glob

text_lines = []

for file_path in glob("../milvus_docs/en/faq/*.md", recursive=True):
    with open(file_path, "r") as file:
        file_text = file.read()

    text_lines += file_text.split("# ")

print(text_lines)

[]


## Prepare the Embedding Model

In [None]:
import ollama


def emb_text(text):
    response = ollama.embed(model="mxbai-embed-large", input=text)
    embeddings = response["embeddings"]
    return embeddings

In [None]:
test_embedding = emb_text("This is a test")
embedding_dim = len(test_embedding[0])
print(embedding_dim)
print(test_embedding[:10])

## Load data into Milvus

### Create the Collection

In [None]:
from pymilvus import MilvusClient

milvus_client = MilvusClient(uri="./milvus_demo.db")

collection_name = "my_rag_collection"

In [None]:
# Check if the collection already exists and drop it if it does.
if milvus_client.has_collection(collection_name):
    milvus_client.drop_collection(collection_name)

In [None]:
milvus_client.create_collection(
    collection_name=collection_name,
    dimension=embedding_dim,
    metric_type="IP",  # Inner product distance
    consistency_level="Strong",
    # Supported values are (`"Strong"`, `"Session"`, `"Bounded"`, `"Eventually"`). See https://milvus.io/docs/consistency.md#Consistency-Level for more details.
)

### Insert data

In [None]:
from tqdm import tqdm

data = []

for i, line in enumerate(tqdm(text_lines, desc="Creating embeddings")):
    data.append({"id": i, "vector": emb_text(line)[0], "text": line})

milvus_client.insert(collection_name=collection_name, data=data)

## Build RAG


### Retrieve data for a query

In [None]:
question = "How is data stored in milvus?"

Search for the question in the collection and retrieve the semantic top-3 matches.

In [None]:
search_res = milvus_client.search(
    collection_name=collection_name,
    data=[
        emb_text(question)[0]
    ],  # Use the `emb_text` function to convert the question to an embedding vector
    limit=3,  # Return top 3 results
    search_params={"metric_type": "IP", "params": {}},  # Inner product distance
    output_fields=["text"],  # Return the text field
)

Letâ€™s take a look at the search results of the query

In [None]:
import json

retrieved_lines_with_distances = [
    (res["entity"]["text"], res["distance"]) for res in search_res[0]
]
print(json.dumps(retrieved_lines_with_distances, indent=4))

### Use LLM to get a RAG response

Convert the retrieved documents into a string format.

In [None]:
context = "\n".join(
    [line_with_distance[0] for line_with_distance in retrieved_lines_with_distances]
)

Define system and user prompts for the Language Model. This prompt is assembled with the retrieved documents from Milvus.

In [None]:
SYSTEM_PROMPT = """
Human: You are an AI assistant. You are able to find answers to the questions from the contextual passage snippets provided.
"""
USER_PROMPT = f"""
Use the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags.
<context>
{context}
</context>
<question>
{question}
</question>
"""

In [None]:
from ollama import chat
from ollama import ChatResponse

response: ChatResponse = chat(
    model='gemma3',
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT},
    ],
)
print(response['message']['content'])
# or access fields directly from the response object
print(response.message.content)