In [1]:
from glob import glob

text_lines = []

for file_path in glob("milvus_docs/en/faq/*.md", recursive=True):
    with open(file_path, "r") as file:
        file_text = file.read()

    text_lines += file_text.split("# ")

In [3]:
import requests

def get_embedding(text, model="nomic-embed-text:latest"):
    response = requests.post(
        "http://localhost:11434/api/embeddings",
        json={
            "model": model,
            "prompt": text
        }
    )
    return response.json()
result = get_embedding("Hey, What are you doing?", model="nomic-embed-text:latest")
embedding_dim = len(result["embedding"])
print(embedding_dim)
print(result["embedding"][:10])

768
[-0.14856210350990295, -0.859839677810669, -3.4035048484802246, 0.5005109310150146, 1.3136088848114014, -0.5431387424468994, 1.047728180885315, -0.663569986820221, 0.15675599873065948, -1.0483444929122925]


In [5]:
from pymilvus import MilvusClient

milvus_client = MilvusClient(uri="http://localhost:19530")

collection_name = "my_rag_collection"

In [6]:
if milvus_client.has_collection(collection_name):
    milvus_client.drop_collection(collection_name)

In [7]:
milvus_client.create_collection(
    collection_name=collection_name,
    dimension=embedding_dim,
    metric_type="IP"
)

In [9]:
from tqdm import tqdm

data = []

for i, line in enumerate(tqdm(text_lines, desc="Creating embeddings")):
    data.append({"id": i, "vector": get_embedding(line)["embedding"], "text": line})

milvus_client.insert(collection_name=collection_name, data=data)

Creating embeddings: 100%|██████████| 72/72 [00:05<00:00, 12.08it/s]


{'insert_count': 72, 'ids': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71], 'cost': 0}

In [10]:
question = "How is data stored in milvus?"

search_res = milvus_client.search(
    collection_name=collection_name,
    data=[
        get_embedding(question)["embedding"]
    ],
    limit=3,
    search_params={"metric_type": "IP", "params": {}},
    output_fields=["text"],
)

In [11]:
import json

retrieved_lines_with_distances = [
    (res["entity"]["text"], res["distance"]) for res in search_res[0]
]
print(json.dumps(retrieved_lines_with_distances, indent=4))

[
    [
        " Where does Milvus store data?\n\nMilvus deals with two types of data, inserted data and metadata. \n\nInserted data, including vector data, scalar data, and collection-specific schema, are stored in persistent storage as incremental log. Milvus supports multiple object storage backends, including [MinIO](https://min.io/), [AWS S3](https://aws.amazon.com/s3/?nc1=h_ls), [Google Cloud Storage](https://cloud.google.com/storage?hl=en#object-storage-for-companies-of-all-sizes) (GCS), [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs), [Alibaba Cloud OSS](https://www.alibabacloud.com/product/object-storage-service), and [Tencent Cloud Object Storage](https://www.tencentcloud.com/products/cos) (COS).\n\nMetadata are generated within Milvus. Each Milvus module has its own metadata that are stored in etcd.\n\n###",
        353.2702941894531
    ],
    [
        "What data types does Milvus support on the primary key field?\n\nIn current release, Milv

In [12]:
context = "\n".join(
    [line_with_distance[0] for line_with_distance in retrieved_lines_with_distances]
)

In [13]:
SYSTEM_PROMPT = """
Human: You are an AI assistant. You are able to find answers to the questions from the contextual passage snippet provided.
"""
USER_PROMPT = f"""
Use the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags.
<context>
{context}
</context>
<question>
{question}
</question>
"""

In [16]:
from litellm import completion

chat_stream = completion(
    model="ollama_chat/llama3.2:3b",
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT},
    ],
    api_base="http://localhost:11434",
    strem=True
)


In [23]:
for chunk in chat_stream:
    print(chunk)

('id', 'chatcmpl-74dd9734-643a-4f9d-bcfa-7d728ac85752')
('created', 1758222658)
('model', 'ollama_chat/llama3.2:3b')
('object', 'chat.completion')
('system_fingerprint', None)
('choices', [Choices(finish_reason='stop', index=0, message=Message(content='According to the provided contextual passage snippet, data in Milvus is stored as follows:\n\n- Inserted data (including vector data, scalar data, and collection-specific schema) are stored in persistent storage as incremental log.\n- Metadata are generated within Milvus and are stored in etcd.\n\nIn other words, inserted data is stored in a persistent storage backend such as MinIO, AWS S3, Google Cloud Storage, Azure Blob Storage, Alibaba Cloud OSS, or Tencent Cloud Object Storage, while metadata are stored in the etcd key-value store.', role='assistant', tool_calls=None, function_call=None, provider_specific_fields=None))])
('usage', Usage(completion_tokens=112, prompt_tokens=434, total_tokens=546, completion_tokens_details=None, promp

In [24]:
milvus_client.drop_collection(collection_name)