# Setting up Environment


In [1]:
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

# Both API keys are expected to be defined. Copying os.getenv(...) straight back
# into os.environ fails with an opaque "str expected, not NoneType" TypeError when
# a key is unset, so check explicitly and name the missing key(s) instead.
_missing_env_keys = [
    key for key in ("LANGCHAIN_API_KEY", "GROQ_API_KEY") if os.getenv(key) is None
]
if _missing_env_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_keys)
        + ". Define them in the shell or in a .env file next to this notebook."
    )

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["USER_AGENT"] = "myagent"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Shared chat model used by the generation/correction tools and by the agent.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0.3,  # Reduce randomness
    max_tokens=512  # Limit response length
)

# Loading Documents


In [2]:
from langchain_community.document_loaders import PyMuPDFLoader

# PDF sources that make up the documentation corpus.
file_paths = [
    "../dataset/docs/prometheusDocs.pdf",
    "../dataset/docs/promqlCheatSheet.pdf",
    "../dataset/docs/prometheusBetterstack.pdf",
]

# Accumulate the documents produced by every loader into one flat list,
# keeping the order of file_paths.
pdf_documents = []
for file_path in file_paths:
    pdf_documents += PyMuPDFLoader(file_path).load()

print(f"Loaded {len(pdf_documents)} pages across all PDFs")



In [3]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Load the metric-name CSV. Its documents are chunked and indexed separately
# from the PDF documentation later in the notebook.
metric_csv_loader = CSVLoader(file_path="../dataset/metric_name.csv")

# NOTE(review): CSVLoader presumably yields one document per CSV row — confirm.
metric_csv_documents = metric_csv_loader.load()

# Chunking


In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# One splitter is shared by both corpora so PDF and CSV chunks are produced
# with the same size/overlap settings.
text_splitter = RecursiveCharacterTextSplitter(
    add_start_index=True,
    chunk_overlap=200,
    chunk_size=1000,
)

pdf_chunks = text_splitter.split_documents(pdf_documents)
metric_csv_chunks = text_splitter.split_documents(metric_csv_documents)

# Chunk counts: PDF corpus first, metric CSV second (one number per line).
print(len(pdf_chunks), len(metric_csv_chunks), sep="\n")



# Embedding and Storing


In [5]:
from langchain_milvus import Milvus
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model configuration.
model_name = "BAAI/bge-m3"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}

embedding = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Both collections live in the same local Milvus database file.
URI = "./milvus_promql.db"


def _rebuild_collection(collection_name, chunks):
    """Recreate `collection_name` (dropping any old copy) and index `chunks` into it."""
    store = Milvus(
        embedding_function=embedding,
        collection_name=collection_name,
        connection_args={"uri": URI},
        drop_old=True,
        auto_id=True,
    )
    store.add_documents(documents=chunks)
    return store


pdf_vectorstore = _rebuild_collection("prometheus_pdf_docs", pdf_chunks)
metric_csv_vectorstore = _rebuild_collection("prometheus_metric_csv_docs", metric_csv_chunks)

print("Finished adding documents to Milvus")





# Retriever


In [6]:
from rank_bm25 import BM25Okapi

def _min_max_normalize(scores):
    """Linearly rescale scores onto [0, 1]; a constant (or empty) list maps to all zeros."""
    values = [float(score) for score in scores]
    if not values:
        return []
    low, high = min(values), max(values)
    span = high - low
    if span <= 0:
        # No spread means this signal cannot discriminate between candidates.
        return [0.0] * len(values)
    return [(value - low) / span for value in values]


def bm25_embedding_rerank_documents(
    query: str,
    top_n: int = 4,
    vectorstore: "Milvus | None" = None,
    candidate_k: int = 10,
    bm25_weight: float = 0.5,
) -> str:
    """
    Retrieve candidates by embedding similarity and rerank them with a blend of
    BM25 (lexical) and embedding relevance scores.

    Both score lists are min-max scaled to [0, 1] over the candidate set before
    they are blended. Raw BM25 scores are unbounded while relevance scores are
    not, so an unscaled weighted sum would let BM25 dominate regardless of the
    weights. Tokens are lowercased so that "CPU" in a query matches "cpu" in a
    document.

    Parameters:
      query: The user query.
      top_n: Number of top documents to return.
      vectorstore: The vectorstore to use for retrieving documents.
      candidate_k: Number of candidates fetched from the vectorstore before
        reranking. At least `top_n` candidates are always requested.
      bm25_weight: Weight of the normalised BM25 score in the final score; the
        embedding score receives `1 - bm25_weight`.

    Returns:
      The page_content of the top_n reranked documents joined by blank lines,
      or "No vectorstore provided." / "No documents found." when there is
      nothing to rank.
    """
    if vectorstore is None:
        return "No vectorstore provided."

    # Never fetch fewer candidates than the caller wants back.
    docs_with_scores = vectorstore.similarity_search_with_relevance_scores(
        query, k=max(candidate_k, top_n)
    )

    if not docs_with_scores:
        return "No documents found."

    # Separate documents and their relevance scores (higher means more similar).
    retrieved_docs = [doc for doc, _ in docs_with_scores]
    embedding_sims = _min_max_normalize(score for _, score in docs_with_scores)

    # Case-insensitive whitespace tokenisation for the lexical signal.
    tokenized_corpus = [doc.page_content.lower().split() for doc in retrieved_docs]
    tokenized_query = query.lower().split()

    if tokenized_query and any(tokenized_corpus):
        bm25 = BM25Okapi(tokenized_corpus)
        bm25_scores = _min_max_normalize(bm25.get_scores(tokenized_query))
    else:
        # A BM25 index cannot be built over a corpus with no tokens, and an
        # empty query matches nothing: fall back to embedding scores only.
        bm25_scores = [0.0] * len(retrieved_docs)

    embedding_weight = 1.0 - bm25_weight
    combined_scores = [
        (doc, bm25_weight * bm25_score + embedding_weight * emb_sim)
        for doc, bm25_score, emb_sim in zip(retrieved_docs, bm25_scores, embedding_sims)
    ]

    # sorted() is stable, so ties keep the vectorstore's original ordering.
    reranked = sorted(combined_scores, key=lambda pair: pair[1], reverse=True)

    # Return a concatenated string of the top documents' content.
    return "\n\n".join(doc.page_content for doc, _ in reranked[:top_n])

# Documentation Retriever Tool


In [7]:
from langchain.tools import Tool

def bm25_embedding_rerank_docs(query: str, top_n: int = 2, vectorstore= pdf_vectorstore) -> str:
    """
    Documentation lookup: delegate to the shared BM25 + embedding reranker,
    defaulting to the PDF documentation vectorstore and the top 2 results.
    """
    return bm25_embedding_rerank_documents(
        query=query,
        top_n=top_n,
        vectorstore=vectorstore,
    )


# Agent-facing wrapper around the documentation lookup.
docs_tool = Tool(
    func=bm25_embedding_rerank_docs,
    name="Documentation_Reference",
    description="Use for SYNTAX/FUNCTION REFERENCES when needing operator usage, function parameters, or conceptual explanations.",
)

# Metric Name Retriever Tool


In [8]:
def bm25_embedding_rerank_metrics(query: str, top_n: int = 3, vectorstore = metric_csv_vectorstore) -> str:
    """
    Metric-name lookup: delegate to the shared BM25 + embedding reranker,
    defaulting to the metric CSV vectorstore and the top 3 results.
    """
    return bm25_embedding_rerank_documents(
        query=query,
        top_n=top_n,
        vectorstore=vectorstore,
    )


# Agent-facing wrapper around the metric-name lookup.
metrics_tool = Tool(
    func=bm25_embedding_rerank_metrics,
    name="Metric_Resolver",
    description="Use for IDENTIFYING RELEVANT METRIC NAMES when query refers to specific metrics or measurements.",
)

# Query Generation Tool


In [9]:
def generate_promql_query(query: str) -> str:
    """
    Ask the LLM to translate a natural-language request into a PromQL query.

    The request is embedded in a few-shot prompt that instructs the model to
    answer with the query only. The model's reply is returned with leading and
    trailing whitespace removed; no further validation is applied.
    """
    llm_prompt = f"""
    You are an expert in PromQL query generation. Convert the given natural language request into a valid, efficient, and optimized PromQL query.

    - Ensure syntactic correctness and adherence to PromQL best practices.
    - Use appropriate aggregations (e.g., sum, rate) and filters.
    - Avoid inefficient operations like unnecessary subqueries.
    - Return only the PromQL query without explanations.

    Examples:
    - Input: "Total CPU usage in the last 5 minutes?"
      Output: sum(rate(cpu_usage[5m]))
    - Input: "What is the 95th percentile of request duration over the last 10 minutes?"
      Output: histogram_quantile(0.95, sum(rate(request_duration_bucket[10m])) by (le))
    - Input: "Average memory usage per node over the last hour?"
      Output: avg_over_time(node_memory_usage[1h])

    Natural Language Query: "{query}"

    PromQL Query:
    """

    # The reply object's text lives in `.content`.
    return llm.invoke(llm_prompt).content.strip()


# Register generate_promql_query as an agent tool.
# NOTE(review): name/description are presumably what gets rendered into the
# agent prompt's {tools} / {tool_names} placeholders — confirm against LangChain.
query_generation_tool = Tool(
    name="PromQL_Query_Generator",
    func=generate_promql_query,
    description="Use to DIRECTLY CONVERT well-specified requirements to PromQL when confident, or after consulting examples/references."
)

# Syntax Correction Tool

In [None]:
def fix_promql_syntax(query: str) -> str:
    """
    Ask the LLM to check a PromQL query and repair it if needed.

    The query is placed in a prompt that tells the model to return a valid
    query unchanged and otherwise return only the corrected query. The model's
    reply is returned with leading and trailing whitespace removed.
    """
    llm_prompt = f"""
    You are a PromQL syntax expert. Analyze the following PromQL query:

    {query}

    The query might contain syntax errors, including mismatches between scalars and vectors 
    (for example, using a function that expects a scalar but receiving a vector, or vice versa),
    or operator/function usage issues. If the query is syntactically correct, return it unchanged.
    Otherwise, fix all syntax errors and return only the corrected query.

    Examples:
    - Input: "sum_over_time(active_users[5m])"
      Output: "sum_over_time(active_users[5m])"
    - Input: "sum(active_users[5m])"
      Output: "sum_over_time(active_users[5m])"
    """

    # The reply object's text lives in `.content`.
    return llm.invoke(llm_prompt).content.strip()


# Register fix_promql_syntax as an agent tool.
# NOTE(review): name/description are presumably what gets rendered into the
# agent prompt's {tools} / {tool_names} placeholders — confirm against LangChain.
promql_syntax_correction_tool = Tool(
    name="PromQL_Syntax_Corrector",
    func=fix_promql_syntax,
    description="Use this tool to correct syntax errors in PromQL queries, ensuring they are valid and optimized according to PromQL best practices."
)

# Agent Prompt


In [11]:
from langchain_core.prompts import PromptTemplate

# Prompt for the agent, laid out as Thought / Action / Action Input /
# Observation steps ending in a "Final Answer". {tools}, {tool_names}, {input}
# and {agent_scratchpad} are left as template variables of the PromptTemplate
# built below; everything else is fixed instruction text.
template = '''You are an expert PromQL engineer assisting with metric query creation. Strictly Follow these guidelines:
1. Prefer direct generation for simple/clear requests
2. Consult references only when uncertain about syntax or patterns or metrics name
3. Never use more than 5 actions total
4. Never use the same tool consecutively 

Available Tools:
{tools}

Response Format:
Question: Natural language query to convert
Thought: Your analysis of requirements and approach
Action: Tool name (choose from [{tool_names}])
Action Input: Input for the tool
Observation: Tool's response
... (repeat as needed, but never perform same action consecutively)
Thought: I now know the final answer
Final Answer: ONLY the final PromQL query

Special Cases:
- If query seems complete/valid after generation: STOP IMMEDIATELY
- If references disagree with generation: PRIORITIZE references
- If tools return irrelevant info: Trust generation
- If conflicting info: FLAG uncertainty in thought process

Begin!

Question: {input}
Thought:{agent_scratchpad}'''

# Parse the placeholders above into a reusable prompt object.
agent_prompt = PromptTemplate.from_template(template)

# Agent


In [None]:
from langchain.agents import AgentExecutor, create_react_agent

# Tools the agent may call: two retrievers plus LLM-backed generation/correction.
tools = [docs_tool, metrics_tool, query_generation_tool, promql_syntax_correction_tool]

model = llm

agent = create_react_agent(model, tools, agent_prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    max_iterations=7,  # Hard stop after 7 steps
    # "generate" is only implemented by the legacy LLMChain-based agents. For the
    # runnable agent returned by create_react_agent it raises ValueError as soon
    # as the iteration limit is reached, turning a slow query into a crash.
    # "force" makes the executor return its stop message instead.
    early_stopping_method="force",
    handle_parsing_errors="Return best available answer immediately"  # Add error recovery
)

In [13]:
# Smoke test: run the agent once on a sample request. As the last expression of
# the cell, the value returned by invoke() is what the notebook displays.
example_query = "Average request latency over the last 24 hours"
agent_executor.invoke({"input": example_query})



In [14]:
import pandas as pd
from tqdm import tqdm

# Run the agent over every natural-language query in the evaluation CSV and store
# its answer in the 'agentic_rag_v2_output' column. Progress is written back to
# the same CSV after each batch so an interrupted run can be resumed.
df = pd.read_csv("../evaluation/test_queries.csv")

if 'agentic_rag_v2_output' not in df.columns:
    df['agentic_rag_v2_output'] = ''

# A still-empty output column is read back from CSV as float NaN. Normalise it to
# plain strings once, so .strip() below is always valid and writing results into
# the column never mixes dtypes.
df['agentic_rag_v2_output'] = [
    '' if pd.isna(value) else str(value) for value in df['agentic_rag_v2_output']
]

batch_size = 5
num_batches = (len(df) + batch_size - 1) // batch_size

# Positional column indices: rows are addressed by position throughout, so the
# loop does not depend on the DataFrame's index labels.
output_col = df.columns.get_loc('agentic_rag_v2_output')
query_col = df.columns.get_loc('nl_query')

for batch_num in range(num_batches):
    start_idx = batch_num * batch_size
    end_idx = min(start_idx + batch_size, len(df))

    for idx in tqdm(range(start_idx, end_idx), desc=f"Processing Batch {batch_num + 1}/{num_batches}"):
        previous_output = df.iat[idx, output_col].strip()
        # Skip rows answered by an earlier run, but retry rows whose previous
        # attempt failed: "ERROR" is a failure marker, not a result.
        if previous_output and previous_output != "ERROR":
            continue

        query = df.iat[idx, query_col]

        try:
            result = agent_executor.invoke({"input": query})
            # Extract the 'output' field from the result dictionary.
            agentic_rag_v2_final_output = result["output"] if isinstance(result, dict) else result
        except Exception as e:
            # Keep the run going, but report the cause instead of hiding it.
            tqdm.write(f"Row {idx}: agent failed with {e!r}")
            agentic_rag_v2_final_output = "ERROR"

        df.iat[idx, output_col] = agentic_rag_v2_final_output

    # Save progress after each batch
    df.to_csv("../evaluation/test_queries.csv", index=False)

print("Processing complete!")








