# Setting up Environment

In [1]:
import os
from dotenv import load_dotenv

# Read variables from a local .env file (if present) into the process environment.
load_dotenv()

# Both API keys are mandatory for the rest of the notebook. Assigning
# os.getenv(...) back into os.environ would raise an opaque
# "TypeError: str expected, not NoneType" when a key is absent, so check
# explicitly and report every missing key at once instead.
_missing_keys = [
    key for key in ("LANGCHAIN_API_KEY", "GROQ_API_KEY") if os.getenv(key) is None
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Define them in the shell or in a .env file before running this notebook."
    )

# Fixed, non-secret settings used by the libraries imported in later cells.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["USER_AGENT"] = "myagent"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

from langchain_groq import ChatGroq

# Shared chat model for the notebook: used directly by the query-generation
# tool and, via the `model` alias, by the ReAct agent.
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0.3,  # Reduce randomness
    max_tokens=512  # Limit response length
)

# Loading Documents

In [2]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Load the sample PromQL queries CSV into LangChain documents.
csv_loader = CSVLoader(file_path="../dataset/sample_promql_queries.csv")

csv_documents = csv_loader.load()

# Tag every document with source="csv" so retrieval can later be restricted
# to this dataset through the metadata filter {"source": "csv"}.
for doc in csv_documents:
    doc.metadata["source"] = "csv"

In [3]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Load the metric-name CSV into LangChain documents.
metric_csv_loader = CSVLoader(file_path="../dataset/metric_name.csv")

metric_csv_documents = metric_csv_loader.load()

# Tag every document with source="metric" so retrieval can later be restricted
# to metric names through the metadata filter {"source": "metric"}.
for doc in metric_csv_documents:
    doc.metadata["source"] = "metric"

In [4]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Reference pages to scrape: Prometheus querying documentation plus a PromQL cheat sheet.
urls = [
    "https://prometheus.io/docs/prometheus/latest/querying/basics/",
    "https://prometheus.io/docs/prometheus/latest/querying/operators/",
    "https://prometheus.io/docs/prometheus/latest/querying/functions/",
    "https://prometheus.io/docs/prometheus/latest/querying/examples/",
    "https://prometheus.io/docs/prometheus/latest/querying/api/",
    "https://prometheus.io/docs/prometheus/latest/http_sd/",
    "https://promlabs.com/promql-cheat-sheet/",
]

# Only parse headings, paragraphs, lists and code elements from each page.
bs4_strainer = bs4.SoupStrainer(["h2", "h3", "p", "ul", "code"])

web_loader = WebBaseLoader(
    web_paths=urls,
    bs_kwargs={"parse_only": bs4_strainer}
)

# NOTE(review): this fetches the pages over the network on every run — confirm
# whether caching is wanted for reproducible re-runs.
web_documents = web_loader.load()

# Tag every page with source="documentation" so retrieval can later be
# restricted to it through the metadata filter {"source": "documentation"}.
for doc in web_documents:
    doc.metadata["source"] = "documentation"

In [5]:
# Single corpus for chunking; each document keeps the "source" tag assigned at load time.
documents = csv_documents + web_documents + metric_csv_documents

# Chunking

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split every loaded document into overlapping chunks (size 1000, overlap 200)
# before embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)

docs = text_splitter.split_documents(documents)

# Sanity check: number of chunks produced.
print(len(docs))
# print(docs[1151])

1176


# Embedding and Storing

In [7]:
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

# Sentence-transformer model used to embed both the chunks and, later, the queries.
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Embed all chunks and index them in a Chroma vector store.
# NOTE(review): no collection name or persist directory is passed; re-running
# this cell in the same kernel may append duplicate chunks to the existing
# collection — confirm and restart the kernel before re-indexing if so.
vectorstore = Chroma.from_documents(documents=docs, embedding=hf_embeddings)

# Retriever

In [8]:
from rank_bm25 import BM25Okapi

def _min_max_normalize(scores):
    """Linearly rescale scores into [0, 1]; an empty or constant input maps to all zeros."""
    values = [float(score) for score in scores]
    if not values:
        return []
    lowest, highest = min(values), max(values)
    if highest == lowest:
        # No spread means this signal carries no ranking information.
        return [0.0] * len(values)
    span = highest - lowest
    return [(value - lowest) / span for value in values]


def bm25_embedding_rerank_documents(query: str, top_n: int = 4, filter: dict = None) -> str:
    """
    Retrieve documents by embedding similarity (from the vectorstore) and rerank them
    with an equally weighted blend of BM25 and embedding-similarity scores.

    Both signals are min-max normalised to [0, 1] over the candidate set before
    blending. Raw BM25 scores are unbounded while the embedding similarity
    ``1 / (1 + distance)`` lies in (0, 1]; without normalisation the BM25 term
    dominates the sum and the embedding signal barely influences the ranking.

    Parameters:
      query: The user query.
      top_n: Number of top documents to return.
      filter: Optional dictionary to filter documents by metadata (e.g. {"source": "csv"}).

    Returns:
      The page_content of the top_n reranked documents joined by blank lines,
      or "No documents found." when the vectorstore returns no candidates.
    """
    # Fetch at least 10 candidates, and more when the caller asks for more than 10,
    # so a large top_n is not silently truncated.
    candidate_count = max(10, top_n)
    docs_with_scores = vectorstore.similarity_search_with_score(
        query, k=candidate_count, filter=filter
    )

    if not docs_with_scores:
        return "No documents found."

    retrieved_docs = [doc for doc, _ in docs_with_scores]

    # The second element of each pair is treated as a distance (lower is closer);
    # abs() guards against negative values before converting to a similarity.
    embedding_sims = [1 / (1 + abs(distance)) for _, distance in docs_with_scores]

    # Whitespace tokenisation, lower-cased so "CPU" in a query matches "cpu" in a document.
    tokenized_corpus = [doc.page_content.lower().split() for doc in retrieved_docs]
    tokenized_query = query.lower().split()

    # BM25 is fitted on the candidate set only, i.e. it reranks rather than retrieves.
    bm25 = BM25Okapi(tokenized_corpus)
    bm25_scores = bm25.get_scores(tokenized_query)

    # Bring both signals onto the same scale before the weighted sum.
    bm25_normalized = _min_max_normalize(bm25_scores)
    embedding_normalized = _min_max_normalize(embedding_sims)

    combined_scores = [
        (doc, 0.5 * bm25_part + 0.5 * embedding_part)
        for doc, bm25_part, embedding_part in zip(
            retrieved_docs, bm25_normalized, embedding_normalized
        )
    ]

    # sorted() is stable, so ties keep the vectorstore's original order.
    reranked = sorted(combined_scores, key=lambda pair: pair[1], reverse=True)
    top_docs = [doc for doc, _ in reranked[:top_n]]

    return "\n\n".join(doc.page_content for doc in top_docs)

# CSV Retriever Tool

In [9]:
from langchain.tools import Tool

def bm25_embedding_rerank_csv(query: str, top_n: int = 3) -> str:
    """
    Rerank only the example-query documents, i.e. those whose metadata
    "source" equals "csv", and return the top_n contents as one string.
    """
    csv_only = {"source": "csv"}
    return bm25_embedding_rerank_documents(query, top_n, filter=csv_only)

# Agent-facing wrapper around the CSV-restricted retriever; the description is
# what the agent reads when deciding whether to call this tool.
csv_tool = Tool(
    name="Example_Queries",
    func=bm25_embedding_rerank_csv,
    description="Use for EXISTING QUERY EXAMPLES when user provides similar scenarios. Input: natural language question matching known patterns."
)

# Documentation Retriever Tool

In [10]:
def bm25_embedding_rerank_docs(query: str, top_n: int = 2) -> str:
    """
    Rerank only the official-documentation chunks, i.e. those whose metadata
    "source" equals "documentation", and return the top_n contents as one string.
    """
    documentation_only = {"source": "documentation"}
    return bm25_embedding_rerank_documents(query, top_n, filter=documentation_only)

# Agent-facing wrapper around the documentation-restricted retriever; the
# description is what the agent reads when deciding whether to call this tool.
docs_tool = Tool(
    name="Documentation_Reference",
    func=bm25_embedding_rerank_docs,
    description="Use for SYNTAX/FUNCTION REFERENCES when needing operator usage, function parameters, or conceptual explanations."
)

# Metric Name Retriever Tool

In [11]:
def bm25_embedding_rerank_metrics(query: str, top_n: int = 3) -> str:
    """
    Rerank only the metric-name documents, i.e. those whose metadata
    "source" equals "metric", and return the top_n contents as one string.
    """
    metric_only = {"source": "metric"}
    return bm25_embedding_rerank_documents(query, top_n, filter=metric_only)

# Agent-facing wrapper around the metric-name retriever; the description is
# what the agent reads when deciding whether to call this tool.
metrics_tool = Tool(
    name="Metric_Resolver",
    func=bm25_embedding_rerank_metrics,
    description="Use for IDENTIFYING RELEVANT METRIC NAMES when query refers to specific metrics or measurements."
)

# Query Generation Tool

In [12]:
def _strip_code_fences(text: str) -> str:
    """Return text without a Markdown code fence or inline backticks wrapping all of it."""
    import re  # local import so this cell does not depend on another cell's imports

    cleaned = text.strip()

    # ```lang\n...\n``` or ```...``` spanning the whole reply. The language tag is
    # only recognised when followed by a newline, so "```sum(x)```" keeps "sum".
    fenced = re.fullmatch(
        r"```(?:[\w+-]*[ \t]*\n)?(.*?)\n?[ \t]*```", cleaned, flags=re.DOTALL
    )
    if fenced:
        return fenced.group(1).strip()

    # `...` spanning the whole reply, with no other backtick inside.
    if len(cleaned) >= 2 and cleaned[0] == cleaned[-1] == "`" and "`" not in cleaned[1:-1]:
        return cleaned[1:-1].strip()

    return cleaned


def generate_promql_query(query: str) -> str:
    """
    Convert a natural language request into a PromQL query using the shared LLM.

    The request is embedded in a few-shot prompt that asks for a syntactically
    correct, efficient query and nothing else. Chat models often wrap code in
    Markdown fences or backticks despite that instruction, so a wrapper around
    the whole reply is removed before returning.

    Parameters:
      query: The natural language description of the desired query.

    Returns:
      The model's reply with surrounding whitespace and any enclosing code
      fence / backticks removed.
    """
    prompt = f"""
    You are an expert in PromQL query generation. Convert the given natural language request into a valid, efficient, and optimized PromQL query.

    - Ensure syntactic correctness and adherence to PromQL best practices.
    - Use appropriate aggregations (e.g., sum, rate) and filters.
    - Avoid inefficient operations like unnecessary subqueries.
    - Return only the PromQL query without explanations.

    Examples:
    - Input: "Total CPU usage in the last 5 minutes?"
      Output: sum(rate(cpu_usage[5m]))
    - Input: "What is the 95th percentile of request duration over the last 10 minutes?"
      Output: histogram_quantile(0.95, sum(rate(request_duration_bucket[10m])) by (le))
    - Input: "Average memory usage per node over the last hour?"
      Output: avg_over_time(node_memory_usage[1h])

    Natural Language Query: "{query}"

    PromQL Query:
    """

    generated_query = llm.invoke(prompt)
    return _strip_code_fences(generated_query.content)


# Agent-facing wrapper around the LLM-backed query generator; the description
# is what the agent reads when deciding whether to call this tool.
query_generation_tool = Tool(
    name="PromQL_Query_Generator",
    func=generate_promql_query,
    description="Use to DIRECTLY CONVERT well-specified requirements to PromQL when confident, or after consulting examples/references."
)

# Agent Prompt

In [13]:
from langchain_core.prompts import PromptTemplate

# ReAct-style prompt for the agent. Placeholders filled at run time:
#   {tools}            - rendered list of the available tools
#   {tool_names}       - tool names the model may put after "Action:"
#   {input}            - the user's natural language request
#   {agent_scratchpad} - the Thought/Action/Observation history so far
# The text between the quotes is sent to the model verbatim; edit with care.
template = '''You are an expert PromQL engineer assisting with metric query creation. Strictly Follow these guidelines:
1. Prefer direct generation for simple/clear requests
2. Consult references only when uncertain about syntax or patterns or metrics name
3. Never use more than 5 actions total
4. Never use the same tool consecutively 

Available Tools:
{tools}

Response Format:
Question: Natural language query to convert
Thought: Your analysis of requirements and approach
Action: Tool name (choose from [{tool_names}])
Action Input: Input for the tool
Observation: Tool's response
... (repeat as needed, but never perform same action consecutively)
Thought: I now know the final answer
Final Answer: ONLY the final PromQL query

Special Cases:
- If query seems complete/valid after generation: STOP IMMEDIATELY
- If references disagree with generation: PRIORITIZE references
- If tools return irrelevant info → Trust generation
- If conflicting info: FLAG uncertainty in thought process

Begin!

Question: {input}
Thought:{agent_scratchpad}'''

# Compile the raw string into a prompt template object for the agent constructor.
agent_prompt = PromptTemplate.from_template(template)

# Agent

In [14]:
from langchain.agents import AgentExecutor, create_react_agent

# Tools the agent may call; their names and descriptions are rendered into the prompt.
tools = [csv_tool, docs_tool, query_generation_tool, metrics_tool]

# Alias kept so the agent-construction call reads independently of the `llm` name.
model = llm

agent = create_react_agent(model, tools, agent_prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    # Hard stop after 7 steps; the prompt itself asks the model to stay within 5 actions.
    max_iterations=7,
    # "generate" is only implemented by the legacy LLMChain-based agents. Runnable
    # agents built with create_react_agent support only "force"; with "generate"
    # the executor raises ValueError when the iteration limit is reached instead
    # of returning a stop message.
    early_stopping_method="force",
    # On unparseable model output, this string is fed back to the model as the observation.
    handle_parsing_errors="Return best available answer immediately",
)

In [16]:
# Sample natural language request for a smoke test; an alternative is kept
# commented out below for quick switching.
example_query = "Histogram quantile of 0.75 for the 'payment-processing' service response times in the last 5 minutes"
# example_query = "Total database query time in the last 5 minute"
# Run the agent; as the cell's last expression the returned dict is displayed.
agent_executor.invoke({"input": example_query})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The query seems to be asking for the 75th percentile of response times for the 'payment-processing' service over the last 5 minutes. This involves identifying the relevant metric name, determining the quantile function, and specifying the time range.

Action: Metric_Resolver
Action Input: response times for the 'payment-processing' service[0m[36;1m[1;3mdescription: Measures the response time for checkout operations.
metric_name: checkout_response_time

description: Measures the response time for HTTP requests.
metric_name: http_response_time

description: Measures the response time for API calls.
metric_name: api_response_time[0m[32;1m[1;3mThought: Based on the Metric Resolver's output, it seems that there are multiple relevant metrics related to response times. However, the query specifically mentions the 'payment-processing' service, which suggests that 'checkout_response_time' might be the most relevant metr

{'input': "Histogram quantile of 0.75 for the 'payment-processing' service response times in the last 5 minutes",
 'output': 'histogram_quantile(0.75, sum(rate(checkout_response_time[5m])) by (le))'}