In [None]:
import operator
from typing import Annotated, List, TypedDict, Union, Optional
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
import functools

### Agents
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
##

### Tools
from langchain_core.tools import tool
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_experimental.tools import PythonREPLTool
# Utils
from pathlib import Path
###

from langgraph.constants import Send
from langgraph.graph import END, StateGraph, START, MessagesState

In [None]:
import os
from dotenv import load_dotenv
# Keep LangChain tracing (LANGCHAIN_TRACING_V2) switched off for this run.
os.environ.update({"LANGCHAIN_TRACING_V2": "false"})
# Load variables from a .env file; make sure the .env file in the same
# directory as this notebook contains the OpenAI API key.
load_dotenv()

In [None]:
# tool utils

# Python REPL tool handed to the agent.
# NOTE(review): this tool presumably executes model-written code inside the
# running kernel -- confirm that is acceptable for the data being analysed.
python_repl_tool = PythonREPLTool()
# tavily_tool = TavilySearchResults(max_results=5) # TODO: get TavilyAPIKey

# Directory whose contents `prelude` lists for the agent and against which
# `read_document` resolves file names.
WORKING_DIRECTORY = Path('./playground/single_trace/dsph_fL1_orig/')

@tool
def scrape_webpages(urls: List[str]) -> str:
    """Use requests and bs4 to scrape the provided web pages for detailed information."""
    # NOTE(review): the docstring is presumably what @tool exposes to the model
    # as the tool description, so its wording is kept verbatim.
    # Load one document per URL, pad each page's text with a newline on both
    # sides, and separate consecutive pages with a blank line.
    pages = WebBaseLoader(urls).load()
    padded_texts = []
    for page in pages:
        padded_texts.append(f'\n{page.page_content}\n')
    return "\n\n".join(padded_texts)

@tool # DEP:
def get_telemetry(queries: List[str]) -> str:
    """Use Alessandro's API to request samples of traces and/or metrics (gauges or counters) from prometheus and jaeger.
    @TODO: refine type signature for input to be a tuple of counted calls
    @TODO: implement the actual functionality
    """
    # Placeholder only: `queries` is ignored and nothing is fetched, so the
    # caller receives None despite the declared ``str`` return type.
    return None


## -- File System Interaction Toolkit

# This will be run before the anomaly enumeration agents
# It makes it so they are more aware of the current state
# of the working directory.
def prelude(state):
    """Return a copy of ``state`` extended with a ``current_files`` summary.

    Ensures WORKING_DIRECTORY exists, lists every entry found beneath it
    (relative to WORKING_DIRECTORY), and stores a human-readable listing under
    the ``current_files`` key so a prompt can reference it.

    Args:
        state: Mapping holding the current state; its items are copied into
            the result unchanged.

    Returns:
        A new dict with all items of ``state`` plus ``current_files``, which is
        either ``"No files written."`` or a bulleted listing of the entries.
    """
    written_files = []
    if not WORKING_DIRECTORY.exists():
        # WORKING_DIRECTORY is nested several levels deep, so the missing parent
        # directories must be created too; a bare mkdir() raises
        # FileNotFoundError on a fresh checkout. exist_ok guards against the
        # directory appearing between the check and the call.
        WORKING_DIRECTORY.mkdir(parents=True, exist_ok=True)
    try:
        written_files = [
            f.relative_to(WORKING_DIRECTORY) for f in WORKING_DIRECTORY.rglob("*")
        ]
    except Exception:
        # Deliberate best effort: if the directory cannot be walked, report it
        # as empty instead of aborting the agent run.
        pass
    if not written_files:
        return {**state, "current_files": "No files written."}
    return {
        **state,
        "current_files": "\nBelow are the files that exist in metrics filesystem DB:\n"
        + "\n".join([f" - {f}" for f in written_files]),
    }

# TODO: we need to experiment about whether we need to read this at all.
@tool
def read_document(
    file_name: Annotated[str, "File path to read the document from."],
    start: Annotated[Optional[int], "The start line. Default is 0"] = None,
    end: Annotated[Optional[int], "The end line. Default is None"] = None,
) -> str:
    """Read the specified document."""
    # Contract (kept in comments so the tool's docstring stays unchanged):
    #   file_name -- path relative to WORKING_DIRECTORY.
    #   start     -- zero-based index of the first line to return; None means 0.
    #   end       -- index one past the last line to return; None means to the end.
    #   returns   -- the selected lines joined with "\n".
    #   raises    -- whatever Path.open raises, e.g. FileNotFoundError when the
    #                file does not exist.
    # The file_name description previously said "save", which was wrong for a
    # read-only tool.
    with (WORKING_DIRECTORY / file_name).open("r") as file:
        lines = file.readlines()
    # Substitute the default only when no start was supplied. The previous
    # check was inverted (`is not None`), which reset every requested start
    # line to 0 and made the parameter useless.
    if start is None:
        start = 0
    # NOTE(review): readlines() keeps each line's trailing newline, so joining
    # on "\n" puts a blank line between rows; left unchanged to preserve the
    # existing output format.
    return "\n".join(lines[start:end])

In [None]:
# LLM setup: one chat model instance using the "gpt-4o" model; it is passed to
# create_agent when the anomaly detector is built.
llm = ChatOpenAI(model="gpt-4o")

In [None]:
def create_agent(llm: ChatOpenAI, tools: list, system_prompt: str):
    """Build a verbose AgentExecutor for ``llm`` equipped with ``tools``.

    The chat prompt consists of ``system_prompt`` as the system message,
    followed by placeholders for the conversation ("messages") and for the
    agent's intermediate work ("agent_scratchpad").

    Args:
        llm: Chat model that drives the agent.
        tools: Tools the agent may call; also handed to the executor.
        system_prompt: Text of the system message.

    Returns:
        The AgentExecutor wrapping the created tools agent.
    """
    # Each worker node will be given a name and some tools.
    template_parts = [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="messages"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
    chat_prompt = ChatPromptTemplate.from_messages(template_parts)
    tools_agent = create_openai_tools_agent(llm, tools, chat_prompt)
    return AgentExecutor(agent=tools_agent, tools=tools, verbose=True)

def agent_node(state, agent, name):
    """Invoke ``agent`` on ``state`` and wrap its output as a named message.

    Args:
        state: Input passed straight to ``agent.invoke``.
        agent: Object exposing ``invoke``; its result must contain "output".
        name: Name attached to the produced message.

    Returns:
        A dict whose "messages" entry is a one-element list holding a
        HumanMessage with the agent's output as content.
    """
    output_text = agent.invoke(state)["output"]
    reply = HumanMessage(content=output_text, name=name)
    return {"messages": [reply]}

In [None]:
# gt_prompt = (
#     "You are a telemeter tasked with using the provided tool to retrieve"
#     " all requested metrics and traces by selecting and using the appropriate"
#     " Jaeger and/or Prometheus API endpoints."
# )

# telemetry_retriever = create_agent(llm, [get_telemetry], gt_prompt)

In [None]:
# System prompt describing a combined trace + system-metrics anomaly detector.
# It is not referenced by any other cell visible in this notebook.
# NOTE(review): the adjacent string literals are concatenated with no newline
# between them, so the numbered list reaches the model as a single run-on line.
# The text also contains typos ("cAvisor", "occured"); they are left untouched
# here because this is runtime text sent verbatim to the model.
anomdet_prompt = (
    "You are an expert at detecting anomalies in microservice application"
    " traces and metrics. You will be provided a description of an SLO violation"
    " as well as telemetrics to mark anomalies. Specifically, you will be provided"
    " with a working directory's content comprising:"
    "   1. `main_traces.csv`: which will summarize Jaeger traces for all calls in"
    "       our workload. This will include the following headers, respectively:"
    "       traceID,duration-ms,startTime,endTime,rpcErrors,operation,services_involved"
    "   2. A file for each services Prometheus metrics, exported by cAvisor. The name"
    "       the file will be the name of the microservice whose system performance"
    "       metrics we are collecting."
    " Use the provided tools and write and execute any safe code necessary to investigate"
    " potential anomalies. Please mark any anomalies you find by returning their trace IDs"
    " in the case of traces, or by returning the value ranges you think are anomalous in"
    " the case of system metrics in a simple list. Elaborate in a simple summary your rationale"
    " for marking anomalies. If anomalies have not occured, please return a FALSE."
)

# Prompt template for trace-only anomaly detection over the summarized trace
# columns. `{current_files}` is a template variable; `prelude` produces a
# `current_files` entry that can fill it. Not referenced by any other cell
# visible in this notebook.
substitute_prompt = """
You are expert at detecting anomalies in Jaeger application traces. You will be given a file in a directory to detect if its trace is anomalous.
The trace tree consists of: traceID,duration-ms,startTime,endTime,rpcErrors,operation,services_involved. Be mindful of the fact that the
traces provided will be logically cohesive. Anomalies may be in the form of errors or extreme bottlenecks, recursive retries, and otherwise.
Please employ all provided tools, code containing any statistical testing or otherwise, read document, do all that is necessary to analyze this trace tree and
identify potential anomalies. 

The following files have been provided: {current_files}
"""


# Prompt template used for the baseline agent built below: it describes the
# raw span fields and tells the model to process the large trace files
# programmatically. `{current_files}` is a template variable; `prelude`
# produces the `current_files` entry that fills it.
# NOTE(review): the text contains typos ("spansconsists", "usign", "inthe");
# they are left untouched because this is runtime text sent to the model.
substitute_prompt_for_baseline = """
You are expert at detecting anomalies in Jaeger application traces. You will be given a file in a directory to detect if its trace is anomalous.
The trace tree for spansconsists of: traceID,spanID, flags, operationName, references, startTime,duration-ms, tags, logs, warnings. Services involved 
can be deduced by usign the processID and then looking at the processes part of the JSON. Be mindful of the fact that the traces provided will be logically cohesive.
Further be mindful that the trace files are extremely large and need to be interacted with programmatically so as to conserve your context window. Anomalies may be inthe form of errors or extreme bottlenecks, recursive retries, and otherwise.
Please employ all provided tools, code containing any statistical testing or otherwise, do all that is necessary to analyze this trace tree and
identify potential anomalies. 

The following files have been provided: {current_files}
"""
# Baseline configuration: the agent receives only the Python REPL tool, i.e.
# its ability to read documents directly is removed.
# anomaly_detector = create_agent(llm, [read_document,python_repl_tool], substitute_prompt_for_baseline)
anomaly_detector = create_agent(
    llm=llm,
    tools=[python_repl_tool],
    system_prompt=substitute_prompt_for_baseline,
)
# Run `prelude` first so the state carries `current_files` before it reaches
# the agent, whose prompt references that variable.
context_aware_anomaly_marker = prelude | anomaly_detector
# Bind the composed runnable and the node name, leaving `state` as the only
# remaining argument.
anomaly_detection_agent = functools.partial(
    agent_node,
    name="AnomalyDetector",
    agent=context_aware_anomaly_marker,
)


In [None]:
# One flagged trace: the trace's identifier plus the reason it was flagged.
AnomalousTrace = TypedDict(
    "AnomalousTrace",
    {
        "trace_id": str,
        "justification": str,
    },
)

# One flagged system-metric window: the service it belongs to, the integer
# start and end of the window, and the reason it was flagged.
AnomalousMetric = TypedDict(
    "AnomalousMetric",
    {
        "service": str,
        "start_time": int,
        "end_time": int,
        "justification": str,
    },
)
    
class Anomalies(BaseModel):
    """Anomaly-detection verdict: a flag, optional flagged traces and metric ranges, and commentary."""

    # True when at least one anomaly was found (field name kept as-is for
    # compatibility, including its spelling).
    anomaly_occured: bool
    # The descriptions used to be bare strings inside Annotated[...]; pydantic v1
    # only honours FieldInfo metadata there, so they never reached the schema.
    # Field(description=...) attaches them properly. default=None matches the
    # implicit default an Optional annotation already had.
    anomalous_traces: Optional[List[AnomalousTrace]] = Field(
        default=None, description="A list of anomolous TraceIDs"
    )
    anomalous_system: Optional[List[AnomalousMetric]] = Field(
        default=None, description="ranges of anomalous system metrics"
    )
    # Free-text explanation accompanying the verdict.
    commentary: str
    
# class NonAnomaly(BaseModel):
#     anomaly_occured: bool
#     pass

# AnomalyDetectorOut = Annotated[Union[Anomaly, NonAnomaly], Field(discriminator='anomaly_occured')]

In [None]:
# Run the anomaly-detection node once on a state holding a single user
# request, then print the state update it returns.
response = anomaly_detection_agent(
    {
        "messages": [
            ("user", "Please check if the traces in the provided directory are anomalous."),
        ]
    }
)
print(response)

In [None]:
# Bare expression so the notebook shows the returned dict as this cell's output.
response