In [None]:
# Import LLM libraries
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
from langchain_core.messages import SystemMessage, AIMessage, HumanMessage, ToolMessage

In [None]:
import time
import json

In [None]:
import pandas as pd
import warnings

In [None]:
from tqdm import tqdm

In [None]:
from langchain_community.tools.tavily_search import TavilySearchResults

In [None]:
import os
# Name of the LangChain project this notebook's runs are grouped under.
os.environ["LANGCHAIN_PROJECT"] = "LLM_TEST-gpt4-turbo"

In [None]:
# Install a blanket "ignore" filter so no warnings are shown from here on.
warnings.filterwarnings('ignore')

In [None]:
# Model identifier: passed to ChatOpenAI below and embedded in the name of
# every JSON/CSV file this notebook writes.
model = "gpt-4-turbo"

In [None]:
# Chat model used by the agent; temperature=0 is requested for every call.
# The API key is taken from the OPENAI_API_KEY environment variable so a real
# credential never has to be pasted into (and saved with) the notebook. The
# original placeholder string is kept as the fallback when it is not set.
llm = ChatOpenAI(
    model=model,
    temperature=0,
    api_key=os.environ.get("OPENAI_API_KEY", "set_your_key_here"),
)

In [None]:
# Two-digit labels "01" .. "10", one per benchmark pass.
test_runs = [f"{run_number:02d}" for run_number in range(1, 11)]

In [None]:
%run ../tool_list_operators-struct.ipynb

In [None]:
%run ../tool_summarize_states-struct.ipynb

In [None]:
%run ../tool_prometheus-struct.ipynb

In [None]:
%run ../tool_mlasp-struct.ipynb

In [None]:
# Explicit ignore for DeprecationWarning. A blanket 'ignore' filter is already
# installed earlier in this notebook, so this only restates it for that category.
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [None]:
%run ../tool_rag-struct.ipynb

In [None]:
#tools = [TavilySearchResults(max_results=1), tool_operators_list]

In [None]:
# Tool objects handed to the agent, in the order they are registered.
# NOTE(review): these names are not defined in this notebook; presumably they
# come from the tool_*-struct notebooks executed with %run above - confirm.
tools = [
    tool_operators_list,
    tool_namespace_pods_summary,
    tool_namespace_svc_summary,
    tool_prometheus_all_metrics,
    tool_prometheus_metric_range,
    tool_time_value,
    tool_plot_prometheus_metric_range_as_file,
    tool_mlasp_config,
    tool_retriever,
]

In [None]:
# System prompt given to the agent. The text is sent to the model verbatim, so
# its wording (including the "helpfull" spelling) is part of the test setup and
# is deliberately left untouched here.
sys_message = SystemMessage(content="You are a helpfull assistant called Dave.")

In [None]:
# Build the ReAct agent from the chat model, the tool list and the system prompt.
# No checkpointer is passed, and process_request sends a single user message
# per call.
# NOTE(review): newer langgraph releases appear to prefer `prompt=` over
# `state_modifier=` - confirm against the installed version before upgrading.
agent_react_chat = create_react_agent(llm, tools, state_modifier=sys_message)

In [None]:
# Benchmark prompts keyed by question id. Each value is sent to the agent as a
# standalone user message by process_request; the ids become file-name parts
# and column names in the result tables.
questions = {
    # Identity and tool introspection.
    "Q-01": "Hi, who are you?",
    "Q-02": "What tools do you have access to?",
    "Q-03": "Give me the list of tools you have access to.",
    "Q-04": "Give me the list and a short description of the tools you have access to.",
    # Operators in a namespace.
    "Q-05": "What operators are in namespace demo?",
    "Q-06": "What operators are in namespace demo? Please provide only the name and the version for each operator.",
    "Q-07": "How can I create a Data Science Project?",
    # NOTE(review): Q-09 reads like a follow-up to Q-08, but every question is
    # invoked on its own with no prior messages - confirm this is intended.
    "Q-08": "Can you describe Paris in 100 words or less?",
    "Q-09": "Is there a river?",
    # Pod / service summaries.
    "Q-10": "Tell me about the pods in namespace demo.",
    "Q-11": "Give me a summary of the running pods in namespace demo. Please include service and route information in the response.",
    "Q-12": "Give me the complete summary of the pods in namespace demo.",
    "Q-13": "Give me a summary of the running pods in namespace demo. Give me only the names and the route if they have one.",
    # Date and time.
    "Q-14": "What day is today?",
    "Q-15": "What is the current date time?",
    "Q-16": "What is the current timestamp?",
    # NOTE(review): Q-17 and Q-19 carry exactly the same prompt text - confirm
    # the repetition is deliberate.
    "Q-17": "What is the timestamp and date time for 3 hours ago?",
    "Q-18": "What is the timestamp and date time for 3 hours from now?",
    "Q-19": "What is the timestamp and date time for 3 hours ago?",
    # Prometheus discovery, metrics and plotting; WireMock configuration search.
    "Q-20": "Is there a prometheus service running in namespace demo? If so, give me its name and port values.",
    "Q-21": "Find out the service name and port number of the Prometheus service running in namespace demo. Then use that information to retrieve the list of metrics filtered by namespace demo.",
    "Q-22": "Find out the Prometheus service name and port number running in namespace demo. Give me all the metrics stored by it that have a name that starts with load_generator.",
    "Q-23": "What configuration of WireMock supports a throughput KPI of 307 within a 2.9 percent precision? Search for 100 epochs to find the result.",
    "Q-24": "Find out the Prometheus service name and port number running in namespace demo. Use it to to plot all the prometheus metric data for the metric load_generator_total_msg starting 40 days ago until now. Return only the file name and nothing else.",
    "Q-25": "Find out the Prometheus service name and port number running in namespace demo. Use that to get all the prometheus metric data for the metric load_generator_total_msg starting 40 days ago until now. Print out only the metric values and their associated timestamp as a CSV table.",
}

In [None]:
def process_request(request):
    """
    Process an LLM agent request/response.

    Sends ``request`` to the module-level ``agent_react_chat`` as a single user
    message, times the call and summarises the returned message list.

    :param request: the request to pass on to the LLM agent
    :return: a dictionary with the keys ``final_response``, ``total_tokens``,
        ``total_time_seconds``, ``msg_sequence``, ``ai_tool_calls`` and
        ``tool_call_responses``. On any failure ``total_tokens`` is 0,
        ``msg_sequence`` holds one ``'Exception in process_request: ...'``
        entry and ``ai_tool_calls`` is ``{"name": "AI_exception"}``.
    """

    def _failure(response_text, reason, seconds):
        # Same keys, in the same order, as the success record.
        return {
            'final_response': response_text,
            'total_tokens': 0,
            'total_time_seconds': seconds,
            'msg_sequence': ['Exception in process_request: ' + reason],
            'ai_tool_calls': {"name": "AI_exception"},
            'tool_call_responses': {},
        }

    start = time.time()
    try:
        messages = agent_react_chat.invoke({"messages": [("user", request)]})
        stop = time.time()
    except Exception as e:
        stop = time.time()
        # Report the agent's own error explicitly. Previously this path only
        # worked because the later use of the unbound ``messages`` raised a
        # NameError, which is what ended up in ``msg_sequence``.
        error_text = str(e)
        return _failure(error_text, error_text, stop - start)

    elapsed = stop - start

    try:
        total_tokens = 0
        msg_counter = 1
        msg_sequence = []
        ai_tool_calls = {}
        tool_call_responses = {}

        for msg in messages["messages"]:
            msg_sequence.append(str(type(msg)))
            if isinstance(msg, AIMessage):
                # Only the last tool call of an AI message is recorded (same
                # as before); a message without tool calls gets a marker entry.
                if msg.tool_calls:
                    ai_tool_calls[msg_counter] = msg.tool_calls[-1]
                else:
                    ai_tool_calls[msg_counter] = {"name": "AI_did_not_call_any_tools"}
                msg_counter += 1
                # usage_metadata may be missing/None; count such messages as
                # 0 tokens instead of discarding the whole response.
                usage = getattr(msg, "usage_metadata", None) or {}
                total_tokens += usage.get("total_tokens", 0)
            if isinstance(msg, ToolMessage):
                tool_call_responses[msg.tool_call_id] = msg.content

        final_response = messages["messages"][-1].content

        return {
            'final_response': final_response,
            'total_tokens': total_tokens,
            'total_time_seconds': elapsed,
            'msg_sequence': msg_sequence,
            'ai_tool_calls': ai_tool_calls,
            'tool_call_responses': tool_call_responses,
        }

    except Exception as e:
        # The agent answered but its messages could not be summarised.
        return _failure("", str(e), elapsed)

In [None]:
# Collects one summary dict per run label; filled by the benchmark loop below.
iteration_summary = dict()

In [None]:
for iteration in tqdm(test_runs):
    # Headline results of this pass, keyed by question id.
    test_run_summary = {}

    for key in tqdm(questions):
        result = process_request(questions[key])

        # Complete record for this (run, question) pair goes to its own JSON file.
        fname = f'Test-{model}-run-{iteration}-{key}.json'
        with open(fname, 'w') as fp:
            json.dump(result, fp)

        # Only these three fields are carried into the summary tables.
        metadata = {
            field: result[field]
            for field in ('total_tokens', 'total_time_seconds', 'final_response')
        }
        test_run_summary[key] = metadata

    # One CSV per pass: a row per question, a column per summary field.
    dframe_fname = f"Test-summary-{model}-iteration-{iteration}.csv"
    dframe = pd.DataFrame.from_dict(test_run_summary, orient="index")
    dframe.to_csv(dframe_fname, header=True)

    iteration_summary[iteration] = test_run_summary

In [None]:
# One row per run label, one column per question id; each cell holds that
# question's summary dict (total_tokens, total_time_seconds, final_response).
dframe_summary = pd.DataFrame.from_dict(iteration_summary, orient="index")

In [None]:
dframe_summary

In [None]:
# Save the un-flattened summary (cells are still dicts at this point).
dframe_summary.to_csv(f"test-all-runs-{model}.csv")

In [None]:
iteration_summary

In [None]:
# Move the run labels out of the index into an "index" column, leaving a
# default integer index behind.
dframe_summary = dframe_summary.reset_index()

In [None]:
# Discard the run-label column produced by reset_index(). errors='ignore'
# keeps the cell safe to re-run: once the column is gone, the original call
# raised KeyError on a second execution.
dframe_summary = dframe_summary.drop(columns=['index'], errors='ignore')

In [None]:
# Flatten every question column (cells are summary dicts) into
# "<question>-<field>" columns. The pieces are collected first and joined with
# a single concat, instead of re-concatenating the growing frame once per
# column; the result therefore no longer depends on the flattened pieces
# aligning with the index of the frame being replaced.
expanded_parts = []
for column in dframe_summary.columns:
    df = pd.json_normalize(dframe_summary[column])
    df.columns = [f'{column}-' + str(col) for col in df.columns]
    expanded_parts.append(df)

# A frame without columns is left untouched (pd.concat rejects an empty list).
if expanded_parts:
    dframe_summary = pd.concat(expanded_parts, axis=1)

In [None]:
dframe_summary.columns

In [None]:
# Save the flattened frame (one column per question/field pair).
dframe_summary.to_csv(f"test-all-runs-expanded-{model}.csv")

In [None]:
# Statistic name -> value; filled by the aggregation loop below.
aggregation = dict()

In [None]:
# Token columns get a mean; timing columns get the median, the 90th percentile
# and the maximum. Columns matching neither name contribute nothing.
for column in dframe_summary.columns:
    values = dframe_summary[column]

    if 'total_tokens' in column:
        aggregation[f'average-{column}'] = values.mean()

    if 'total_time_seconds' in column:
        aggregation.update({
            f'p50-{column}': values.quantile(0.5),
            f'p90-{column}': values.quantile(0.9),
            f'max-{column}': values.max(),
        })

In [None]:
# Wrap the statistics under row key 0 so they become a one-row table.
element = {0: aggregation}

In [None]:
# Single-row frame (row label 0) with one column per aggregated statistic.
dframe_aggregation = pd.DataFrame.from_dict(element, orient="index")

In [None]:
dframe_aggregation

In [None]:
# Save the aggregated statistics for this model.
dframe_aggregation.to_csv(f"test-all-runs-aggregated-{model}.csv")

In [None]:
# Allow up to 500 rows to be rendered so the transposed table is not truncated.
pd.options.display.max_rows = 500

In [None]:
# Reload the aggregate table from the file written above. The name is built
# from `model`, exactly as in the to_csv call, so changing the model no longer
# silently reloads a stale gpt-4-turbo file.
dframe_aggregation = pd.read_csv(f"test-all-runs-aggregated-{model}.csv")

In [None]:
dframe_aggregation.T