In [None]:
!python3 -m pip install openai
!python3 -m pip install gradio
!python3 -m pip install --upgrade jupyter ipywidgets
!python3 -m pip install requests

In [None]:
import gradio as gr
from openai import OpenAI
import sys
import requests
import json
from config import json_format

# Service endpoints. The host part of each URL is the Kubernetes service name;
# list the services of your cluster with `kubectl get service` and adjust
# these values to match.
agent_base_url = "http://llama3/v1"
code_base_url = "http://codellama/v1"
optimization_base_url = "http://cuopt-cuopt-deployment-cuopt-service:5000/cuopt/routes"

# Connection and sampling settings for the agent model. The "api_key" entry is
# a placeholder that set_api_keys() overwrites at runtime.
agent_model_config = dict(
    base_url=agent_base_url,
    api_key="dummy",
    temperature=0,
    top_p=1,
    max_tokens=1024,
    stream=True,
)

# Connection and sampling settings for the code model; same shape as the agent
# settings but with a smaller completion budget.
code_model_config = dict(
    base_url=code_base_url,
    api_key="dummy",
    temperature=0,
    top_p=1,
    max_tokens=256,
    stream=True,
)


# Cache of discovered model ids, keyed by client object, so each client asks
# its endpoint for the model list only once.
cached_models = {}


def discover_model(client):
    """Return the id of the first model served by *client*'s endpoint.

    The result is cached per client object in ``cached_models``.

    Args:
        client: Object exposing ``client.models.list()`` whose result has a
            ``data`` sequence of entries with an ``id`` attribute.

    Returns:
        The id of the first listed model, or ``None`` when the endpoint lists
        no models. Nothing is cached in that case, so a later call retries.
    """
    if client in cached_models:
        return cached_models[client]

    available_models = client.models.list()
    if not available_models.data:
        # Report the problem to the caller instead of terminating the whole
        # process: this runs inside a long-lived UI server, and the caller
        # already treats None as "no model available".
        print("No model discovered")
        return None

    model = available_models.data[0].id
    print(f"Discovered model is: {model}")
    cached_models[client] = model
    return model

def create_completion(client, messages, config):
    """Request a chat completion from the model served behind *client*.

    Args:
        client: OpenAI-style client used both for model discovery and for the
            completion request.
        messages: List of chat messages (dicts with "role" and "content").
        config: Mapping providing "temperature", "top_p", "max_tokens" and
            "stream".

    Returns:
        Whatever ``client.chat.completions.create`` returns, or an empty list
        when no model is available or any error occurs, so callers can always
        iterate over the result.
    """
    try:
        # Model discovery is itself a network call, so it must sit inside the
        # same error handling as the completion request.
        model = discover_model(client)
        if model is None:
            return []
        return client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=config["temperature"],
            top_p=config["top_p"],
            max_tokens=config["max_tokens"],
            stream=config["stream"],
        )
    except Exception as e:
        # Top-level boundary for the UI: report and degrade to "no output".
        print("Error: ", e)
        return []

def get_decision_from_agent(query):
    """Ask the agent model to classify *query*.

    The system prompt asks for one of 'code', 'optimization' or 'general'.
    The completion is capped at 10 tokens.

    Returns:
        The model's streamed answer, concatenated, stripped and lower-cased.
    """
    classification_prompt = "Please classify the following query into one of the three categories: 'code', 'optimization', or 'general'. 'code' for queries asking for code examples, programming syntax, or specific implementation details. 'optimization' for queries related to improving performance, enhancing efficiency, or refining algorithms. 'general' for queries that do not specifically pertain to coding or optimization but instead involve general concepts, explanations, or broad questions. Answer only with 'code', 'optimization', or 'general':"
    conversation = [
        {"role": "system", "content": classification_prompt},
        {"role": "user", "content": query},
    ]
    # Same settings as the agent model, but with a tiny completion budget.
    short_config = dict(agent_model_config, max_tokens=10)
    stream = create_completion(client_agent, conversation, short_config)
    pieces = (chunk.choices[0].delta.content for chunk in stream)
    answer = "".join(piece for piece in pieces if piece is not None)
    return answer.strip().lower()

def get_general_response_from_agent(query):
    """Send *query* to the agent model as a single user message.

    Returns:
        The streamed completion text concatenated into one string, unmodified.
    """
    conversation = [{"role": "user", "content": query}]
    stream = create_completion(client_agent, conversation, agent_model_config)
    # Chunks whose delta carries no content are skipped.
    pieces = (chunk.choices[0].delta.content for chunk in stream)
    return "".join(piece for piece in pieces if piece is not None)

def get_response_from_code_model(query):
    """Send *query* to the code model as a single user message.

    Returns:
        The streamed completion text concatenated into one string, with
        surrounding whitespace stripped. Letter case is preserved: the text
        is source code, where identifiers, keywords and string literals are
        case-sensitive.
    """
    messages = [{"role": "user", "content": query}]
    response = create_completion(client_code, messages, code_model_config)
    # Chunks whose delta carries no content are skipped.
    pieces = (chunk.choices[0].delta.content for chunk in response)
    codemodel_response = "".join(piece for piece in pieces if piece is not None)
    return codemodel_response.strip()

def prepare_optimization_input(query):
    """Ask the agent model to turn a routing question into a cuOpt JSON payload.

    The prompt embeds the expected JSON structure (``json_format``), two
    worked question/answer examples, and finally *query*.

    Returns:
        The model's streamed answer, concatenated, stripped and lower-cased.
        It is returned as text; parsing is left to the caller.
    """
    prompt = f'''You need to generate a payload for cuOpt to be able to answer the Question. Bellow is the structure of the json {json_format}, and you need to capture the values from the Question and populate the JSON.
        The question will be like this: Optimize the routes for three delivery trucks. Truck 1 has a capacity of 4 units and starts at location [0, 0]. Truck 2 has a capacity of 6 units and starts at location [0, 0]. Truck 3 has a capacity of 2 units and starts at location [0, 0]. They need to deliver packages to locations [2, 2] with a demand of 1 unit, [4, 4] with a demand of 3 units, and [6, 6] with a demand of 2 units. All the locations have a time window from 0 to 1080, and the service time at each location is 1 units. The cost to travel between each location is provided in the following cost matrix: from [0, 0] to [2, 2] costs 10, from [0, 0] to [4, 4] costs 20, from [0, 0] to [6, 6] costs 30, from [2, 2] to [4, 4] costs 10, from [2, 2] to [6, 6] costs 20, from [4, 4] to [6, 6] costs 10. This is the expected JSON output for the example question: {{"cost_matrix_data":{{"data":{{"0":[[0,10,20,30],[10,0,10,20],[20,10,0,10],[30,20,10,0]]}}}},"task_data":{{"task_locations":[1,2,3],"demand":[[1,3,2]],"task_time_windows":[[0,1080],[0,1080],[0,1080]],"service_times":[1,1,1]}},"fleet_data":{{"vehicle_locations":[[0,0],[0,0],[0,0]],"capacities":[[4,6,2]],"vehicle_time_windows":[[0,1080],[0,1080],[0,1080]]}},"solver_config":{{"time_limit":2}}}}.
        Another example of question is:Optimize the delivery routes for two delivery trucks. Truck 1 has a capacity of 15 units and starts at location [0, 0]. Truck 2 has a capacity of 3 units and starts at location [0, 0]. They need to deliver packages to locations [2, 2] with a demand of 2 units and [3, 3] with a demand of 15 unit. Both locations have a time window from 0 to 1080, and the service time at each location is 1 unit. The cost to travel between each location is provided in the following cost matrix: from [0, 0] to [2, 2] costs 10, from [0, 0] to [3, 3] costs 15, from [2, 2] to [3, 3] costs 35. This is the expected JSON for the second example question: {{"cost_matrix_data":{{"data":{{"0":[[0,10,15],[10,0,35],[15,35,0]]}}}},"task_data":{{"task_locations":[1,2],"demand":[[2,15]],"task_time_windows":[[0,1080],[0,1080]],"service_times":[1,1]}},"fleet_data":{{"vehicle_locations":[[0,0],[0,0]],"capacities":[[15,3]],"vehicle_time_windows":[[0,1080],[0,1080]]}},"solver_config":{{"time_limit":2}}}}.
        Respond only with the JSON. Do not include any additional text.
        When generating the JSON don`t change the provided JSON structure. The only exception is the cost_matrix_data where the number of locations define the dimension of the square matrix. It needs to contains one list for each locatation, and the lenght of each list is the total number of locations.
        Learn to identify the correct intergers in the question provided as example and generate the JSON for the next question. 
        Question: {query}'''
    conversation = [{"role": "user", "content": prompt}]
    stream = create_completion(client_agent, conversation, agent_model_config)
    # Chunks whose delta carries no content are skipped.
    pieces = (chunk.choices[0].delta.content for chunk in stream)
    payload_text = "".join(piece for piece in pieces if piece is not None)
    return payload_text.strip().lower()


def get_response_from_optimization(query):
    """Generate a cuOpt payload for *query* and submit it to the cuOpt service.

    Args:
        query: Natural-language routing question.

    Returns:
        A ``(optimized_routes, log)`` tuple. ``optimized_routes`` is the
        decoded JSON body of a 200 response, or the string
        "Failed to generate optimized routes." when the payload could not be
        parsed, the service could not be reached, or it answered with an
        error or a non-JSON body. ``log`` records the generated payload text.
    """
    # Upper bound in seconds for the HTTP call, so a hung service cannot block
    # the UI handler forever.
    request_timeout = 60

    generated_text = prepare_optimization_input(query)
    log = "----------------------\n"
    log += f"JSON generated by Llama3 that will be passed to Optimization Agent(CuOpt):\n{generated_text}\n"
    print(generated_text)

    optimized_routes = "Failed to generate optimized routes."

    try:
        cuopt_payload = json.loads(generated_text)
    except json.JSONDecodeError as e:
        print("Failed to parse generated text:", e)
        cuopt_payload = None

    if not cuopt_payload:
        print("Invalid payload generated by Agent Workflow.")
        return optimized_routes, log

    # Send the payload to cuOpt. Network failures are reported like any other
    # failure instead of propagating into the UI handler.
    try:
        response_cuopt = requests.post(
            optimization_base_url, json=cuopt_payload, timeout=request_timeout
        )
    except requests.RequestException as e:
        print("Failed to reach cuOpt:", e)
        return optimized_routes, log

    if response_cuopt.status_code != 200:
        print("Failed:", response_cuopt.status_code, response_cuopt.text)
        return optimized_routes, log

    try:
        optimized_routes = response_cuopt.json()
    except ValueError as e:
        # A 200 response whose body is not JSON keeps the failure message.
        print("Failed to decode cuOpt response:", e)
    return optimized_routes, log

def get_preliminary_response(query):
    """Route *query* to the matching backend and collect a first answer.

    The agent's classification decides the backend: the code model when the
    decision contains "code", cuOpt when it contains "optimization", and the
    general agent otherwise.

    Returns:
        A ``(response, log)`` tuple, where ``log`` is a text trace of the
        decision, the backend used and the response.
    """
    separator = "----------------------\n"
    decision = get_decision_from_agent(query)
    log_parts = [f"Decision by Agent Workflow: '{decision}'\n", separator]

    if "code" in decision:
        log_parts += ["Response generated by: Code Agent(CodeLlama)\n", separator]
        response = get_response_from_code_model(query)
    elif "optimization" in decision:
        response, optimization_log = get_response_from_optimization(query)
        # The optimization step contributes its own log section first.
        log_parts += [
            optimization_log,
            "Response generated by: Optimization Agent(CuOpt)\n",
            separator,
        ]
    else:
        log_parts += ["Response generated by: General Agent(Llama3)\n", separator]
        response = get_general_response_from_agent(query)

    log_parts += [f"Preliminary response:\n{response}\n", separator]
    return response, "".join(log_parts)

def get_final_response_from_agent_workflow(query, preliminary_response):
    """Have the agent model write the final answer to *query*.

    The preliminary response is embedded in the system prompt, which tells
    the model to repeat it at the start of the answer and to use it as
    context.

    Returns:
        The streamed completion text concatenated into one string.
    """
    instructions = f"Add the {preliminary_response} at the begining of the answer. Use this information {preliminary_response} and generate the response for the question:"
    conversation = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": query},
    ]
    stream = create_completion(client_agent, conversation, agent_model_config)
    # Chunks whose delta carries no content are skipped.
    pieces = (chunk.choices[0].delta.content for chunk in stream)
    return "".join(piece for piece in pieces if piece is not None)

def gradio_interface(query):
    """Run the full agent workflow for *query* and return its text log.

    This is the handler of the "Agent Workflow" tab.

    Returns:
        The accumulated log (decision, preliminary response, final response),
        or a short instruction message when the API clients have not been
        created yet.
    """
    # The clients only exist after set_keys_and_init_clients() has run; without
    # them every downstream call would fail with a NameError.
    if "client_agent" not in globals() or "client_code" not in globals():
        return "API clients are not initialized. Open the 'Set API Keys' tab and submit the keys first."

    preliminary_response, log = get_preliminary_response(query)
    final_response = get_final_response_from_agent_workflow(query, preliminary_response)
    # End the response with a newline so the separator starts on its own line,
    # like every other section of the log.
    log += f"Final response:\n{final_response}\n"
    log += "----------------------\n"
    return log

def set_api_keys(agent_key, code_key):
    """Store the given API keys in the two module-level model configs.

    The config dicts are mutated in place.

    Returns:
        The ``(agent_model_config, code_model_config)`` pair after the update.
    """
    agent_model_config.update(api_key=agent_key)
    code_model_config.update(api_key=code_key)
    return agent_model_config, code_model_config

def client_init(agent_config, code_config):
    """Build one OpenAI client per config.

    Each config must provide "base_url" and "api_key".

    Returns:
        A ``(client_agent, client_code)`` tuple, in that order.
    """
    agent_client, code_client = (
        OpenAI(base_url=settings["base_url"], api_key=settings["api_key"])
        for settings in (agent_config, code_config)
    )
    return agent_client, code_client

def set_keys_and_init_clients(agent_key, code_key):
    """Apply the API keys and (re)create the module-level clients.

    This is the handler of the "Set API Keys" tab. It rebinds the globals
    ``client_agent`` and ``client_code``.

    Returns:
        A confirmation message for display in the UI.
    """
    global client_agent, client_code
    configs = set_api_keys(agent_key, code_key)
    client_agent, client_code = client_init(*configs)
    return "API keys set and clients initialized."

# Tab 1: takes the two API keys (masked input) and initializes the clients.
api_key_interface = gr.Interface(
    fn=set_keys_and_init_clients,
    inputs=[
        gr.Textbox(type="password", placeholder="Enter agent API key"),
        gr.Textbox(type="password", placeholder="Enter code API key"),
    ],
    outputs="text",
    title="Set API Keys",
    description="Input the API keys for the agent and code models.",
)

# Tab 2: takes a free-text query and shows the workflow log.
workflow_interface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="Agent Workflow",
    description="Enter your query below:",
)

# Combine both tabs and start the app.
# NOTE(review): share=True asks Gradio for a public share link; confirm that
# exposing this demo outside the cluster is intended.
gr.TabbedInterface(
    interface_list=[api_key_interface, workflow_interface],
    tab_names=["Set API Keys", "Agent Workflow"],
).launch(share=True)


In [None]:
# Example questions for the cuOpt agent:
Optimize the delivery routes for two delivery trucks. Truck 1 has a capacity of 15 units and starts at location [0, 0]. Truck 2 has a capacity of 3 units and starts at location [0, 0]. They need to deliver packages to locations [2, 2] with a demand of 2 units and [3, 3] with a demand of 15 unit. Both locations have a time window from 0 to 1080, and the service time at each location is 1 unit. The cost to travel between each location is provided in the following cost matrix: from [0, 0] to [2, 2] costs 10, from [0, 0] to [3, 3] costs 15, from [2, 2] to [3, 3] costs 35.
Optimize the routes for three delivery trucks. Truck 1 has a capacity of 4 units and starts at location [0, 0]. Truck 2 has a capacity of 6 units and starts at location [0, 0]. Truck 3 has a capacity of 2 units and starts at location [0, 0]. They need to deliver packages to locations [2, 2] with a demand of 1 unit, [4, 4] with a demand of 3 units, and [6, 6] with a demand of 2 units. All the locations have a time window from 0 to 1080, and the service time at each location is 1 units. The cost to travel between each location is provided in the following cost matrix: from [0, 0] to [2, 2] costs 10, from [0, 0] to [4, 4] costs 20, from [0, 0] to [6, 6] costs 30, from [2, 2] to [4, 4] costs 10, from [2, 2] to [6, 6] costs 20, from [4, 4] to [6, 6] costs 10.
Optimize the delivery route for one delivery truck. Truck 1 has a capacity of 2 units and starts at location [0, 0]. It needs to deliver packages to location [2, 2] with a demand of 1 unit. The location has a time window from 0 to 1080, and the service time at the location is 1 unit. The cost to travel from [0, 0] to [2, 2] is 10.
Optimize the routes for four delivery trucks. Truck 1 has a capacity of 10 units and starts at location [0, 0]. Truck 2 has a capacity of 8 units and starts at location [0, 0]. Truck 3 has a capacity of 5 units and starts at location [0, 0]. Truck 4 has a capacity of 7 units and starts at location [0, 0]. They need to deliver packages to locations [2, 2] with a demand of 3 units, [4, 4] with a demand of 4 units, [6, 6] with a demand of 2 units, and [8, 8] with a demand of 5 units. All the locations have a time window from 0 to 1080, and the service time at each location is 1 unit. The cost to travel between each location is provided in the following cost matrix: from [0, 0] to [2, 2] costs 10, from [0, 0] to [4, 4] costs 20, from [0, 0] to [6, 6] costs 30, from [0, 0] to [8, 8] costs 40, from [2, 2] to [4, 4] costs 10, from [2, 2] to [6, 6] costs 20, from [2, 2] to [8, 8] costs 30, from [4, 4] to [6, 6] costs 10, from [4, 4] to [8, 8] costs 20, from [6, 6] to [8, 8] costs 10.

# Example questions for code generation:


# Example questions for general answers:

                                                                                                                                                                                                                                                                                                                                        