In [None]:
!python3 -m pip install openai
!python3 -m pip install gradio
!python3 -m pip install --upgrade jupyter ipywidgets

In [None]:
import gradio as gr
from openai import OpenAI
import sys

# Base URLs of the two OpenAI-compatible endpoints. The host part is the
# service name; list the available services with `kubectl get service`
# and adjust these values if yours differ.
agent_base_url = "http://llama3/v1"
code_base_url = "http://codellama/v1"

# Request settings for the agent model. "api_key" starts as a placeholder
# and is overwritten by set_api_keys().
agent_model_config = dict(
    base_url=agent_base_url,
    api_key="dummy",
    temperature=0,
    top_p=1,
    max_tokens=256,
    stream=True,
)

# Request settings for the code model (same sampling settings, other endpoint).
code_model_config = dict(
    base_url=code_base_url,
    api_key="dummy",
    temperature=0,
    top_p=1,
    max_tokens=256,
    stream=True,
)

# Cache of discovered model ids, keyed by client object, so each endpoint
# is asked for its model list at most once per client.
cached_models = {}

def discover_model(client):
    """Return the id of the first model listed by ``client``'s endpoint.

    The result is cached per client object, so repeated calls with the same
    client do not query the endpoint again.

    Args:
        client: An object exposing ``models.list()`` whose result has a
            ``data`` sequence of entries with an ``id`` attribute.

    Returns:
        The id of the first listed model, or ``None`` when the endpoint
        lists no models.
    """
    try:
        return cached_models[client]
    except KeyError:
        pass

    available_models = client.models.list()
    if not available_models.data:
        # Report the problem and return None instead of calling sys.exit():
        # exiting from inside a notebook / UI callback raises SystemExit in
        # the middle of request handling, and the caller already treats a
        # None model as "no completion available".
        print("No model discovered")
        return None

    model = available_models.data[0].id
    print(f"Discovered model is: {model}")
    cached_models[client] = model
    return model

def create_completion(client, messages, config):
    """Request a chat completion from ``client`` using its discovered model.

    Args:
        client: Client object passed to ``discover_model`` and used for
            ``client.chat.completions.create``.
        messages: List of chat message dicts forwarded as ``messages``.
        config: Mapping providing ``"temperature"``, ``"top_p"``,
            ``"max_tokens"`` and ``"stream"``.

    Returns:
        The value returned by ``client.chat.completions.create``, or an
        empty list when no model is available or any step fails, so callers
        can always iterate over the result.
    """
    try:
        # discover_model() queries the endpoint (client.models.list()), so
        # it can fail just like the completion request; keep it inside the
        # try so both failures are reported the same way.
        model = discover_model(client)
        if model is None:
            return []
        return client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=config["temperature"],
            top_p=config["top_p"],
            max_tokens=config["max_tokens"],
            stream=config["stream"],
        )
    except Exception as e:
        print("Error: ", e)
        return []

def get_decision_from_agent(query):
    """Ask the agent model to classify ``query``.

    The prompt asks for one of 'code', 'optimization' or 'general'; the
    request uses the agent config with ``max_tokens`` overridden to 10.

    Args:
        query: The user's question.

    Returns:
        The streamed answer, concatenated, stripped and lowercased. This is
        an empty string when the completion yields no content.
    """
    messages = [{
        "role": "user",
        "content": f"Please classify the following query into one of the three categories: 'code', 'optimization', or 'general'. 'code' for queries asking for code examples, programming syntax, or specific implementation details. 'optimization' for queries related to improving performance, enhancing efficiency, or refining algorithms. 'general' for queries that do not specifically pertain to coding or optimization but instead involve general concepts, explanations, or broad questions. Answer only with 'code', 'optimization', or 'general': {query}"
    }]
    response = create_completion(client_agent, messages, {**agent_model_config, "max_tokens": 10})
    pieces = []
    for chunk in response:
        # A streamed chunk may carry an empty `choices` list (for example a
        # trailing usage-only chunk); skip it instead of raising IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content is not None:
            pieces.append(content)
    return "".join(pieces).strip().lower()

def get_general_response_from_agent(query):
    """Send ``query`` to the agent model and return its full streamed answer.

    Args:
        query: The user's question, sent as a single user message.

    Returns:
        The concatenated streamed content (also printed). This is an empty
        string when the completion yields no content.
    """
    messages = [{"role": "user", "content": query}]
    response = create_completion(client_agent, messages, agent_model_config)
    pieces = []
    for chunk in response:
        # A streamed chunk may carry an empty `choices` list (for example a
        # trailing usage-only chunk); skip it instead of raising IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content is not None:
            pieces.append(content)
    agent_response = "".join(pieces)
    print(agent_response)
    return agent_response

def get_response_from_code_model(query):
    """Send ``query`` to the code model and return its full streamed answer.

    Args:
        query: The user's question, sent as a single user message.

    Returns:
        The concatenated streamed content with surrounding whitespace
        stripped. The unstripped text is printed. This is an empty string
        when the completion yields no content.
    """
    messages = [{"role": "user", "content": query}]
    response = create_completion(client_code, messages, code_model_config)
    pieces = []
    for chunk in response:
        # A streamed chunk may carry an empty `choices` list (for example a
        # trailing usage-only chunk); skip it instead of raising IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content is not None:
            pieces.append(content)
    codemodel_response = "".join(pieces)
    print(codemodel_response)
    # Do not lowercase the answer: it is expected to contain source code,
    # where identifiers and literals such as True/None are case-sensitive.
    return codemodel_response.strip()

def get_preliminary_response(query):
    """Classify ``query`` and get a first answer from the matching model.

    The agent's decision is matched by substring: 'code' goes to the code
    model, 'optimization' goes to the optimization handler, and anything
    else goes to the general agent.

    Args:
        query: The user's question.

    Returns:
        A ``(response, log)`` tuple: the preliminary answer and a
        human-readable log of the decision, the responder and the answer.
    """
    decision = get_decision_from_agent(query)
    log = f"Decision by Agent Workflow: '{decision}'\n"

    # NOTE(review): get_response_from_optimization is not defined in the
    # visible part of this file - confirm whether it exists elsewhere.
    # Resolve it at call time so a missing handler falls back to the
    # general agent instead of raising NameError.
    optimization_handler = globals().get("get_response_from_optimization")

    if "code" in decision:
        log += "Response generated by: Code Agent\n"
        response = get_response_from_code_model(query)
    elif "optimization" in decision and callable(optimization_handler):
        log += "Response generated by: Optimization Agent\n"
        response = optimization_handler(query)
    else:
        if "optimization" in decision:
            log += "Optimization Agent unavailable; falling back to General Agent\n"
        log += "Response generated by: General Agent\n"
        response = get_general_response_from_agent(query)

    log += f"Preliminary response:\n{response}\n"
    return response, log

def get_final_response_from_agent_workflow(query, preliminary_response):
    """Ask the agent model for a final answer based on a preliminary one.

    Args:
        query: The user's original question.
        preliminary_response: Text from the first-stage model, embedded in
            the prompt as supporting information.

    Returns:
        The concatenated streamed content. This is an empty string when the
        completion yields no content.
    """
    messages = [{"role": "user", "content": f"Use this information {preliminary_response} and generate the response for the question: {query}"}]
    final_response = create_completion(client_agent, messages, agent_model_config)
    pieces = []
    for chunk in final_response:
        # A streamed chunk may carry an empty `choices` list (for example a
        # trailing usage-only chunk); skip it instead of raising IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content is not None:
            pieces.append(content)
    return "".join(pieces)

def gradio_interface(query):
    """Run the full agent workflow for ``query`` and return its log text.

    Args:
        query: The user's question from the UI.

    Returns:
        The workflow log (decision, responder, preliminary response and
        final response), or a short instruction message when the clients
        have not been created yet.
    """
    # client_agent / client_code are module globals that only exist after
    # set_keys_and_init_clients() has run; without this check the first
    # model call would fail with NameError.
    module_globals = globals()
    if "client_agent" not in module_globals or "client_code" not in module_globals:
        return "Clients are not initialized. Set the API keys in the 'Set API Keys' tab first."

    preliminary_response, log = get_preliminary_response(query)
    final_response = get_final_response_from_agent_workflow(query, preliminary_response)
    log += f"Final response:\n{final_response}"
    return log

def set_api_keys(agent_key, code_key):
    """Store the given API keys in the two module-level model configs.

    Args:
        agent_key: Key written to ``agent_model_config["api_key"]``.
        code_key: Key written to ``code_model_config["api_key"]``.

    Returns:
        The ``(agent_model_config, code_model_config)`` pair, both updated
        in place.
    """
    # The dicts are mutated, not rebound, so no `global` declaration is needed.
    key_by_config = (
        (agent_model_config, agent_key),
        (code_model_config, code_key),
    )
    for model_config, key in key_by_config:
        model_config["api_key"] = key
    return agent_model_config, code_model_config

def client_init(agent_config, code_config):
    """Create one ``OpenAI`` client per model config.

    Args:
        agent_config: Mapping with ``"base_url"`` and ``"api_key"`` for the
            agent model.
        code_config: Mapping with ``"base_url"`` and ``"api_key"`` for the
            code model.

    Returns:
        A ``(client_agent, client_code)`` tuple.
    """
    # Built in order: agent client first, then code client.
    agent, code = (
        OpenAI(base_url=settings["base_url"], api_key=settings["api_key"])
        for settings in (agent_config, code_config)
    )
    return agent, code

def set_keys_and_init_clients(agent_key, code_key):
    """Store both API keys and (re)create the module-level clients.

    Args:
        agent_key: API key for the agent model.
        code_key: API key for the code model.

    Returns:
        A confirmation message string.
    """
    global client_agent, client_code
    # set_api_keys() returns (agent_config, code_config), which is exactly
    # the argument pair client_init() expects.
    client_agent, client_code = client_init(*set_api_keys(agent_key, code_key))
    return "API keys set and clients initialized."

# Tab 1: two masked text fields whose values are passed to
# set_keys_and_init_clients().
agent_key_box = gr.Textbox(type="password", placeholder="Enter agent API key")
code_key_box = gr.Textbox(type="password", placeholder="Enter code API key")
api_key_interface = gr.Interface(
    fn=set_keys_and_init_clients,
    inputs=[agent_key_box, code_key_box],
    outputs="text",
    title="Set API Keys",
    description="Input the API keys for the agent and code models.",
)

# Tab 2: free-text query in, workflow log out.
workflow_interface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="Agent Workflow",
    description="Enter your query below:",
)

# Combine both tabs and start the app with share=True.
tabs = [api_key_interface, workflow_interface]
tab_titles = ["Set API Keys", "Agent Workflow"]
gr.TabbedInterface(tabs, tab_titles).launch(share=True)
