In [37]:
import requests
import json
import sys
import os
import time
import gradio as gr
from dotenv import load_dotenv


# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Base URL of the Ollama server. When OLLAMA_HOST is unset or empty we fall
# back to Ollama's default local address instead of silently building URLs
# such as "None/api/chat". A trailing slash is stripped so the endpoints
# below never contain a double slash.
OLLAMA_HOST = (os.getenv("OLLAMA_HOST") or "http://localhost:11434").rstrip("/")

# Endpoint used to send chat requests.
CHAT_ENDPOINT = f"{OLLAMA_HOST}/api/chat"
# Endpoint used to list the models available on the server.
TAGS_ENDPOINT = f"{OLLAMA_HOST}/api/tags"

In [38]:
def find_llms():
    """Return the models reported by the server's tags endpoint.

    Sends a GET request to ``TAGS_ENDPOINT`` and numbers the reported model
    names starting from 1, in the order the server lists them.

    Returns:
        dict[int, str]: Mapping of menu number -> model name. Empty when the
        server reports an empty ``"models"`` list.

    Raises:
        SystemExit: With the message "No models found" when the response has
            no usable ``"models"`` list (missing key or unexpected shape).

    Network errors and invalid JSON are not caught here; they propagate to
    the caller unchanged.
    """
    # A timeout keeps the notebook from hanging forever if the server is down.
    response = requests.get(TAGS_ENDPOINT, timeout=10)
    response_json = response.json()
    try:
        return {
            number: model["name"]
            for number, model in enumerate(response_json["models"], start=1)
        }
    except (KeyError, TypeError):
        # Only malformed payloads are reported as "no models"; any other
        # failure (e.g. KeyboardInterrupt) is deliberately left to propagate.
        sys.exit("No models found")
    

In [39]:
def query_llm(prompt, model_name):
    """Send one user message to the chat endpoint and return the reply.

    Args:
        prompt: Text sent as the content of a single "user" message.
        model_name: Value placed in the request's "model" field.

    Returns:
        tuple: ``(llm_reply, tokens_per_second)``. ``llm_reply`` is the
        ``message.content`` field of the response. ``tokens_per_second`` is
        ``eval_count / eval_duration * 10**9``, or ``0.0`` when either field
        is missing or the duration is zero.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the response JSON has no ``message.content`` field.
    """
    payload = {
        "model": model_name,
        "messages": [
            {"role": "user", "content": prompt},
        ],
        "stream": False,
    }

    response = requests.post(CHAT_ENDPOINT, json=payload)
    # Surface HTTP failures directly instead of a confusing KeyError below.
    response.raise_for_status()
    response_json = response.json()

    eval_count = response_json.get("eval_count") or 0
    eval_duration = response_json.get("eval_duration") or 0
    # The 10**9 factor converts the duration to seconds (assumes the API
    # reports nanoseconds, as the Ollama API docs state). Guard against a
    # missing or zero duration so a reply is never lost to ZeroDivisionError.
    if eval_duration:
        tokens_per_second = (eval_count / eval_duration) * 10**9
    else:
        tokens_per_second = 0.0

    llm_reply = response_json["message"]["content"]

    return llm_reply, tokens_per_second

In [40]:
# Text-Based Chatbot

def chatbot():
    """Run an interactive text chat loop on stdin/stdout.

    Each round asks for a prompt, lists the models returned by
    ``find_llms()``, asks for a model number, sends the accumulated
    conversation to ``query_llm()`` and prints the reply with its
    tokens-per-second figure. Typing ``exit`` at the prompt ends the loop.

    Conversation history is kept as a flat list of
    "User: " / text / "Response: " / text entries joined with spaces.
    Answering ``n`` to "Continue Conversation" clears that history; any
    other answer keeps it.
    """
    prompt_list = []
    while True:
        print("\nHello! I am your personal assistant! I am here to answer your prompt.")
        user_input = input("\nEnter your prompt or 'exit' to quit: ")
        if user_input == "exit":
            break

        llm_list = find_llms()

        print("\nAvailable Models:\n")
        for num, model in llm_list.items():
            print(f" {num}: {model}")

        print("\nChoose which model you would like to answer your prompt:\n")

        # Non-numeric input is treated like any other invalid choice instead
        # of crashing the loop with ValueError.
        try:
            model_choice = int(input("Enter a model number: "))
        except ValueError:
            model_choice = None
        if model_choice not in llm_list:
            print("Invalid model choice. Restart the chatbot to continue.")
            continue

        # The prompt is recorded only after a valid model was chosen, so a
        # rejected choice does not leave an unanswered turn in the history.
        prompt_list.extend(["User: ", user_input])
        prompt = " ".join(prompt_list)

        model_name = llm_list[model_choice]
        response_llm, tokens_per_second = query_llm(prompt, model_name)
        prompt_list.extend(["Response: ", response_llm])

        print("\n")
        print(model_name, ": ", response_llm)
        print("\n")
        print(f"Tokens Per Second: {tokens_per_second:.2f}")

        user_input = input("\nContinue Conversation (y/n)?: ")
        if user_input == 'y':
            # Show the conversation so far, including the latest reply.
            print(" ".join(prompt_list))
        elif user_input == 'n':
            # Start the next round with an empty history.
            prompt_list = []




In [41]:
def respond(user_input, history, model_name):
    """Stream a model reply for the chat UI, one character at a time.

    Builds a plain-text transcript from ``history`` plus the new
    ``user_input``, sends it to ``query_llm()`` and yields progressively
    longer prefixes of the reply so the UI shows a typing effect. The
    tokens-per-second figure is appended to the reply in italics.

    Args:
        user_input: The message the user just submitted.
        history: Previous turns. Accepted either as (user, assistant) pairs
            or as dicts with "role" and "content" keys (assumed to be
            Gradio's "messages" format; any role other than "user" is
            labelled "Assistant" -- TODO confirm if other roles are used).
        model_name: Model identifier forwarded to ``query_llm()``.

    Yields:
        str: Growing prefixes of the combined reply text.
    """
    transcript = []
    for turn in history:
        if isinstance(turn, dict):
            speaker = "User" if turn.get("role") == "user" else "Assistant"
            transcript.append(f"{speaker}: {turn.get('content')}")
        else:
            user_text, assistant_text = turn
            transcript.append(f"User: {user_text}\nAssistant: {assistant_text}")
    transcript.append(f"User: {user_input}\nAssistant:")
    # Joining the lines avoids a stray leading newline when history is empty.
    prompt = "\n".join(transcript)

    response_llm, tokens_per_second = query_llm(prompt, model_name)
    combined_response = f"{response_llm}\n\n_({tokens_per_second:.2f} tokens/sec)_"
    for end in range(1, len(combined_response) + 1):
        # Small delay between characters to produce the typing effect.
        time.sleep(0.01)
        yield combined_response[:end]


In [42]:
# Chat Interface

# Build the model picker from the models the server currently reports.
llm_list = find_llms()
model_names = list(llm_list.values())

with gr.Blocks(title="AmithGPT", theme='soft') as ui:
    # Select the first model explicitly so the chat callback is given a model
    # name even when the user never touches the dropdown. If the server
    # reports no models, there is nothing to select and the value stays None.
    model_selection = gr.Dropdown(
        choices=model_names,
        value=model_names[0] if model_names else None,
        label="Model",
    )
    # The dropdown's current value is passed to respond() as model_name.
    chat = gr.ChatInterface(
        title="AmithGPT",
        fn=respond,
        autofocus=False,
        additional_inputs=model_selection,
        description="What can I help with today?",
    )

if __name__ == "__main__":
    ui.launch()



  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7874
* To create a public link, set `share=True` in `launch()`.
