### Import libraries

In [3]:
from huggingface_hub import InferenceClient
import gradio as gr
from dotenv import load_dotenv
import os
import time

### Load API key

In [4]:
# Location of the .env file (relative to the current working directory).
dotenv_path = "huggingface/huggingface-playground/chat-app/.env"

# Populate os.environ from that file; the returned flag is shown as the cell output.
load_dotenv(dotenv_path)

True

In [5]:
# Read the Hugging Face API key from the process environment.
api_key = os.getenv("HUGGINGFACE_API_KEY")

# Fail fast *before* reporting success: a missing or blank key is unusable,
# and the confirmation below must never be printed in that case.
if not api_key:
    raise ValueError("HUGGINGFACE_API_KEY environment variable not set.")

print("API Key loaded successfully")

API Key loaded successfully


### Set inference client

In [6]:
# Shared inference client, authenticated with the API key loaded earlier.
client = InferenceClient(
    model="microsoft/phi-1_5",
    token=api_key,
)

### Create interface for model

In [7]:
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a chat completion for ``message`` and yield the text as it grows.

    Args:
        message: The new user message to send.
        history: Previous turns as a sequence of items where index 0 is the
            user text and index 1 is the assistant text; falsy entries are
            skipped. ``None`` is treated as an empty history.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        str: The response accumulated so far, once per streamed token. If the
        request fails with anything other than a rate-limit error, a single
        ``"An error occurred: ..."`` string is yielded instead of raising.

    Notes:
        An exception whose text contains ``'429'`` is treated as a rate limit:
        the function sleeps 60 seconds and retries, with no upper bound on the
        number of attempts.
    """
    try:
        messages = [{"role": "system", "content": system_message}]
        # The history may be None (e.g. an uninitialised gr.State), so fall
        # back to an empty sequence instead of failing on iteration.
        for turn in history or []:
            if turn[0]:
                messages.append({"role": "user", "content": turn[0]})
            if turn[1]:
                messages.append({"role": "assistant", "content": turn[1]})

        messages.append({"role": "user", "content": message})

        while True:
            # Restart the accumulated text on every attempt so that a retry
            # after a partially streamed answer does not duplicate its prefix.
            response = ""
            try:
                # Use a distinct loop name; reusing `message` would clobber
                # the function argument.
                for chunk in client.chat_completion(
                    messages=messages,
                    model="microsoft/phi-1_5",
                    max_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    stream=True
                ):
                    token = chunk["choices"][0]["delta"]["content"]
                    # A streamed delta may carry no text (content is None).
                    if token is None:
                        continue
                    response += token
                    yield response
                break
            except Exception as e:
                if '429' in str(e):
                    print("Rate limit reached. Retrying in 60 seconds...")
                    time.sleep(60)  # Wait for 60 seconds before retrying
                else:
                    # Bare raise keeps the original traceback intact.
                    raise

    except Exception as e:
        print(f"Error occurred: {e}")
        yield f"An error occurred: {e}"

def _respond_with_state(message, history, system_message, max_tokens, temperature, top_p):
    """Adapt ``respond`` to the two outputs declared on ``demo``.

    ``respond`` yields only the response text, whereas the interface below
    declares a text output *and* a State output. This wrapper yields
    ``(text, history)`` pairs so both outputs receive a value, and appends the
    finished exchange to the history so the next submit can see it.

    Parameter names mirror ``respond`` so the auto-generated input labels stay
    the same.
    """
    # gr.State() is created without an initial value, so the first call may
    # receive None; copy into a fresh list either way.
    turns = list(history or [])
    reply = ""
    for reply in respond(message, turns, system_message, max_tokens, temperature, top_p):
        # While streaming, show the partial text and leave the history untouched.
        yield reply, turns
    # NOTE(review): respond reports failures as plain text, so an error string
    # is stored as the assistant reply as well — confirm this is acceptable.
    yield reply, turns + [(message, reply)]


demo = gr.Interface(
    fn=_respond_with_state,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your message here..."),
        gr.State(),  # This must match with one State output
        gr.Textbox(lines=1, placeholder="System message"),
        gr.Slider(minimum=10, maximum=500, step=10, value=200, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.9, label="Top P")
    ],
    outputs=[
        "text",     # Response text output
        gr.State()  # This must match with one State input
    ]
)

# Start the local web server when this code is run as the main program.
if __name__ == "__main__":
    demo.launch()


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.
