In [8]:
import os
from PIL import Image
import requests 
requests.adapters.DEFAULT_TIMEOUT = 60
from huggingface_hub import InferenceClient
import gradio as gr
import random

from dotenv import load_dotenv, find_dotenv

# Load the local .env file so the keys below can be read from os.environ.
_ = load_dotenv(find_dotenv()) # read local .env file
# Both lookups use os.environ[...] and therefore raise KeyError when the variable is not set.
hf_api_key = os.environ['HF_API_KEY']
# NOTE(review): hf_api_key_pro is not referenced by any cell visible here — confirm it is still needed.
hf_api_key_pro = os.environ['HF_API_KEY_PRO']

>mistralai/Mistral-Nemo-Instruct-2407

In [2]:
# Inference client bound to the Mistral-Nemo instruct model; later cells reuse `client`.
client = InferenceClient(
    "mistralai/Mistral-Nemo-Instruct-2407",
    token=hf_api_key,
)

# Stream the completion and print every text fragment as soon as it arrives.
for message in client.chat_completion(
    messages=[{"role": "user", "content": "What you know about Paris Olympics in 2024?"}],
    max_tokens=500,
    stream=True,
):
    # delta.content is optional on streamed chunks; fall back to "" so the
    # literal text "None" is never printed into the answer.
    print(message.choices[0].delta.content or "", end="")


The Paris Olympics in 2024, officially known as the Games of the XXXIII Olympiad, will be held in Paris, France from July 26 to August 11, 2024. Here are some key points about these upcoming Olympics:

1. **Venues**: Paris will use a combination of existing facilities, new venues specifically built for the Games, and temporary structures. Some iconic locations include the Eiffel Tower (for archery), the Stade de France (for athletics), and the Stade de Seine (for surfing, which makes its Olympic debut in 2024).

2. **Sports**: The Paris Olympics will feature 41 sports, with 32 disciplines among them. In addition to the usual sports like swimming, gymnastics, and track and field, the 2024 Games will include breaking (also known as breakdancing), sport climbing, surfing, and skateboarding, which were introduced at the Tokyo Olympics in 2020.

3. **Paralympics**: The Paris 2024 Paralympic Games will follow the Olympics, taking place from August 28 to September 8, 2024. They will also feat

>meta-llama/Meta-Llama-3.1-8B-Instruct

Llama 3.1 requires a Pro subscription, so the cells below keep using the Mistral `client` created above.

In [3]:
# Ask a single question with the `client` created earlier and stream the answer to stdout.
prompt = "What is the capital of Türkiye?"
for message in client.chat_completion(
    messages=[{"role": "user", "content": prompt}],
    max_tokens=256,
    stream=True,
):
    # delta.content is optional on streamed chunks; fall back to "" so the
    # literal text "None" is never printed into the answer.
    print(message.choices[0].delta.content or "", end="")

The capital of Türkiye is Ankara.

In [4]:
def generate(input, slider):
    """Return the model's complete reply to a single user prompt.

    Args:
        input: Prompt text, sent as the only user message.
        slider: Maximum number of new tokens. Cast to int because a UI
            slider may hand over a float while `max_tokens` is an integer count.

    Returns:
        str: The concatenated text of every streamed chunk.
    """
    messages = [{"role": "user", "content": input}]
    stream = client.chat_completion(messages=messages, max_tokens=int(slider), stream=True)
    # delta.content may be None on a chunk; treat it as empty text instead of
    # letting `str + None` raise TypeError halfway through the stream.
    return "".join(chunk.choices[0].delta.content or "" for chunk in stream)

# Form-style UI: the prompt textbox and the token slider are passed to generate(),
# and its return value is shown in the "Completion" textbox.
demo = gr.Interface(
    fn=generate,
    inputs=[gr.Textbox(label="Prompt"), 
            gr.Slider(label="Max new tokens", value=20, maximum=1024, minimum=1)], 
    outputs=[gr.Textbox(label="Completion")]
)

# Close any Gradio servers that are still running before launching this demo.
gr.close_all()
# share=True additionally creates a public share link; PORT1 must be set in the
# environment (os.environ[...] raises KeyError otherwise).
demo.launch(share=True, server_port=int(os.environ['PORT1']))

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://13c61141caee85984b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [5]:
# Stop the running Gradio server(s) before moving on to the next example.
gr.close_all()

Closing server running on port: 7860


Mock chatbot example with predefined messages.

In [6]:
def respond(message, chat_history):
    """Mock chat turn: answer with a randomly chosen canned reply (no LLM involved).

    The (message, reply) pair is appended to `chat_history` in place.

    Returns:
        tuple: ("", chat_history) — an empty string for the prompt box and the
        updated history.
    """
    canned_replies = [
        "Tell me more about it",
        "Cool, but I'm not interested",
        "Hmmmm, ok then",
    ]
    reply = random.choice(canned_replies)
    chat_history.append((message, reply))
    return "", chat_history

# Chat UI for the mock bot: a chat window, a prompt box, a submit button and a clear button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=240) # Reduced height, just to fit the notebook
    msg = gr.Textbox(label="Prompt")
    btn = gr.Button("Submit")
    # The clear button is wired to both the prompt box and the chat window.
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")

    # Button click and pressing Enter both call respond() with the same inputs/outputs.
    btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot]) #Press enter to submit

# Close any Gradio servers that are still running before launching this demo.
gr.close_all()
# PORT2 must be set in the environment (os.environ[...] raises KeyError otherwise).
demo.launch(share=True, server_port=int(os.environ['PORT2']))

Closing server running on port: 7860
Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://5fbdc1322af0ecb336.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [7]:
# Stop the running Gradio server(s) before moving on to the next example.
gr.close_all()

Closing server running on port: 7860


In [24]:
def format_chat_prompt(message, chat_history):
    """Flatten the conversation into one plain-text prompt.

    Every past (user, assistant) pair becomes "\\nUser: ...\\nAssistant: ...";
    the new message is appended last with an open "Assistant:" cue.

    Args:
        message: The new user message.
        chat_history: Iterable of two-item turns (user_message, bot_message).

    Returns:
        str: The assembled prompt (it starts with a newline).
    """
    past_turns = "".join(
        f"\nUser: {user_message}\nAssistant: {bot_message}"
        for user_message, bot_message in chat_history
    )
    return f"{past_turns}\nUser: {message}\nAssistant:"

def generate(formatted_prompt, slider):
    """Stream the model's reply, yielding the text accumulated so far after each chunk.

    Args:
        formatted_prompt: Prompt text, sent as the only user message.
        slider: Value forwarded as `max_tokens`.

    Yields:
        str: The reply received so far (grows with every chunk).
    """
    messages = [{"role": "user", "content": formatted_prompt}]
    output = ""
    for message in client.chat_completion(messages=messages, max_tokens=slider, stream=True):
        # delta.content may be None on a chunk; treat it as empty text instead of
        # letting `str + None` raise TypeError and abort the stream.
        output += message.choices[0].delta.content or ""
        yield output  # Stream the output to update the chatbot immediately

def respond(message, chat_history):
    """Run one chat turn and stream the growing reply into the history.

    The prompt is built from the history as it was before this turn. A
    (message, "") placeholder is then appended and overwritten in place with
    each partial reply.

    Yields:
        tuple: ("", chat_history) after every partial reply — an empty string
        for the prompt box and the updated history.
    """
    full_prompt = format_chat_prompt(message, chat_history)

    # Placeholder entry for this turn; its reply part is filled in while streaming.
    chat_history.append((message, ""))

    for partial_reply in generate(full_prompt, 1024):
        chat_history[-1] = (message, partial_reply)
        yield "", chat_history

# Chat UI for the streaming bot: a chat window, a prompt box, a submit button and a clear button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=240)  # Reduced height, just to fit the notebook
    msg = gr.Textbox(label="Prompt")
    btn = gr.Button("Submit")
    # The clear button is wired to both the prompt box and the chat window.
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")

    # Button click and pressing Enter both call respond() with the same inputs/outputs.
    btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])  # Press enter to submit

# Close any Gradio servers that are still running before launching this demo.
gr.close_all()
# PORT3 must be set in the environment (os.environ[...] raises KeyError otherwise).
demo.launch(share=True, server_port=int(os.environ['PORT3']))

Closing server running on port: 7860
Running on local URL:  http://127.0.0.1:7866
Running on public URL: https://edbe6fcbe67306d456.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [26]:
# Stop the running Gradio server(s) before moving on to the next example.
gr.close_all()

Closing server running on port: 7860


In [28]:
def format_chat_prompt(message, chat_history, system_message):
    """Flatten the system message and the conversation into one plain-text prompt.

    The prompt starts with `system_message`, followed by every past
    (user, assistant) pair as "\\nUser: ...\\nAssistant: ...", and ends with the
    new message plus an open "Assistant:" cue.

    Args:
        message: The new user message.
        chat_history: Iterable of two-item turns (user_message, bot_message).
        system_message: Text placed at the very beginning of the prompt.

    Returns:
        str: The assembled prompt.
    """
    segments = [
        f"\nUser: {user_message}\nAssistant: {bot_message}"
        for user_message, bot_message in chat_history
    ]
    segments.append(f"\nUser: {message}\nAssistant:")
    return f"{system_message}{''.join(segments)}"

def generate(formatted_prompt, slider, temperature):
    """Stream the model's reply, yielding the text accumulated so far after each chunk.

    Args:
        formatted_prompt: Prompt text, sent as the only user message.
        slider: Value forwarded as `max_tokens`.
        temperature: Value forwarded as the sampling `temperature`.

    Yields:
        str: The reply received so far (grows with every chunk).
    """
    messages = [{"role": "user", "content": formatted_prompt}]
    output = ""
    for message in client.chat_completion(messages=messages, max_tokens=slider, temperature=temperature, stream=True):
        # delta.content may be None on a chunk; treat it as empty text instead of
        # letting `str + None` raise TypeError and abort the stream.
        output += message.choices[0].delta.content or ""
        yield output  # Stream the output to update the chatbot immediately

def respond(message, chat_history, system_message, temperature):
    """Run one chat turn with a system message and temperature, streaming the reply.

    The prompt is built from the history as it was before this turn. A
    (message, "") placeholder is then appended and overwritten in place with
    each partial reply.

    Yields:
        tuple: ("", chat_history) after every partial reply — an empty string
        for the prompt box and the updated history.
    """
    full_prompt = format_chat_prompt(message, chat_history, system_message)

    # Placeholder entry for this turn; its reply part is filled in while streaming.
    chat_history.append((message, ""))

    for partial_reply in generate(full_prompt, 1024, temperature):
        chat_history[-1] = (message, partial_reply)
        yield "", chat_history

# Chat UI with extra options: the system message and temperature are passed to respond() as additional inputs.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=240)  # Reduced height, just to fit the notebook
    msg = gr.Textbox(label="Prompt")
    # Accordion created with open=False; it groups the two optional inputs.
    with gr.Accordion("Advanced Options", open=False):
        system_msg = gr.Textbox(label="System Message", value="You are a helpful assistant.", placeholder="Set a custom system message.")
        temperature = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.7, step=0.1)
    btn = gr.Button("Submit")
    # The clear button is wired to the prompt box and the chat window only.
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")

    
    # Button click and pressing Enter both call respond() with the same inputs/outputs.
    btn.click(respond, inputs=[msg, chatbot, system_msg, temperature], outputs=[msg, chatbot])
    msg.submit(respond, inputs=[msg, chatbot, system_msg, temperature], outputs=[msg, chatbot])  # Press enter to submit

# Close any Gradio servers that are still running before launching this demo.
gr.close_all()
# PORT4 must be set in the environment (os.environ[...] raises KeyError otherwise).
demo.launch(share=True, server_port=int(os.environ['PORT4']))

Closing server running on port: 7860
Running on local URL:  http://127.0.0.1:7868
Running on public URL: https://32310d8c56a53e1dac.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [31]:
# Stop the running Gradio server(s) now that the last example is done.
gr.close_all()

Closing server running on port: 7860
