In [1]:
import os
import logging
import gradio as gr
from time import sleep
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
import numpy as np
import torch
from scipy.io import wavfile

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
logging.basicConfig(
    level=logging.DEBUG,  # Change to DEBUG for more details
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler(r"Q:\Projects\Multimodal-Jarvis\data\logs\app.log"),  # Save logs to a file
        logging.StreamHandler()  # Show logs in the console
    ]
)

logging.info("Logging is set up!")

2025-02-06 17:09:20,995 - INFO - Logging is set up!


### Theming

In [2]:
# https://www.gradio.app/guides/theming-guide
# https://huggingface.co/spaces/gstaff/xkcd/blob/main/app.py
# test_theme = gr.Theme.from_hub("gstaff/xkcd")
# gr.themes.builder()

: 

In [3]:
gr.TabItem?

[1;31mInit signature:[0m
[0mgr[0m[1;33m.[0m[0mTabItem[0m[1;33m([0m[1;33m
[0m    [0mlabel[0m[1;33m:[0m [1;34m'str | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mvisible[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0minteractive[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [1;33m*[0m[1;33m,[0m[1;33m
[0m    [0mid[0m[1;33m:[0m [1;34m'int | str | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0melem_id[0m[1;33m:[0m [1;34m'str | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0melem_classes[0m[1;33m:[0m [1;34m'list[str] | str | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mscale[0m[1;33m:[0m [1;34m'int'[0m [1;33m=[0m [1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mrender[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mTrue[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m

In [3]:
theme = gr.themes.Default(
    font=['Noto Sans', 'Helvetica', 'ui-sans-serif', 'system-ui', 'sans-serif'],
    font_mono=['IBM Plex Mono', 'ui-monospace', 'Consolas', 'monospace'],
).set(
    border_color_primary='#c5c5d2',
    button_large_padding='6px 12px',
    body_text_color_subdued='#484848',
    background_fill_secondary='#eaeaea',
    background_fill_primary='var(--neutral-50)',
    body_background_fill="white",
    block_background_fill="#f4f4f4",
    body_text_color="#333",
    button_secondary_background_fill="#f4f4f4",
    button_secondary_border_color="var(--border-color-primary)"
)

### Prototyping

In [None]:
def enable_input():
    return gr.update(interactive=True)

def print_like_dislike(x: gr.LikeData):
    print(x.index, x.value, x.liked)

def handle_undo(history: list[gr.ChatMessage], undo_data: gr.UndoData) -> tuple[list[gr.ChatMessage], str]:
    return history[:undo_data.index], history[undo_data.index].content 


def handle_edit(history: list[gr.ChatMessage], edit_data: gr.EditData) -> list[gr.ChatMessage]:
    new_history = history[:edit_data.index]
    new_history[-1].content = edit_data.value  
    return new_history


def add_message(history, message):
    if message["text"] is not None:
      history.append(gr.ChatMessage(role = "user", content = message["text"])) 

    for file_path in message["files"]:
        if file_path.endswith(".wav"):
            transcribed_text = audiofile_to_text(file_path)
            history.append(gr.ChatMessage(role = "user", content = transcribed_text))
    return history, gr.MultimodalTextbox(value=None, interactive=False)

def bot_output(history: list):
    try:
        history.append(gr.ChatMessage(role="assistant", content=""))
        
        text = tokenizer.apply_chat_template(
            history,    # [msg.dict() for msg in history]
            tokenize=False,
            add_generation_prompt=True,
        )

        model_inputs  = tokenizer([text], return_tensors="pt").to(model.device)
        with torch.no_grad():
            generated_ids = model.generate(**model_inputs, max_new_tokens=512)
        generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
        generated_text  = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        
        if not generated_text or not isinstance(generated_text, str):  
            generated_text = "I'm sorry, but I couldn't generate a response."

        for char in generated_text :
            history[-1].content += char
            sleep(0.05) 
            yield history

        history = text_to_audiofile(generated_text, history)
        yield history
    except Exception as e:
        history.append(gr.ChatMessage(role="system", content=f"Failed to create text: {str(e)}"))
        yield history
        raise gr.Error(f"Failed to create text: {str(e)}")

def audiofile_to_text(wav_path):
    try:
        sample_rate, audio_data = wavfile.read(wav_path)
        audio_data = np.array(audio_data, dtype=np.float32)
        audio_data /= np.max(np.abs(audio_data))
        
        transcribed_text = transcriber_model({"raw": audio_data, "sampling_rate": sample_rate})["text"]
        return str(transcribed_text)
    
    except Exception as e:
        raise gr.Error(f"Failed to transcribe audio: {e}")

def text_to_audiofile(input_text, history):
    try:
        speech = synthesiser(input_text, forward_params = {"do_sample": True})
        rate_speech = speech["sampling_rate"]
        data_speech = speech["audio"]
        data_speech = data_speech.flatten()
        data_speech = np.int16(data_speech / np.max(np.abs(data_speech)) * 32767)

        wavfile.write(r"Q:\Projects\Multimodal-Jarvis\data\audio\bark_out.wav", rate=rate_speech, data=data_speech)
        history.append(gr.ChatMessage(role="assistant", content= 
            gr.Audio(r"Q:\Projects\Multimodal-Jarvis\data\audio\bark_out.wav"),
            metadata={"title": rf"🛠️ Used tool {model_name_tts}"}))
        return history
    except Exception as e:
        raise gr.Error(f"Failed to convert text to audio: {e}")

model_name_nlp = r"Q:\Projects\Multimodal-Jarvis\models\nlp\Qwen2.5-1.5B-Instruct"
model_name_stt = r"Q:\Projects\Multimodal-Jarvis\models\stt\whisper-large-v3-turbo"
model_name_tts = r"Q:\Projects\Multimodal-Jarvis\models\tts\Suno-Bark"

tokenizer = AutoTokenizer.from_pretrained(model_name_nlp)
model = AutoModelForCausalLM.from_pretrained(
    model_name_nlp, 
    device_map="auto", 
    torch_dtype="auto"
)
transcriber_model = pipeline("automatic-speech-recognition", model = model_name_stt)
synthesiser = pipeline("text-to-speech", model = model_name_tts)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} for inference")


# callback = gr.CSVLogger()
def bot_ui():
    with gr.Blocks(theme=theme) as blocks:
        blocks.analytics_enabled = True
        gr.Markdown(
        f"""
        # {" ".join(os.path.basename(model_name_nlp).split("-"))} Test
        """)
        with gr.Row():
            with gr.Column(scale=10):
                chatbot = gr.Chatbot(elem_id="chatbot", 
                    height=500,
                    type="messages",
                    bubble_full_width=False,
                    placeholder=f"<strong><br><big>JARvis</strong>",
                    editable=True
                )

                chat_input  = gr.MultimodalTextbox(
                    interactive=True,
                    file_count="multiple",
                    placeholder="Ask me a question",
                    container=False,
                    show_label=False,
                    sources=["microphone", "upload"],
                )

                chat_msg = chat_input.submit(
                    add_message, [chatbot, chat_input], [chatbot, chat_input]
                )
                # callback.setup([chat_msg, chat_input], "chat_messages.csv")

                bot_msg = chat_msg.then(bot_output, chatbot, chatbot, api_name="bot_response")
                bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input], concurrency_limit = 40)

                chatbot.like(print_like_dislike, None, None)
                chatbot.edit(handle_edit, chatbot, chatbot)
                chatbot.undo(handle_undo, chatbot, [chatbot, chat_input])
    return blocks
    

if __name__ == "__main__":
    demo = bot_ui()
    demo.queue(api_open=False)
    demo.launch(show_error=True, 
                show_api=True, 
                debug=True, 
                allowed_paths = [r"Q:\Projects\Multimodal-Jarvis\data\audio"],
                enable_monitoring=True, share=True)

Device set to use cpu
  self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False)
Device set to use cpu
2025-02-03 16:19:13,778 - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
2025-02-03 16:19:13,802 - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None


Using cpu for inference


2025-02-03 16:19:14,122 - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
2025-02-03 16:19:14,122 - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000002170CD5EF50>
2025-02-03 16:19:14,232 - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x000002170CC2C640> server_hostname='api.gradio.app' timeout=3
2025-02-03 16:19:14,441 - DEBUG - Using selector: SelectSelector
2025-02-03 16:19:14,465 - DEBUG - connect_tcp.started host='127.0.0.1' port=7860 local_address=None timeout=None socket_options=None
2025-02-03 16:19:14,467 - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000002170CFB1000>
2025-02-03 16:19:14,468 - DEBUG - send_request_headers.started request=<Request [b'GET']>
2025-02-03 16:19:14,472 - DEBUG - send_request_headers.complete
2025-02-03 16:19:14,473 - DEBUG - send_request_body.started request=<Request [b'GET']>
2025-02-03 16:19:14,474 -

* Running on local URL:  http://127.0.0.1:7860
Monitoring URL: http://127.0.0.1:7860/monitoring/wQA_QSM6iPiclgqjEEHN2w

To create a public link, set `share=True` in `launch()`.


2025-02-03 16:19:14,540 - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
2025-02-03 16:19:14,673 - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000002170CC611E0>
2025-02-03 16:19:14,675 - DEBUG - send_request_headers.started request=<Request [b'GET']>
2025-02-03 16:19:14,677 - DEBUG - send_request_headers.complete
2025-02-03 16:19:14,678 - DEBUG - send_request_body.started request=<Request [b'GET']>
2025-02-03 16:19:14,679 - DEBUG - send_request_body.complete
2025-02-03 16:19:14,682 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
2025-02-03 16:19:14,755 - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
2025-02-03 16:19:14,887 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Mon, 03 Feb 2025 14:19:15 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'ngi

Keyboard interruption in main thread... closing server.


In [None]:
import os
import logging
import gradio as gr
from time import sleep
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
import numpy as np
import torch
from scipy.io import wavfile

model_name_nlp = r"Q:\Projects\Multimodal-Jarvis\models\nlp\Qwen2.5-1.5B-Instruct"
model_name_stt = r"Q:\Projects\Multimodal-Jarvis\models\stt\whisper-large-v3-turbo"
model_name_tts = r"Q:\Projects\Multimodal-Jarvis\models\tts\Suno-Bark"

tokenizer = AutoTokenizer.from_pretrained(model_name_nlp)
model = AutoModelForCausalLM.from_pretrained(
    model_name_nlp,
    device_map="auto",
    torch_dtype="auto"
)
transcriber_model = pipeline("automatic-speech-recognition", model = model_name_stt)
synthesiser = pipeline("text-to-speech", model = model_name_tts)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} for inference")

def audiofile_to_text(wav_path):
    try:
        sample_rate, audio_data = wavfile.read(wav_path)
        audio_data = np.array(audio_data, dtype=np.float32)
        audio_data /= np.max(np.abs(audio_data))
        
        transcribed_text = transcriber_model({"raw": audio_data, "sampling_rate": sample_rate})["text"]
        return str(transcribed_text)
    
    except Exception as e:
        raise gr.Error(f"Failed to transcribe audio: {e}")

def text_to_audiofile(input_text, history):
    try:
        speech = synthesiser(input_text, forward_params = {"do_sample": True})
        rate_speech = speech["sampling_rate"]
        data_speech = speech["audio"]
        data_speech = data_speech.flatten()
        data_speech = np.int16(data_speech / np.max(np.abs(data_speech)) * 32767)

        wavfile.write(r"Q:\Projects\Multimodal-Jarvis\data\audio\bark_out.wav", rate=rate_speech, data=data_speech)
        history.append(gr.ChatMessage(role="assistant", content= 
            gr.Audio(r"Q:\Projects\Multimodal-Jarvis\data\audio\bark_out.wav"),
            metadata={"title": rf"🛠️ Used tool {model_name_tts}"}))
        return history
    except Exception as e:
        raise gr.Error(f"Failed to convert text to audio: {e}")


def bot_output(message, history: list):
    try:
        output_history = [] # append transcribed text to output_history

        for file_path in message["files"]:
            try:
                transcribed_text = audiofile_to_text(file_path)
                history.append(gr.ChatMessage(role="user", content=transcribed_text))
                output_history.append(gr.ChatMessage(role="assistant", content=f"**{transcribed_text}**"))

            except Exception as transcription_error:
                print(f"Error transcribing {file_path}: {transcription_error}")
                history.append(gr.ChatMessage(role="assistant", 
                                                content=f"Failed to transcribe {file_path}"))
           
        output_history.append(gr.ChatMessage(role="assistant", content=""))
        text = tokenizer.apply_chat_template(
            history,
            tokenize=False,
            add_generation_prompt=True,
        )

        model_inputs  = tokenizer([text], return_tensors="pt").to(model.device)
        generated_ids = model.generate(**model_inputs, max_new_tokens=512)
        generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
        generated_text  = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        
        if not generated_text or not isinstance(generated_text, str):  
            generated_text = "I'm sorry, but I couldn't generate a response."

        for char in generated_text :
            output_history[-1].content += char
            sleep(0.01) 
            yield output_history

        output_history = text_to_audiofile(generated_text, output_history)
        yield output_history

    except Exception as e:
        raise gr.Error(f"Failed to create text: {str(e)}")

def chat_ui():
    with gr.Blocks(theme=theme) as blocks:
        gr.Markdown(
            f"""
            # {" ".join(os.path.basename(model_name_nlp).split("-"))} Test
            """)
        gr.ChatInterface(
            bot_output,
            api_name = "chat",
            editable=True,
            theme=theme,
            type="messages",
            flagging_mode = 'manual',
            save_history=True,

            chatbot = gr.Chatbot(elem_id="chatbot", 
                        height=500,
                        type="messages",
                        placeholder=f"<strong><br><big>JARvis</strong>",
                        editable=True
                    ),
            textbox = gr.MultimodalTextbox(
                        interactive=True,
                        file_count="multiple",
                        container=False,
                        show_label=False,
                        placeholder="Ask me a question",
                        sources=["microphone", "upload"],
                    ),
        )
        return blocks

if __name__ == "__main__":
    chat_ui().queue().launch(
        show_error=True, 
        show_api=True, 
        debug=True, 
        allowed_paths = [r"Q:\Projects\Multimodal-Jarvis\data\audio"],
        enable_monitoring=True)

Device set to use cpu
  self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False)
Device set to use cpu
2025-02-06 17:50:19,086 - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
2025-02-06 17:50:19,130 - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
2025-02-06 17:50:19,256 - DEBUG - Starting new HTTPS connection (1): huggingface.co:443


Using cpu for inference


2025-02-06 17:50:19,270 - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
2025-02-06 17:50:19,549 - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
2025-02-06 17:50:19,581 - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000001515E8605B0>
2025-02-06 17:50:19,613 - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x000001515E71A540> server_hostname='api.gradio.app' timeout=3
2025-02-06 17:50:19,673 - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000001515E7033A0>
2025-02-06 17:50:19,709 - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x000001515E719840> server_hostname='api.gradio.app' timeout=3
2025-02-06 17:50:19,772 - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
2025-02-06 17:50:20,076 - DEBUG - start_tls.complete return_va

* Running on local URL:  http://127.0.0.1:7860
Monitoring URL: http://127.0.0.1:7860/monitoring/RLPB9INg5i1V1ffxtrfBRw

To create a public link, set `share=True` in `launch()`.


2025-02-06 17:50:21,090 - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
2025-02-06 17:50:21,394 - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
2025-02-06 17:50:31,918 - DEBUG - Calling on_part_begin with no data
2025-02-06 17:50:31,919 - DEBUG - Calling on_header_field with data[42:61]
2025-02-06 17:50:31,925 - DEBUG - Calling on_header_value with data[63:108]
2025-02-06 17:50:31,926 - DEBUG - Calling on_header_end with no data
2025-02-06 17:50:31,928 - DEBUG - Calling on_header_field with data[110:122]
2025-02-06 17:50:31,929 - DEBUG - Calling on_header_value with data[124:148]
2025-02-06 17:50:31,931 - DEBUG - Calling on_header_end with no data
2025-02-06 17:50:31,933 - DEBUG - Calling on_headers_finished with no data
2025-02-06 17:50:31,938 - DEBUG - Calling on_part_data with data[152:212992]
2025-02-06 17:50:31,943 - DEBUG - Calling on_part_data with data[0:30634]
2025-02-06 17:50:31,945 - DEBUG - Calling on_part_end with n

Keyboard interruption in main thread... closing server.


In [None]:
def create_chat_ui():
    blocks = gr.Blocks()
    with blocks:
        gr.Markdown(
        f"""
        # {" ".join(os.path.basename(model_name_nlp).split("-"))} Test
        """)
        with gr.Row():
            with gr.Column(scale=10):
                chatbot = gr.Chatbot(elem_id="chatbot", 
                    height=500,
                    type="messages",
                    bubble_full_width=False,
                    placeholder=f"<strong><br><big>JARvis</strong>",
                    editable=True
                )

                chat_input  = gr.MultimodalTextbox(
                    interactive=True,
                    file_count="multiple",
                    placeholder="Ask me a question",
                    container=False,
                    scale=7,
                    show_label=False,
                    sources=["microphone", "upload"],
                )

                chat_msg = chat_input.submit(
                    add_message, 
                    [chatbot, chat_input], 
                    [chatbot, chat_input]
                )
                bot_msg = chat_msg.then(bot_output, chatbot, chatbot, api_name="bot_response")
                bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])

                chatbot.like(print_like_dislike, None, None)
                chatbot.undo(handle_undo, chatbot, [chatbot, chat_input])
    return blocks

def create_ui():
    with gr.Blocks() as interface:
        with gr.Tab('Chat', id='Chat', elem_id='chat-tab'):
            create_chat_ui()
        with gr.Tab('Chat1', id='Chat1', elem_id='tab'):
            gr.Markdown("Chat1")
        
    return interface
if __name__ == "__main__":
    demo = create_ui()
    demo.launch(show_error=True, show_api=True, debug=True)

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.


: 

In [34]:
import gradio as gr
from functools import partial

def process_message(chat_histories, chat_id, message):
    """
    Add the new message (and a simple bot reply) to the chat history for the provided chat_id.
    """
    if chat_id not in chat_histories:
        chat_histories[chat_id] = []
    # For demonstration, the bot responds with the reversed message.
    bot_response = f"Bot: {message[::-1]}"
    chat_histories[chat_id].append((f"You: {message}", bot_response))
    # Return updated chat histories and the conversation for the active chat.
    return chat_histories, chat_histories[chat_id]

def load_chat(chat_id, chat_histories):
    """
    Return the chat history for a given chat_id.
    """
    return chat_histories.get(chat_id, []), chat_id

with gr.Blocks() as demo:
    gr.Markdown("## Chatbot with Dynamic Chat History Buttons")
    
    # State variable to hold all chat histories as a dictionary:
    #   { chat_id: [ (user message, bot response), ... ] }
    chat_histories_state = gr.State({})
    
    with gr.Row():
        chat_id_input = gr.Textbox(
            label="Chat Session ID",
            placeholder="Enter a unique chat ID (e.g., user1)"
        )
        user_message = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here"
        )
        send_btn = gr.Button("Send")
    
    # Chatbot display that shows the current conversation.
    chatbot_display = gr.Chatbot(label="Conversation")
    
    # A container where the dynamic chat buttons will be rendered.
    chat_buttons_container = gr.Column()
    
    # When the Send button is clicked, process the new message.
    send_btn.click(
        process_message,
        inputs=[chat_histories_state, chat_id_input, user_message],
        outputs=[chat_histories_state, chatbot_display]
    ).then(
        # Then re-render the dynamic buttons using our render_buttons function.
        lambda state: state,  # Pass through the state (dummy function)
        outputs=[]  # No direct outputs here.
    ).then(
        # Now update the dynamic buttons.
        lambda state: state,  # Again, pass through
        outputs=[]  # We just trigger a state change to update our render.
    )
    
    # ---
    # The dynamic rendering function: whenever chat_histories_state changes,
    # this function re-renders the buttons.
    @gr.render(inputs=chat_histories_state)
    def render_buttons(chat_histories):
        with gr.Column() as container:
            # If there are no chat sessions yet, display a message.
            if not chat_histories:
                gr.Markdown("### No chats available")
            else:
                # For each chat session, create a button.
                for cid in chat_histories.keys():
                    btn = gr.Button(f"Chat: {cid}", key=cid)
                    # When this button is clicked, load that chat's history.
                    # We use functools.partial to bind the current chat ID.
                    btn.click(
                        fn=partial(load_chat, chat_id=cid, chat_histories=chat_histories),
                        inputs=[],  # No additional inputs needed.
                        outputs=[chatbot_display, chat_id_input]
                    )
        return container

    # Place the dynamic buttons container in the layout.
    chat_buttons_container.render(render_buttons, chat_histories_state)

demo.launch()


2025-02-01 17:45:51,685 - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
2025-02-01 17:45:51,697 - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None


TypeError: Block.render() takes 1 positional argument but 2 were given

2025-02-01 17:45:52,101 - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
2025-02-01 17:45:52,113 - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000001F483752200>
2025-02-01 17:45:52,114 - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x000001F4FFD00DC0> server_hostname='api.gradio.app' timeout=3
2025-02-01 17:45:52,564 - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000001F480161C30>
2025-02-01 17:45:52,565 - DEBUG - send_request_headers.started request=<Request [b'GET']>
2025-02-01 17:45:52,566 - DEBUG - send_request_headers.complete
2025-02-01 17:45:52,568 - DEBUG - send_request_body.started request=<Request [b'GET']>
2025-02-01 17:45:52,569 - DEBUG - send_request_body.complete
2025-02-01 17:45:52,570 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
2025-02-01 17:45:52,782 - DEBUG - receive_response_headers.complete re

In [None]:
import gradio as gr
import inspect
from gradio.components.multimodal_textbox import MultimodalPostprocess
from typing import Literal, Union, cast
import anyio
import copy
import dataclasses
from gradio import utils
from gradio.helpers import special_args
from collections.abc import AsyncGenerator
from gradio.components.chatbot import (
    ChatMessage,
    Message,
    MessageDict,
    TupleFormat,
)
multimodal = True
limiter = None

############################################################
############### Bussines Logic #############################
############################################################

def bot_output(_unused, history: list):
    try:
        history.append(gr.ChatMessage(role="assistant", content=""))
        text = tokenizer.apply_chat_template(
            history,    # [msg.dict() for msg in history]
            tokenize=False,
            add_generation_prompt=True,
        )

        model_inputs  = tokenizer([text], return_tensors="pt").to(model.device)
        generated_ids = model.generate(**model_inputs, max_new_tokens=512)
        generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
        generated_text  = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        
        if not generated_text or not isinstance(generated_text, str):  
            generated_text = "I'm sorry, but I couldn't generate a response."

        for char in generated_text :
            history[-1].content += char
            sleep(0.01) 
            yield history

    
        yield history

    except Exception as e:
        history.append(gr.ChatMessage(role="system", content=f"Failed to create text: {str(e)}"))
        yield history
        raise gr.Error(f"Failed to create text: {str(e)}")

fn = bot_output
############################################################
############### Inner Logic ################################
############################################################

def save_conversation(index: int | None, conversation: list[gr.MessageDict], saved_conversations: list[list[gr.MessageDict]]):
    if index is not None:
        saved_conversations[index] = conversation
    else:
        saved_conversations.append(conversation)
        index = len(saved_conversations) - 1
    return index, saved_conversations

def delete_conversation(index: int | None, saved_conversations: list[list[gr.MessageDict]]):

    if index is not None:
        saved_conversations.pop(index)
    return None, saved_conversations

def generate_chat_title(conversation: list[gr.MessageDict]) -> str:
        """
        Generate a title for a conversation by taking the first user message that is a string
        and truncating it to 40 characters. If files are present, add a 📎 to the title.
        """
        title = ""
        for message in conversation:
            if message["role"] == "user":
                if isinstance(message["content"], str):
                    title += message["content"]
                    break
                else:
                    title += "📎 "
        if len(title) > 40:
            title = title[:40] + "..."
        return title or "Conversation"

def load_conversation(index: int, conversations: list[list[gr.MessageDict]]):
    return (
        index,
        gr.Chatbot(
            value=conversations[index],  # type: ignore
            feedback_value=[],
        ),
    )

def load_chat_history(conversations):
        return gr.Dataset(
            samples=[
                [generate_chat_title(conv)]
                for conv in conversations or []
                if conv
            ]
        )

def clear_and_save_textbox(
        message: str | MultimodalPostprocess,
    ) -> tuple[
        gr.Textbox | gr.MultimodalTextbox,
        str | MultimodalPostprocess,
    ]:
        return (
            type(chat_input)("", interactive=False, placeholder=""),
            message,
        )

@staticmethod
def messages_to_tuples(history_messages: list[gr.MessageDict]) -> TupleFormat:
    history_tuples = []
    for message in history_messages:
        if message["role"] == "user":
            history_tuples.append((message["content"], None))
        elif history_tuples and history_tuples[-1][1] is None:
            history_tuples[-1] = (history_tuples[-1][0], message["content"])
        else:
            history_tuples.append((None, message["content"]))
    return history_tuples

@staticmethod
def tuples_to_messages(history_tuples: TupleFormat) -> list[MessageDict]:
    history_messages = []
    for message_tuple in history_tuples:
        if message_tuple[0]:
            history_messages.append({"role": "user", "content": message_tuple[0]})
        if message_tuple[1]:
            history_messages.append(
                {"role": "assistant", "content": message_tuple[1]}
            )
    return history_messages

def message_as_message_dict(
        message: gr.MessageDict | Message | str | gr.Component | MultimodalPostprocess | list,
        role: Literal["user", "assistant"],
    ) -> list[MessageDict]:
        """
        Converts a user message, example message, or response from the chat function to a
        list of MessageDict objects that can be appended to the chat history.
        """
        message_dicts = []
        if not isinstance(message, list):
            message = [message]
        for msg in message:
            if isinstance(msg, Message):
                message_dicts.append(msg.model_dump())
            elif isinstance(msg, ChatMessage):
                msg.role = role
                message_dicts.append(
                    dataclasses.asdict(msg, dict_factory=utils.dict_factory)
                )
            elif isinstance(msg, (str, gr.Component)):
                message_dicts.append({"role": role, "content": msg})
            elif (
                isinstance(msg, dict) and "content" in msg
            ):  # in MessageDict format already
                msg["role"] = role
                message_dicts.append(msg)
            else:  # in MultimodalPostprocess format
                for x in msg.get("files", []):
                    if isinstance(x, dict):
                        x = x.get("path")
                    message_dicts.append({"role": role, "content": (x,)})
                if msg["text"] is None or not isinstance(msg["text"], str):
                    pass
                else:
                    message_dicts.append({"role": role, "content": msg["text"]})
        return message_dicts

def append_message_to_history(
        message: gr.MessageDict | Message | str | gr.Component | MultimodalPostprocess | list,
        history: list[gr.MessageDict] | TupleFormat,
        role: Literal["user", "assistant"] = "user",
    ) -> list[gr.MessageDict] | TupleFormat:
        message_dicts = message_as_message_dict(message, role)
        if type == "tuples":
            history = tuples_to_messages(history)  # type: ignore
        else:
            history = copy.deepcopy(history)
        history.extend(message_dicts)  # type: ignore
        if type == "tuples":
            history = messages_to_tuples(history)  # type: ignore
        return history

def pop_last_user_message(
        history: list[MessageDict] | TupleFormat,
    ) -> tuple[list[MessageDict] | TupleFormat, str | MultimodalPostprocess]:
        """
        Removes the message (or set of messages) that the user last sent from the chat history and returns them.
        If self.multimodal is True, returns a MultimodalPostprocess (dict) object with text and files.
        If self.multimodal is False, returns just the message text as a string.
        """
        if not history:
            return history, "" if not multimodal else {"text": "", "files": []}

        if type == "tuples":
            history = tuples_to_messages(history)  # type: ignore
        i = len(history) - 1
        while i >= 0 and history[i]["role"] == "assistant":  # type: ignore
            i -= 1
        while i >= 0 and history[i]["role"] == "user":  # type: ignore
            i -= 1
        last_messages = history[i + 1 :]
        last_user_message = ""
        files = []
        for msg in last_messages:
            assert isinstance(msg, dict)  # noqa: S101
            if msg["role"] == "user":
                content = msg["content"]
                if isinstance(content, tuple):
                    files.append(content[0])
                else:
                    last_user_message = content
        return_message = (
            {"text": last_user_message, "files": files}
            if multimodal
            else last_user_message
        )
        history_ = history[: i + 1]
        if type == "tuples":
            history_ = messages_to_tuples(history_)  # type: ignore
        return history_, return_message  # type: ignore


async def submit_fn(message: str | MultimodalPostprocess, history: TupleFormat | list[MessageDict], request: gr.Request, *args,
) -> tuple:
    inputs, _, _ = special_args(
        fn, inputs=[message, history, *args], request=request
    )
    if is_async:
        response = await fn(*inputs)
    else:
        response = await anyio.to_thread.run_sync(
            fn, *inputs, limiter=limiter
        )
    if additional_outputs:
        response, *additional_outputs = response
    else:
        additional_outputs = None
    history = append_message_to_history(message, history, "user")
    history = append_message_to_history(response, history, "assistant")
    if additional_outputs:
        return response, history, *additional_outputs
    return response, history

async def stream_fn(message: str | MultimodalPostprocess, history: TupleFormat | list[MessageDict], request: gr.Request, *args,
) -> AsyncGenerator[
    tuple,
    None,
]:
    inputs, _, _ = special_args(
        fn, inputs=[message, history, *args], request=request
    )
    if is_async:
        generator = fn(*inputs)
    else:
        generator = await anyio.to_thread.run_sync(
            fn, *inputs, limiter=limiter
        )
        generator = utils.SyncToAsyncIterator(generator, limiter)

    history = append_message_to_history(message, history, "user")
    additional_outputs = None
    try:
        first_response = await utils.async_iteration(generator)
        if additional_outputs:
            first_response, *additional_outputs = first_response
        history_ = append_message_to_history(
            first_response, history, "assistant"
        )
        if not additional_outputs:
            yield first_response, history_
        else:
            yield first_response, history_, *additional_outputs
    except StopIteration:
        yield None, history
    async for response in generator:
        if additional_outputs:
            response, *additional_outputs = response
        history_ = append_message_to_history(response, history, "assistant")
        if not additional_outputs:
            yield response, history_
        else:
            yield response, history_, *additional_outputs

def edit_message(history: list[MessageDict] | TupleFormat, edit_data: gr.EditData) -> tuple[
        list[MessageDict] | TupleFormat,
        list[MessageDict] | TupleFormat,
        str | MultimodalPostprocess,
    ]:
        if isinstance(edit_data.index, (list, tuple)):
            history = history[: edit_data.index[0]]
        else:
            history = history[: edit_data.index]
        return history, history, edit_data.value

############################################################
############### UI Logic from ChatInterface ################
############################################################

with gr.Blocks() as demo:
    
    with gr.Row():
        with gr.Column(scale=1, min_width=100):
            
            new_chat_button = gr.Button(
                "New chat",
                variant="primary",
                size="md",
            )
            chat_history_dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                show_label=False,
                layout="table",
                type="index",
            )

        with gr.Column(scale=5):
            chatbot = gr.Chatbot(elem_id="chatbot", 
                height=500,
                type="messages",
                placeholder=f"<strong><br><big>JARvis</strong>",
                editable=True
            )
            chat_input = gr.MultimodalTextbox(
                interactive=True,
                file_count="multiple",
                container=False,
                show_label=False,
                placeholder="Ask me a question",
                sources=["microphone", "upload"],
            )

        chatbot_state = gr.State(chatbot.value if chatbot.value else [])
        chatbot_value = gr.State(chatbot.value if chatbot.value else [])
        null_component = gr.State()
        is_generator = inspect.isgeneratorfunction(
            fn
        ) or inspect.isasyncgenfunction(fn)
        is_async = inspect.iscoroutinefunction(
            fn
        ) or inspect.isasyncgenfunction(fn)
        submit_fn = stream_fn if is_generator else submit_fn

        saved_conversations = gr.BrowserState(
                [], storage_key=f"saved_conversations_{id}"
            )
        conversation_id = gr.State(None)
        saved_input = gr.State() 
        synchronize_chat_state_kwargs = {
                    "fn": lambda x: (x, x),
                    "inputs": [chatbot],
                    "outputs": [chatbot_state, chatbot_value],
                    "show_api": False,
                    "queue": False,
                }
        save_fn_kwargs = {
            "fn": save_conversation,
            "inputs": [
                conversation_id,
                chatbot_state,
                saved_conversations,
            ],
            "outputs": [conversation_id, saved_conversations],
            "show_api": False,
            "queue": False,
        }
        submit_fn_kwargs = {
            "fn": submit_fn,
            "inputs": [saved_input, chatbot_state],
            "outputs": [null_component, chatbot],
            "show_api": False,
            "concurrency_limit": cast(
                Union[int, Literal["default"], None], 'default'
            ),
            "show_progress": cast(
                Literal["full", "minimal", "hidden"], 'full'
            ),
        }


        submit_event = (
        chat_input.submit( 
            clear_and_save_textbox,
            [chat_input],
            [chat_input, saved_input],
            show_api=False,
            queue=False,
        ).then(  # The reason we do this outside of the submit_fn is that we want to update the chatbot UI with the user message immediately, before the submit_fn is called
                append_message_to_history,
                [saved_input, chatbot],
                [chatbot],
                show_api=False,
                queue=False,
            ).then(**submit_fn_kwargs)
        )
        submit_event.then(**synchronize_chat_state_kwargs).then(
            lambda: gr.update(value=None, interactive=True),
            None,
            chat_input,
            show_api=False,
        ).then(**save_fn_kwargs)

        retry_event = (
            chatbot.retry(
                pop_last_user_message,
                [chatbot_state],
                [chatbot_state, saved_input],
                show_api=False,
                queue=False,
            )
            .then(
                append_message_to_history,
                [saved_input, chatbot_state],
                [chatbot],
                show_api=False,
                queue=False,
            )
            .then(
                lambda: gr.update(interactive=False, placeholder=""),
                outputs=[chat_input],
                show_api=False,
            ).then(**submit_fn_kwargs)
        )
        retry_event.then(**synchronize_chat_state_kwargs).then(
            lambda: gr.update(interactive=True),
            outputs=[chat_input],
            show_api=False,
        ).then(**save_fn_kwargs)


        chatbot.undo(
            pop_last_user_message,
            [chatbot],
            [chatbot, chat_input],
            show_api=False,
            queue=False,
        ).then(**synchronize_chat_state_kwargs).then(**save_fn_kwargs)

        chatbot.clear(**synchronize_chat_state_kwargs).then(
            delete_conversation,
            [conversation_id, saved_conversations],
            [conversation_id, saved_conversations],
            show_api=False,
            queue=False,
        )

        new_chat_button.click(
            lambda: (None, []),
            None,
            [conversation_id, chatbot],
            show_api=False,
            queue=False,
        ).then(
            lambda x: x,
            [chatbot],
            [chatbot_state],
            show_api=False,
            queue=False,
        )

        saved_conversations.change(
            fn=load_chat_history,
            inputs=[saved_conversations],
            outputs=[chat_history_dataset],
            show_api=False,
            queue=False,
        )

        chat_history_dataset.click(
            lambda: [],
            None,
            [chatbot],
            show_api=False,
            queue=False,
            show_progress="hidden",
        ).then(
            load_conversation,
            [chat_history_dataset, saved_conversations],
            [conversation_id, chatbot],
            show_api=False,
            queue=False,
            show_progress="hidden",
        ).then(**synchronize_chat_state_kwargs)

        if chatbot.editable:
            chatbot.edit(
                edit_message,
                [chatbot],
                [chatbot, chatbot_state, saved_input],
                show_api=False,
            ).success(**submit_fn_kwargs).success(**synchronize_chat_state_kwargs).then(
                **save_fn_kwargs
            )

demo.launch(show_error=True, show_api=True, debug=True)

2025-02-03 16:19:29,113 - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
2025-02-03 16:19:29,130 - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
2025-02-03 16:19:29,460 - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
2025-02-03 16:19:29,491 - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000002170D00EC20>
2025-02-03 16:19:29,533 - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x0000021776F2DC40> server_hostname='api.gradio.app' timeout=3
2025-02-03 16:19:29,744 - DEBUG - Using selector: SelectSelector
2025-02-03 16:19:29,765 - DEBUG - connect_tcp.started host='127.0.0.1' port=7860 local_address=None timeout=None socket_options=None
2025-02-03 16:19:29,767 - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000002170E7E5F90>
2025-02-03 16:19:29,769 - DEBUG - send_reque

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


2025-02-03 16:19:29,825 - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
2025-02-03 16:19:29,968 - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000002177D2FA8C0>
2025-02-03 16:19:29,970 - DEBUG - send_request_headers.started request=<Request [b'GET']>
2025-02-03 16:19:29,970 - DEBUG - send_request_headers.complete
2025-02-03 16:19:29,973 - DEBUG - send_request_body.started request=<Request [b'GET']>
2025-02-03 16:19:29,974 - DEBUG - send_request_body.complete
2025-02-03 16:19:29,975 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
2025-02-03 16:19:30,050 - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
2025-02-03 16:19:30,181 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Mon, 03 Feb 2025 14:19:30 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'ngi

Keyboard interruption in main thread... closing server.




# Adaptation from text-generation-webui

In [None]:
import gradio as gr

css = r"src/static/static.css"

sidebar_html = r"src/templates/main.html"

def create_chat_ui():
    with gr.Blocks() as blocks:
      gr.Chatbot(elem_id="chatbot", 
          height=620,
          type="messages",
          bubble_full_width=False,
          placeholder=f"<strong><br><big>JARvis</br></strong>",
          editable=True
      )
      gr.MultimodalTextbox(
          interactive=True,
          file_count="multiple",
          placeholder="Ask me a question",
          container=False,
          scale=7,
          show_label=False,
          sources=["microphone", "upload"],
      )
    return blocks

def create_setting_ui():
    with gr.Blocks() as blocks:
       gr.Slider(minimum=0, maximum=100, label="Volume")
       gr.Slider(minimum=0, maximum=100, label="Brightness")
       gr.Slider(minimum=0, maximum=100, label="Contrast")

    return blocks

def create_interface():
  with gr.Blocks(css=css, theme=theme) as demo:
    with gr.Row():
      gr.HTML(sidebar_html)

      with gr.Column(scale=8, elem_classes="main-content"):
        
        with gr.Group(visible=True) as chat_group:
          create_chat_ui()
        
        with gr.Group(visible=False) as settings_group:
          create_setting_ui()

      return demo

if __name__ == "__main__":
    demo = create_interface()
    demo.launch(show_error=True, show_api=True, debug=True)

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.
