# Multi-Backend Chatbot with Gradio

This notebook demonstrates how to build a chatbot that can switch between multiple LLM providers:
- **Ollama** (local models)
- **OpenRouter** (multiple cloud models)
- **GitHub Models** (GitHub's AI models)
- **Groq** (fast inference)
- **Google AI Studio** (Gemini models)

In [1]:
import gradio as gr
import os
from dotenv import load_dotenv
from openai import OpenAI
import google.generativeai

# Load environment variables
load_dotenv(override=True)

True

In [2]:
# Install required packages. On a fresh environment, run this cell before the import cell above, then restart the kernel.
%pip install google-generativeai openai python-dotenv gradio -q

Note: you may need to restart the kernel to use updated packages.


In [3]:
def chat_with_ollama(message, history, model="llama3.2:1b"):
    """
    Stream a reply from a local Ollama model through its OpenAI-compatible API.

    Requires an Ollama server reachable at http://localhost:11434/v1.

    Args:
        message: The new user message.
        history: Iterable of (user_msg, bot_msg) pairs from earlier turns.
        model: Ollama model tag to query.

    Yields:
        The reply accumulated so far, once per chunk that carries content.
        If anything raises, an error string is yielded instead.
    """
    try:
        # Ollama ignores the API key, but the OpenAI client requires one.
        ollama_client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

        # Replay earlier turns, then finish with the new user message.
        conversation = []
        for past_user, past_bot in history:
            conversation += [
                {"role": "user", "content": past_user},
                {"role": "assistant", "content": past_bot},
            ]
        conversation.append({"role": "user", "content": message})

        stream = ollama_client.chat.completions.create(
            model=model,
            messages=conversation,
            stream=True,
        )

        reply_so_far = ""
        for piece in stream:
            # Some chunks have no choices or an empty delta; skip those.
            delta_text = piece.choices[0].delta.content if piece.choices else None
            if delta_text:
                reply_so_far += delta_text
                yield reply_so_far

    except Exception as exc:
        yield f"Error connecting to Ollama: {str(exc)}. Make sure Ollama is running."

In [4]:
def chat_with_openrouter(message, history, model="openai/gpt-3.5-turbo"):
    """
    Stream a reply from an OpenRouter-hosted model.

    The API key is read from the OPENROUTER_API_KEY environment variable.

    Args:
        message: The new user message.
        history: Iterable of (user_msg, bot_msg) pairs from earlier turns.
        model: OpenRouter model identifier.

    Yields:
        The reply accumulated so far, once per chunk that carries content;
        an error string if the key is missing or the request raises.
    """
    openrouter_key = os.getenv("OPENROUTER_API_KEY")
    if not openrouter_key:
        yield "Error: OPENROUTER_API_KEY not found in environment variables."
        return

    try:
        router_client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=openrouter_key)

        # Flatten each (user, assistant) pair into two chat messages.
        conversation = [
            {"role": speaker, "content": text}
            for past_user, past_bot in history
            for speaker, text in (("user", past_user), ("assistant", past_bot))
        ]
        conversation.append({"role": "user", "content": message})

        # Attribution headers sent along with the request.
        attribution = {
            "HTTP-Referer": "http://localhost:7860",
            "X-Title": "Local Chatbot",
        }
        stream = router_client.chat.completions.create(
            model=model,
            messages=conversation,
            stream=True,
            extra_headers=attribution,
        )

        reply_so_far = ""
        for piece in stream:
            if not piece.choices:
                continue
            delta_text = piece.choices[0].delta.content
            if delta_text:
                reply_so_far += delta_text
                yield reply_so_far

    except Exception as exc:
        yield f"Error connecting to OpenRouter: {str(exc)}"

In [5]:
def chat_with_github(message, history, model="gpt-4o-mini"):
    """
    Stream a reply from a GitHub Models endpoint.

    The token is read from the GITHUB_TOKEN environment variable.

    Args:
        message: The new user message.
        history: Iterable of (user_msg, bot_msg) pairs from earlier turns.
        model: GitHub Models model name.

    Yields:
        The reply accumulated so far, once per chunk that carries content;
        an error string if the token is missing or the request raises.
    """
    github_token = os.getenv("GITHUB_TOKEN")
    if not github_token:
        yield "Error: GITHUB_TOKEN not found in environment variables."
        return

    try:
        gh_client = OpenAI(
            base_url="https://models.github.ai/inference",
            api_key=github_token,
        )

        # Earlier turns first, the new user message last.
        payload = []
        for earlier_user, earlier_bot in history:
            payload.extend((
                {"role": "user", "content": earlier_user},
                {"role": "assistant", "content": earlier_bot},
            ))
        payload.append({"role": "user", "content": message})

        stream = gh_client.chat.completions.create(model=model, messages=payload, stream=True)

        accumulated = ""
        for event in stream:
            fragment = event.choices[0].delta.content if event.choices else None
            if not fragment:
                continue
            accumulated += fragment
            yield accumulated

    except Exception as exc:
        yield f"Error connecting to GitHub Models: {str(exc)}"

In [6]:
def chat_with_groq(message, history, model="llama-3.3-70b-versatile"):
    """
    Stream a reply from a Groq-hosted model via its OpenAI-compatible API.

    The API key is read from the GROQ_API_KEY environment variable.

    Args:
        message: The new user message.
        history: Iterable of (user_msg, bot_msg) pairs from earlier turns.
        model: Groq model identifier.

    Yields:
        The reply accumulated so far, once per chunk that carries content;
        an error string if the key is missing or the request raises.
    """
    groq_key = os.getenv("GROQ_API_KEY")
    if not groq_key:
        yield "Error: GROQ_API_KEY not found in environment variables."
        return

    try:
        groq_client = OpenAI(base_url="https://api.groq.com/openai/v1", api_key=groq_key)

        def _transcript():
            # Emit earlier turns as alternating user/assistant messages,
            # followed by the new user message.
            for earlier_user, earlier_bot in history:
                yield {"role": "user", "content": earlier_user}
                yield {"role": "assistant", "content": earlier_bot}
            yield {"role": "user", "content": message}

        stream = groq_client.chat.completions.create(
            model=model,
            messages=list(_transcript()),
            stream=True,
        )

        text_so_far = ""
        for event in stream:
            if event.choices:
                fragment = event.choices[0].delta.content
                if fragment:
                    text_so_far = text_so_far + fragment
                    yield text_so_far

    except Exception as exc:
        yield f"Error connecting to Groq: {str(exc)}"

In [7]:
def chat_with_gemini(message, history, model="gemini-2.0-flash"):
    """
    Stream a reply from a Gemini model using the google.generativeai SDK.

    The API key is read from the GOOGLE_API_KEY environment variable.

    Args:
        message: The new user message.
        history: Iterable of (user_msg, bot_msg) pairs from earlier turns.
        model: Gemini model name.

    Yields:
        The reply accumulated so far, once per streamed chunk; an error
        string if the key is missing or the request raises.
    """
    google_key = os.getenv("GOOGLE_API_KEY")
    if not google_key:
        yield "Error: GOOGLE_API_KEY not found in environment variables."
        return

    try:
        genai = google.generativeai
        genai.configure(api_key=google_key)
        gemini_model = genai.GenerativeModel(
            model_name=model,
            system_instruction="You are a helpful assistant",
        )

        # Gemini history uses "parts" lists and the role name "model"
        # for the assistant side.
        prior_turns = []
        for past_user, past_bot in history:
            prior_turns += [
                {"role": "user", "parts": [past_user]},
                {"role": "model", "parts": [past_bot]},
            ]

        session = gemini_model.start_chat(history=prior_turns)
        stream = session.send_message(message, stream=True)

        running_text = ""
        for piece in stream:
            running_text = running_text + piece.text
            yield running_text

    except Exception as exc:
        yield f"Error connecting to Gemini: {str(exc)}"

In [8]:
def chat_router(message, history, backend, ollama_model, openrouter_model, github_model, groq_model, gemini_model):
    """Forward a chat request to the backend named by `backend`.

    Each backend receives `message`, `history` and its own model argument.
    Yields whatever the chosen backend yields, or a single error string when
    `backend` matches none of the known names.
    """
    # The lambdas delay creating a backend generator until its name matches.
    routes = (
        ("Ollama", lambda: chat_with_ollama(message, history, ollama_model)),
        ("OpenRouter", lambda: chat_with_openrouter(message, history, openrouter_model)),
        ("GitHub Models", lambda: chat_with_github(message, history, github_model)),
        ("Groq", lambda: chat_with_groq(message, history, groq_model)),
        ("Gemini", lambda: chat_with_gemini(message, history, gemini_model)),
    )
    for backend_name, open_stream in routes:
        if backend == backend_name:
            yield from open_stream()
            return
    yield "Error: Unknown backend selected."

In [9]:
# Create Enhanced Gradio Interface

# Model options for each backend (Updated December 2025).
# The first entry of each list is what get_model_choices() selects by default
# when the user switches backend.
OLLAMA_MODELS = [
    "llama3.2:1b", "llama3.2:3b", "llama3.1:8b", "llama3.1:70b",
    "mistral:7b", "mixtral:8x7b", "codellama:7b", "codellama:34b",
    "phi3:mini", "phi3:medium", "gemma2:9b", "qwen2.5:7b", "deepseek-r1:7b"
]

# OpenRouter Free Models (Updated December 2025)
OPENROUTER_MODELS = [
    "meta-llama/llama-3.3-70b-instruct:free", "google/gemini-2.0-flash-exp:free",
    "tngtech/deepseek-r1t2-chimera:free", "tngtech/deepseek-r1t-chimera:free",
    "tngtech/tng-r1t-chimera:free", "allenai/olmo-3-32b-think:free",
    "alibaba/tongyi-deepresearch-30b-a3b:free", "kwaipilot/kat-coder-pro:free",
    "qwen/qwen3-4b:free", "google/gemma-3-27b-it:free", "google/gemma-3-12b-it:free",
    "google/gemma-3-4b-it:free", "google/gemma-3n-e2b-it:free",
    "nvidia/nemotron-nano-12b-v2-vl:free", "nvidia/nemotron-nano-9b-v2:free",
    "mistralai/mistral-small-3.1-24b-instruct:free", "mistralai/mistral-7b-instruct:free",
    "openai/gpt-oss-20b:free", "z-ai/glm-4.5-air:free", "amazon/nova-2-lite-v1:free",
    "cognitivecomputations/dolphin-mistral-24b-venice-edition:free", "arcee-ai/trinity-mini:free"
]

# GitHub Models Marketplace (Free Tier)
GITHUB_MODELS = [
    "gpt-5", "gpt-5-chat", "gpt-5-mini", "gpt-5-nano",
    "o3", "o3-mini", "o4-mini", "gpt-4o", "gpt-4o-mini",
    "Llama-3.3-70B-Instruct", "Llama-3.2-90B-Vision-Instruct",
    "Llama-3.2-11B-Vision-Instruct", "Phi-4", "Phi-4-reasoning",
    "Phi-4-multimodal-instruct", "Phi-4-mini-reasoning", "Phi-4-mini-instruct",
    "Codestral-2501", "Mistral-Small-3-1-multimodal",
    "DeepSeek-R1", "DeepSeek-V3-0324", "MAI-DS-R1", "AI21-Jamba-1-5-Large"
]

# Groq Free Tier Models (chat-capable only).
# The Whisper speech-to-text models are intentionally not listed: every Groq
# request in this notebook goes through chat.completions, which they cannot serve.
GROQ_MODELS = [
    "meta-llama/llama-4-maverick-17b-128e-instruct", "meta-llama/llama-4-scout-17b-16e-instruct",
    "llama-3.3-70b-versatile", "llama-3.1-8b-instant",
    "openai/gpt-oss-120b", "openai/gpt-oss-20b", "openai/gpt-oss-safeguard-20b",
    "moonshotai/kimi-k2-instruct-0905", "qwen/qwen3-32b"
]

# Google AI Studio / Gemini API Models
GEMINI_MODELS = [
    "gemini-3-pro-preview", "gemini-3-pro-image-preview",
    "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
    "gemini-2.0-flash-exp",
    "gemma-3-27b-it", "gemma-3-12b-it", "gemma-3-4b-it", "gemma-3-1b-it",
    "gemma-3n-e4b-it", "gemma-3n-e2b-it"
]

# Custom CSS for enhanced styling; passed to gr.Blocks(css=...) when the UI is built.
# - .gradio-container: caps the app width at 1200px.
# - footer: hidden (presumably Gradio's default page footer — confirm).
# NOTE(review): no component in this notebook sets elem_classes, so the
# .chat-header, .status-connected and .status-error rules do not appear to be
# applied anywhere yet.
custom_css = """
.gradio-container {
    max-width: 1200px !important;
}
.chat-header {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    padding: 20px;
    border-radius: 10px;
    margin-bottom: 20px;
}
.status-connected {
    color: #22c55e;
    font-weight: bold;
}
.status-error {
    color: #ef4444;
    font-weight: bold;
}
footer {
    visibility: hidden;
}
"""

def check_api_status():
    """Summarize which backends are configured.

    A backend counts as configured when its API-key environment variable is
    set to a non-empty value. Only the presence of each key is checked; the
    key values themselves are never stored or returned.

    Returns:
        A single line such as "✅ Ollama | ❌ OpenRouter | ...", listing the
        backends in a fixed order.
    """
    availability = {
        # Ollama runs locally without a key, so it is always shown as available
        # (this does not probe whether the server is actually running).
        "Ollama": True,
        "OpenRouter": bool(os.getenv("OPENROUTER_API_KEY")),
        "GitHub Models": bool(os.getenv("GITHUB_TOKEN")),
        "Groq": bool(os.getenv("GROQ_API_KEY")),
        "Gemini": bool(os.getenv("GOOGLE_API_KEY")),
    }

    return " | ".join(
        f"{'✅' if is_configured else '❌'} {name}"
        for name, is_configured in availability.items()
    )

def get_model_choices(backend):
    """Build the model-dropdown update for the chosen backend.

    Returns a gr.update that swaps in the backend's model list and selects its
    first entry. An unrecognized backend yields an empty choice list and an
    empty-string value.
    """
    models_by_backend = {
        "Ollama": OLLAMA_MODELS,
        "OpenRouter": OPENROUTER_MODELS,
        "GitHub Models": GITHUB_MODELS,
        "Groq": GROQ_MODELS,
        "Gemini": GEMINI_MODELS,
    }

    if backend in models_by_backend:
        available = models_by_backend[backend]
        default_model = available[0]
    else:
        available, default_model = [], ""

    return gr.update(choices=available, value=default_model)

def enhanced_chat_router(message, history, backend, model, system_prompt, temperature, max_tokens):
    """Dispatch a chat request to the enhanced handler for `backend`.

    Yields "Please enter a message." for a blank message, the handler's
    streamed output for a known backend, and an error string otherwise.
    """
    if not message.strip():
        yield "Please enter a message."
        return

    # Pick the handler first; every handler takes the same positional arguments.
    if backend == "Ollama":
        handler = chat_with_ollama_enhanced
    elif backend == "OpenRouter":
        handler = chat_with_openrouter_enhanced
    elif backend == "GitHub Models":
        handler = chat_with_github_enhanced
    elif backend == "Groq":
        handler = chat_with_groq_enhanced
    elif backend == "Gemini":
        handler = chat_with_gemini_enhanced
    else:
        yield "Error: Unknown backend selected."
        return

    yield from handler(message, history, model, system_prompt, temperature, max_tokens)

def chat_with_ollama_enhanced(message, history, model, system_prompt, temperature, max_tokens):
    """Stream a reply from a local Ollama model with tunable generation settings.

    Args:
        message: The new user message.
        history: Iterable of (user_msg, bot_msg) pairs from earlier turns.
        model: Ollama model tag to query.
        system_prompt: Optional system instruction; None or blank means none.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Reply token limit; 0, a negative value or None means no limit.

    Yields:
        The reply accumulated so far, once per chunk that carries content.
        If anything raises, a formatted error string is yielded instead.
    """
    try:
        # Ollama ignores the API key, but the OpenAI client requires one.
        client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

        messages = []
        if system_prompt and system_prompt.strip():
            messages.append({"role": "system", "content": system_prompt})

        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            # Callers may pad an unanswered turn with "" or None; an empty
            # assistant message adds nothing to the context, so leave it out.
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": message})

        # UI sliders can deliver floats; the limit must be a positive integer.
        token_limit = int(max_tokens) if max_tokens and max_tokens > 0 else None

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            temperature=temperature,
            max_tokens=token_limit
        )

        partial_message = ""
        for chunk in response:
            # Some chunks have no choices or an empty delta; skip those.
            if chunk.choices and chunk.choices[0].delta.content:
                partial_message += chunk.choices[0].delta.content
                yield partial_message

    except Exception as e:
        yield f"❌ **Ollama Error:** {str(e)}\n\n💡 Make sure Ollama is running: `ollama serve`"

def chat_with_openrouter_enhanced(message, history, model, system_prompt, temperature, max_tokens):
    """Stream a reply from an OpenRouter model with tunable generation settings.

    The API key is read from the OPENROUTER_API_KEY environment variable.

    Args:
        message: The new user message.
        history: Iterable of (user_msg, bot_msg) pairs from earlier turns.
        model: OpenRouter model identifier.
        system_prompt: Optional system instruction; None or blank means none.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Reply token limit; 0, a negative value or None means no limit.

    Yields:
        The reply accumulated so far, once per chunk that carries content;
        a formatted error string if the key is missing or the request raises.
    """
    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        yield "❌ **Error:** OPENROUTER_API_KEY not found. Please add it to your .env file."
        return

    try:
        client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=api_key)

        messages = []
        if system_prompt and system_prompt.strip():
            messages.append({"role": "system", "content": system_prompt})

        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            # Callers may pad an unanswered turn with "" or None; an empty
            # assistant message adds nothing to the context, so leave it out.
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": message})

        # UI sliders can deliver floats; the limit must be a positive integer.
        token_limit = int(max_tokens) if max_tokens and max_tokens > 0 else None

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            temperature=temperature,
            max_tokens=token_limit,
            # Attribution headers sent along with the request.
            extra_headers={
                "HTTP-Referer": "http://localhost:7860",
                "X-Title": "Universal Chatbot"
            }
        )

        partial_message = ""
        for chunk in response:
            # Some chunks have no choices or an empty delta; skip those.
            if chunk.choices and chunk.choices[0].delta.content:
                partial_message += chunk.choices[0].delta.content
                yield partial_message

    except Exception as e:
        yield f"❌ **OpenRouter Error:** {str(e)}"

def chat_with_github_enhanced(message, history, model, system_prompt, temperature, max_tokens):
    """Stream a reply from GitHub Models with tunable generation settings.

    The token is read from the GITHUB_TOKEN environment variable.

    Args:
        message: The new user message.
        history: Iterable of (user_msg, bot_msg) pairs from earlier turns.
        model: GitHub Models model name.
        system_prompt: Optional system instruction; None or blank means none.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Reply token limit; 0, a negative value or None means no limit.

    Yields:
        The reply accumulated so far, once per chunk that carries content;
        a formatted error string if the token is missing or the request raises.
    """
    api_key = os.getenv("GITHUB_TOKEN")
    if not api_key:
        yield "❌ **Error:** GITHUB_TOKEN not found. Please add it to your .env file."
        return

    try:
        client = OpenAI(base_url="https://models.github.ai/inference", api_key=api_key)

        messages = []
        if system_prompt and system_prompt.strip():
            messages.append({"role": "system", "content": system_prompt})

        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            # Callers may pad an unanswered turn with "" or None; an empty
            # assistant message adds nothing to the context, so leave it out.
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": message})

        # UI sliders can deliver floats; the limit must be a positive integer.
        token_limit = int(max_tokens) if max_tokens and max_tokens > 0 else None

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            temperature=temperature,
            max_tokens=token_limit
        )

        partial_message = ""
        for chunk in response:
            # Some chunks have no choices or an empty delta; skip those.
            if chunk.choices and chunk.choices[0].delta.content:
                partial_message += chunk.choices[0].delta.content
                yield partial_message

    except Exception as e:
        yield f"❌ **GitHub Models Error:** {str(e)}"

def chat_with_groq_enhanced(message, history, model, system_prompt, temperature, max_tokens):
    """Stream a reply from a Groq model with tunable generation settings.

    The API key is read from the GROQ_API_KEY environment variable.

    Args:
        message: The new user message.
        history: Iterable of (user_msg, bot_msg) pairs from earlier turns.
        model: Groq model identifier.
        system_prompt: Optional system instruction; None or blank means none.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Reply token limit; 0, a negative value or None means no limit.

    Yields:
        The reply accumulated so far, once per chunk that carries content;
        a formatted error string if the key is missing or the request raises.
    """
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        yield "❌ **Error:** GROQ_API_KEY not found. Please add it to your .env file."
        return

    try:
        client = OpenAI(base_url="https://api.groq.com/openai/v1", api_key=api_key)

        messages = []
        if system_prompt and system_prompt.strip():
            messages.append({"role": "system", "content": system_prompt})

        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            # Callers may pad an unanswered turn with "" or None; an empty
            # assistant message adds nothing to the context, so leave it out.
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": message})

        # UI sliders can deliver floats; the limit must be a positive integer.
        token_limit = int(max_tokens) if max_tokens and max_tokens > 0 else None

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            temperature=temperature,
            max_tokens=token_limit
        )

        partial_message = ""
        for chunk in response:
            # Some chunks have no choices or an empty delta; skip those.
            if chunk.choices and chunk.choices[0].delta.content:
                partial_message += chunk.choices[0].delta.content
                yield partial_message

    except Exception as e:
        yield f"❌ **Groq Error:** {str(e)}"

def chat_with_gemini_enhanced(message, history, model, system_prompt, temperature, max_tokens):
    """Stream a reply from a Gemini model with tunable generation settings.

    The API key is read from the GOOGLE_API_KEY environment variable.

    Args:
        message: The new user message.
        history: Iterable of (user_msg, bot_msg) pairs from earlier turns.
        model: Gemini model name.
        system_prompt: System instruction; None or blank falls back to
            "You are a helpful assistant".
        temperature: Sampling temperature placed in the generation config.
        max_tokens: Reply token limit; 0, a negative value or None means no limit.

    Yields:
        The reply accumulated so far, once per streamed chunk; a formatted
        error string if the key is missing or the request raises.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        yield "❌ **Error:** GOOGLE_API_KEY not found. Please add it to your .env file."
        return

    try:
        google.generativeai.configure(api_key=api_key)

        generation_config = {
            "temperature": temperature,
        }
        if max_tokens and max_tokens > 0:
            # UI sliders can deliver floats; the limit must be an integer.
            generation_config["max_output_tokens"] = int(max_tokens)

        has_system_prompt = bool(system_prompt and system_prompt.strip())
        gemini = google.generativeai.GenerativeModel(
            model_name=model,
            system_instruction=system_prompt if has_system_prompt else "You are a helpful assistant",
            generation_config=generation_config
        )

        # Gemini history uses "parts" lists and the role name "model"
        # for the assistant side.
        chat_history = []
        for user_msg, bot_msg in history:
            # Callers may pad an unanswered turn with "" or None. Drop such a
            # turn as a pair so no empty text part is sent and user/model
            # roles keep alternating.
            if not (user_msg and bot_msg):
                continue
            chat_history.append({"role": "user", "parts": [user_msg]})
            chat_history.append({"role": "model", "parts": [bot_msg]})

        chat = gemini.start_chat(history=chat_history)
        response = chat.send_message(message, stream=True)

        partial_message = ""
        for chunk in response:
            partial_message += chunk.text
            yield partial_message

    except Exception as e:
        yield f"❌ **Gemini Error:** {str(e)}"

def export_chat(history):
    """Export chat history to markdown format.

    Accepts either history shape:
      * pairs: a sequence of (user_msg, bot_msg) items, one per turn;
      * messages: a sequence of {"role": ..., "content": ...} dicts, where each
        "user" message starts a new turn and each "assistant" message closes
        one. Dicts with any other role are ignored.

    Args:
        history: The chat history, or None/empty.

    Returns:
        The markdown document, or "No chat history to export." when there is
        nothing to export.
    """
    if not history:
        return "No chat history to export."

    sections = ["# Chat Export\n\n"]
    turn = 0
    for entry in history:
        if isinstance(entry, dict):
            # Messages format. Unpacking a dict as a pair would export its key
            # names instead of the conversation, so handle it explicitly.
            role = entry.get("role")
            content = entry.get("content", "")
            if role == "user":
                turn += 1
                sections.append(f"## Turn {turn}\n\n**User:** {content}\n\n")
            elif role == "assistant":
                sections.append(f"**Assistant:** {content}\n\n---\n\n")
        else:
            user_msg, bot_msg = entry
            turn += 1
            sections.append(
                f"## Turn {turn}\n\n"
                f"**User:** {user_msg}\n\n"
                f"**Assistant:** {bot_msg}\n\n"
                "---\n\n"
            )

    return "".join(sections)

# Build the enhanced interface

# Preset system prompts, keyed by the label shown in the "Quick Presets" radio
# group. Keeping labels and prompts in one mapping stops them drifting apart.
PRESET_PROMPTS = {
    "Default Assistant": "You are a helpful, friendly, and knowledgeable AI assistant.",
    "Code Expert": "You are an expert programmer. Provide clean, efficient, well-documented code with clear explanations. Always consider best practices and edge cases.",
    "Creative Writer": "You are a creative writer with a flair for engaging storytelling. Use vivid language, creative metaphors, and compelling narratives.",
    "Data Analyst": "You are a data analysis expert. Provide insights, explain statistical concepts clearly, and suggest appropriate visualization and analysis methods.",
    "Teacher": "You are a patient and encouraging teacher. Explain concepts step by step, use examples, and check for understanding."
}

with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")) as demo:

    # Header
    gr.Markdown("""
    # 🤖 Universal AI Chatbot
    ### Connect to multiple AI providers with a unified interface
    """)

    # API Status
    with gr.Row():
        api_status = gr.Markdown(f"**API Status:** {check_api_status()}")
        refresh_btn = gr.Button("🔄 Refresh Status", size="sm", scale=0)

    refresh_btn.click(fn=lambda: f"**API Status:** {check_api_status()}", outputs=api_status)

    with gr.Row():
        # Left sidebar for settings
        with gr.Column(scale=1, min_width=300):
            gr.Markdown("### ⚙️ Settings")

            backend_dropdown = gr.Dropdown(
                choices=["Ollama", "OpenRouter", "GitHub Models", "Groq", "Gemini"],
                value="Ollama",
                label="🔌 Backend Provider",
                info="Select your AI provider"
            )

            # allow_custom_value lets the user type a model that is not listed.
            model_dropdown = gr.Dropdown(
                choices=OLLAMA_MODELS,
                value=OLLAMA_MODELS[0],
                label="🧠 Model",
                info="Select the model to use",
                allow_custom_value=True
            )

            with gr.Accordion("🎛️ Advanced Settings", open=False):
                system_prompt = gr.Textbox(
                    value=PRESET_PROMPTS["Default Assistant"],
                    label="System Prompt",
                    lines=3,
                    placeholder="Enter a system prompt to set the AI's behavior..."
                )

                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.7,
                    step=0.1,
                    label="🌡️ Temperature",
                    info="Higher = more creative, Lower = more focused"
                )

                max_tokens = gr.Slider(
                    minimum=0,
                    maximum=4096,
                    value=0,
                    step=128,
                    label="📏 Max Tokens",
                    info="0 = no limit"
                )

            # Preset system prompts
            with gr.Accordion("📝 Preset Prompts", open=False):
                preset_prompts = gr.Radio(
                    choices=list(PRESET_PROMPTS),
                    label="Quick Presets",
                    value="Default Assistant"
                )

            # Export functionality
            with gr.Accordion("💾 Export Chat", open=False):
                export_btn = gr.Button("📥 Export to Markdown", variant="secondary")
                # Hidden until the first export makes it visible.
                export_output = gr.Textbox(label="Exported Chat", lines=5, visible=False)

        # Main chat area
        with gr.Column(scale=3):
            chatbot_display = gr.Chatbot(
                label="Chat",
                height=500,
                show_copy_button=True,
                avatar_images=(None, "https://api.dicebear.com/7.x/bottts/svg?seed=ai"),
                type="messages"
            )

            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Type your message here... (Press Enter to send)",
                    label="Message",
                    scale=4,
                    show_label=False
                )
                send_btn = gr.Button("📤 Send", variant="primary", scale=1)

            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
                stop_btn = gr.Button("⏹️ Stop", variant="stop")

    # Footer
    gr.Markdown("""
    ---
    <center>
    
    💡 **Tips:** Use the sidebar to switch providers, adjust temperature for creativity, and set custom system prompts.
    
    </center>
    """)

    # Event handlers
    backend_dropdown.change(
        fn=get_model_choices,
        inputs=backend_dropdown,
        outputs=model_dropdown
    )

    # Preset prompt handler
    def set_preset_prompt(preset):
        """Return the system prompt for `preset`, or the default assistant prompt if unknown."""
        return PRESET_PROMPTS.get(preset, PRESET_PROMPTS["Default Assistant"])

    preset_prompts.change(
        fn=set_preset_prompt,
        inputs=preset_prompts,
        outputs=system_prompt
    )

    # Chat function
    def respond(message, history, backend, model, system_prompt, temperature, max_tokens):
        """Stream one chat turn into the Chatbot component.

        Yields (textbox_value, history) pairs: the textbox is cleared and the
        messages-format history carries the assistant reply as it grows.
        """
        history = history or []

        if not message.strip():
            # This function is a generator, so a `return value` here would be
            # discarded; yield the outputs explicitly, then stop.
            yield "", history
            return

        # Convert messages format history to [user, assistant] pairs for the chat functions
        tuple_history = []
        for msg in history:
            if isinstance(msg, dict):
                if msg.get("role") == "user":
                    tuple_history.append([msg.get("content", ""), ""])
                elif msg.get("role") == "assistant" and tuple_history:
                    tuple_history[-1][1] = msg.get("content", "")

        # Add user message
        history = history + [{"role": "user", "content": message}]

        for response in enhanced_chat_router(message, tuple_history, backend, model, system_prompt, temperature, max_tokens):
            # The first chunk appends the assistant message; later chunks overwrite it.
            if history and history[-1].get("role") == "assistant":
                history[-1]["content"] = response
            else:
                history = history + [{"role": "assistant", "content": response}]
            yield "", history

    # Submit handlers (Enter key and Send button share the same wiring)
    chat_inputs = [msg_input, chatbot_display, backend_dropdown, model_dropdown, system_prompt, temperature, max_tokens]
    chat_outputs = [msg_input, chatbot_display]

    submit_event = msg_input.submit(fn=respond, inputs=chat_inputs, outputs=chat_outputs)
    click_event = send_btn.click(fn=respond, inputs=chat_inputs, outputs=chat_outputs)

    # Stop button: cancel whichever streaming event is in flight
    stop_btn.click(fn=None, cancels=[submit_event, click_event])

    # Clear chat
    clear_btn.click(fn=lambda: ([], ""), outputs=[chatbot_display, msg_input])

    # Export chat
    def do_export(history):
        """Render messages-format history as markdown and reveal the export textbox."""
        if not history:
            return gr.update(visible=True, value="No chat history to export.")

        markdown = "# Chat Export\n\n"
        turn = 0
        for msg in history:
            if isinstance(msg, dict):
                role = msg.get("role", "unknown")
                content = msg.get("content", "")
                if role == "user":
                    turn += 1
                    markdown += f"## Turn {turn}\n\n"
                    markdown += f"**User:** {content}\n\n"
                elif role == "assistant":
                    markdown += f"**Assistant:** {content}\n\n---\n\n"

        return gr.update(visible=True, value=markdown)

    export_btn.click(fn=do_export, inputs=chatbot_display, outputs=export_output)

if __name__ == "__main__":
    demo.launch(share=False)

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.
