In [1]:
import os
from typing import Dict, List, Generator, Optional
from dataclasses import dataclass

from dotenv import load_dotenv
import gradio as gr

# Tool: DuckDuckGo Search (optional dependency).
# _HAS_DDGS records whether the import worked; run_web_search checks this flag
# before it touches DDGS.
try:
    from duckduckgo_search import DDGS
    _HAS_DDGS = True
except Exception:
    # Any failure while importing (not only ImportError) disables the web tool.
    _HAS_DDGS = False

load_dotenv()

True

In [2]:
# -------------------------
# Message helpers (OpenAI-style)
# -------------------------
def gradio_messages_to_oai(messages: List[Dict], system_prompt: Optional[str]) -> List[Dict]:
    """Build an OpenAI-style message list from Gradio ``type='messages'`` history.

    Args:
        messages: History entries. Each must carry a ``"role"`` key and may carry
            a ``"content"`` key; entries whose content is missing or falsy are dropped.
        system_prompt: When truthy, emitted first as a ``"system"`` message.

    Returns:
        A new list of ``{"role": ..., "content": ...}`` dicts. Any other keys on
        the input entries are not carried over.
    """
    converted = [{"role": "system", "content": system_prompt}] if system_prompt else []
    for entry in messages:
        # Roles pass through unchanged ('user' / 'assistant' as Gradio emits them).
        speaker, text = entry["role"], entry.get("content", "")
        if text:
            converted.append({"role": speaker, "content": text})
    return converted

def add_tool_context(messages: List[Dict], tool_block: str) -> List[Dict]:
    """Return the conversation with ``tool_block`` added as a trailing system message.

    A falsy ``tool_block`` returns ``messages`` itself, unchanged. Otherwise a
    new list is built and the input list is left unmodified.
    """
    if tool_block:
        tool_message = {"role": "system", "content": tool_block}
        return messages + [tool_message]
    return messages

In [3]:
# -------------------------
# Web Search Tool
# -------------------------
def run_web_search(query: str, k: int = 5) -> str:
    """Run a DuckDuckGo text search and format the hits as a context block.

    Args:
        query: Search text. A blank or whitespace-only query returns ``""``.
        k: Maximum number of results. Coerced with ``int()`` because UI sliders
            may deliver the value as a float.

    Returns:
        One of:
        * ``"WEB_SEARCH_RESULTS\\n..."`` with numbered title / URL / snippet entries,
        * ``"No results."`` when the search returned nothing,
        * ``""`` for a blank query,
        * a human-readable "unavailable" or "error" message. This function never
          raises; failures are reported as text so the chat can continue.
    """
    if not _HAS_DDGS:
        return "Web search unavailable: duckduckgo_search not installed."
    if not query or not query.strip():
        return ""
    try:
        with DDGS() as ddgs:
            # Each hit is a mapping like {'title': ..., 'href': ..., 'body': ...}.
            results = list(ddgs.text(query, max_results=int(k)) or [])
        if not results:
            return "No results."
        # Format compactly for model context.
        lines = []
        for i, r in enumerate(results, 1):
            # A field can be present but None; treat that as empty so one
            # sparse hit does not discard the whole result set.
            title = str(r.get("title") or "").strip()
            href = str(r.get("href") or "").strip()
            body = str(r.get("body") or "").strip()
            lines.append(f"{i}. {title}\nURL: {href}\nSnippet: {body}")
        return "WEB_SEARCH_RESULTS\n" + "\n\n".join(lines)
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Web search error: {e}"

In [35]:
# -------------------------
# Providers
# -------------------------
@dataclass
class ChatConfig:
    """Per-request settings handed to a provider's ``stream_chat``."""
    # Provider key (chat_fn passes the stripped, lowercased dropdown value).
    provider: str
    # Model identifier; the providers forward it to their SDK unchanged.
    model: str
    # Sampling temperature; defaults to 0.2.
    temperature: float = 0.2

class BaseChat:
    """Common interface for chat providers; subclasses override ``stream_chat``."""
    def stream_chat(self, messages: List[Dict], cfg: ChatConfig) -> Generator[str, None, None]:
        """Yield response text for ``messages`` using the settings in ``cfg``.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

class OpenAIChat(BaseChat):
    """Chat provider backed by the OpenAI Chat Completions streaming API."""

    def stream_chat(self, messages: List[Dict], cfg: ChatConfig):
        """Yield text deltas from a streamed chat completion.

        Args:
            messages: OpenAI-style ``{"role", "content"}`` dicts, sent as-is.
            cfg: Supplies ``model`` and ``temperature``.

        Yields:
            Text fragments as they arrive. Problems (missing package, missing
            ``OPENAI_API_KEY``, API errors) are yielded as ``"[OpenAI] ..."``
            text rather than raised, so the UI can display them.
        """
        try:
            from openai import OpenAI
        except ImportError:
            # Same reporting style as the Ollama provider.
            yield "[OpenAI] Error: openai package not installed. Run: pip install openai"
            return
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            yield "[OpenAI] Missing OPENAI_API_KEY."
            return
        try:
            client = OpenAI(api_key=api_key)
            stream = client.chat.completions.create(
                model=cfg.model,
                messages=messages,
                temperature=cfg.temperature,
                stream=True,
            )
            for chunk in stream:
                # Some chunks (e.g. usage-only or content-filter chunks) arrive
                # with an empty `choices` list; indexing [0] would raise.
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta
                if delta and delta.content:
                    yield delta.content
        except Exception as e:
            yield f"[OpenAI] Error: {e}"

class AnthropicChat(BaseChat):
    """Chat provider backed by the Anthropic Messages streaming API."""

    def stream_chat(self, messages: List[Dict], cfg: ChatConfig):
        """Yield text from a streamed Anthropic message.

        OpenAI-style input is converted first: every ``"system"`` message is
        joined (newline-separated) into the single top-level ``system`` prompt,
        and ``"user"`` / ``"assistant"`` messages become the turn list. Other
        roles are ignored.

        Args:
            messages: OpenAI-style ``{"role", "content"}`` dicts.
            cfg: Supplies ``model`` and ``temperature``.

        Yields:
            Text fragments as they arrive. Problems (missing package, missing
            ``ANTHROPIC_API_KEY``, API errors) are yielded as
            ``"[Anthropic] ..."`` text rather than raised.
        """
        try:
            import anthropic
        except ImportError:
            # Same reporting style as the Ollama provider.
            yield "[Anthropic] Error: anthropic package not installed. Run: pip install anthropic"
            return
        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            yield "[Anthropic] Missing ANTHROPIC_API_KEY."
            return

        # Convert OpenAI-style messages to Anthropic format.
        system_parts = []
        turns = []
        for m in messages:
            role = m["role"]
            if role == "system":
                system_parts.append(m["content"])
            elif role in ("user", "assistant"):
                turns.append({"role": role, "content": m["content"]})

        request = {
            "model": cfg.model,
            "max_tokens": 2048,
            "temperature": cfg.temperature,
            "messages": turns,
        }
        sys_prompt = "\n".join(system_parts)
        if sys_prompt:
            # Leave `system` out when there is nothing to send, instead of
            # passing None for it.
            request["system"] = sys_prompt

        try:
            client = anthropic.Anthropic(api_key=api_key)
            with client.messages.stream(**request) as stream:
                for text in stream.text_stream:
                    if text:
                        yield text
        except Exception as e:
            yield f"[Anthropic] Error: {e}"

class GoogleChat(BaseChat):
    """Chat provider backed by the ``google.generativeai`` streaming API."""

    def stream_chat(self, messages: List[Dict], cfg: ChatConfig):
        """Yield text from a streamed ``generate_content`` call.

        OpenAI-style input is converted first: ``"system"`` messages are joined
        (newline-separated) into ``system_instruction``; ``"user"`` turns keep
        their role and ``"assistant"`` turns are sent with role ``"model"``.
        If no user/assistant turn exists, a single ``"Hello"`` user turn is sent.

        Args:
            messages: OpenAI-style ``{"role", "content"}`` dicts.
            cfg: Supplies ``model`` and ``temperature``.

        Yields:
            Text fragments as they arrive. Problems (missing package, missing
            ``GOOGLE_API_KEY``, API errors) are yielded as ``"[Google] ..."``
            text rather than raised.
        """
        try:
            import google.generativeai as genai
        except ImportError:
            # Same reporting style as the Ollama provider.
            yield (
                "[Google] Error: google-generativeai package not installed. "
                "Run: pip install google-generativeai"
            )
            return
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            yield "[Google] Missing GOOGLE_API_KEY."
            return
        # Split out system instruction and history.
        system_instruction = "\n".join([m["content"] for m in messages if m["role"] == "system"])
        dialog = []
        for m in messages:
            if m["role"] == "user":
                dialog.append({"role": "user", "parts": [m["content"]]})
            elif m["role"] == "assistant":
                dialog.append({"role": "model", "parts": [m["content"]]})
        try:
            # Inside the try so a configuration failure is reported as text too.
            genai.configure(api_key=api_key)
            model = genai.GenerativeModel(model_name=cfg.model, system_instruction=system_instruction or None)
            resp = model.generate_content(dialog if dialog else [{"role":"user","parts":["Hello"]}],
                                          generation_config={"temperature": cfg.temperature},
                                          stream=True)
            for r in resp:
                if hasattr(r, "text") and r.text:
                    yield r.text
        except Exception as e:
            yield f"[Google] Error: {e}"

class OllamaChat(BaseChat):
    """Chat provider backed by a local Ollama server via the ``ollama`` package."""

    def stream_chat(self, messages: List[Dict], cfg: ChatConfig):
        """Yield text from a streamed ``ollama.chat`` call.

        Args:
            messages: OpenAI-style ``{"role", "content"}`` dicts, sent as-is.
            cfg: Supplies ``model`` and ``temperature``.

        Yields:
            Text fragments as they arrive. If the stream finishes without any
            text, a diagnostic ``"[Ollama] ..."`` line is yielded instead.
            Problems (missing package, server/model errors) are likewise yielded
            as ``"[Ollama] Error: ..."`` rather than raised.
        """
        try:
            import ollama
        except ImportError:
            yield "[Ollama] Error: ollama package not installed. Run: pip install ollama"
            return

        try:
            # Stream like the other providers instead of waiting for the full reply.
            stream = ollama.chat(
                model=cfg.model,
                messages=messages,
                stream=True,
                options={'temperature': cfg.temperature}
            )

            saw_message = False   # at least one chunk carried a 'message'
            produced_text = False  # at least one non-empty content fragment
            for part in stream:
                if not part or 'message' not in part:
                    continue
                saw_message = True
                content = part['message'].get('content', '')
                if content:
                    produced_text = True
                    yield content

            if not produced_text:
                if saw_message:
                    yield f"[Ollama] Empty response from model '{cfg.model}'"
                else:
                    yield f"[Ollama] Invalid response format from model '{cfg.model}'"

        except Exception as e:
            yield f"[Ollama] Error: {e}"


# Registry of provider key -> provider instance (one instance per provider,
# created when this cell runs). chat_fn looks providers up here by key.
PROVIDER_IMPLS = {
    key: provider_cls()
    for key, provider_cls in (
        ("openai", OpenAIChat),
        ("anthropic", AnthropicChat),
        ("google", GoogleChat),
        ("ollama", OllamaChat),
    )
}

In [36]:
# -------------------------
# Defaults
# -------------------------
# System prompt used when the UI's system-prompt box is empty; also the
# initial value shown in that box.
DEFAULT_SYSTEM = (
    "You are a senior NLP/ML engineer. Answer technical questions concisely. "
    "Show minimal code when useful. Cite sources only if provided in context. "
    "When users provide error messages, diagnose and propose precise fixes."
)

# Default model name per provider; the UI fills the model box with the matching
# value whenever the provider dropdown changes.
OPENAI_DEFAULT = "gpt-4o-mini"
ANTHROPIC_DEFAULT = "claude-sonnet-4-0"
GOOGLE_DEFAULT = "gemini-2.0-flash"
OLLAMA_DEFAULT = "llama3.2"  # or any other model already pulled locally


In [37]:
# -------------------------
# Chat function for Gradio
# -------------------------
def chat_fn(user_message, history, provider, model, temperature, use_web_tool, web_k, system_prompt):
    """Gradio ``ChatInterface`` callback that streams the assistant reply.

    Each yielded value is the full message text so far (not a delta), so the
    UI can replace the displayed message on every yield.

    Args:
        user_message: The text the user just submitted.
        history: Prior turns as ``{"role", "content"}`` dicts (``type='messages'``),
            or ``None`` for a fresh conversation.
        provider: Provider key; matched case-insensitively against ``PROVIDER_IMPLS``.
        model: Model name forwarded to the provider.
        temperature: Sampling temperature forwarded to the provider.
        use_web_tool: When truthy, run a web search on ``user_message`` and add
            the results to the conversation as a system message.
        web_k: Number of web results requested (coerced to ``int``).
        system_prompt: System prompt; ``DEFAULT_SYSTEM`` is used when empty.

    Yields:
        Progressive snapshots of the reply. When web context is in use, every
        snapshot starts with a short header. If the provider produces no text,
        the final snapshot is ``"[No response]"``.
    """
    # 1) Resolve the provider first so a bad selection fails before any network call.
    provider_key = (provider or "").strip().lower()
    impl = PROVIDER_IMPLS.get(provider_key)
    if not impl:
        yield f"[Error] Unsupported provider: {provider}"
        return

    # 2) Build base messages from history + current user message.
    #    history is already [{"role":..., "content":...}] because type='messages'.
    base = gradio_messages_to_oai(history or [], system_prompt or DEFAULT_SYSTEM)
    base.append({"role": "user", "content": user_message})

    # 3) Optional web tool.
    tool_block = ""
    if use_web_tool:
        # Slider values may arrive as floats; the search expects an int count.
        tool_block = run_web_search(user_message, k=int(web_k) if web_k else 5)
        if tool_block:
            base = add_tool_context(base, tool_block)

    cfg = ChatConfig(provider=provider_key, model=model, temperature=temperature)

    # 4) If the tool was used, show a short header right away (so users see the
    #    tool at work) and keep it as a prefix of every later snapshot.
    #    Without the prefix the first streamed chunk would overwrite it.
    header = ""
    if tool_block:
        header = "Using web search context (top results included in system context).\n---\n"
        yield header

    # 5) Stream the accumulated reply.
    response = ""
    for chunk in impl.stream_chat(base, cfg):
        if not chunk:
            continue
        response += chunk
        yield header + response

    # Checked after the loop so it also fires when the provider yields nothing.
    if not response:
        yield header + "[No response]"

# -------------------------
# UI
# -------------------------
# Layout: a settings column on the left and the chat interface on the right.
# The settings components are passed to chat_fn as additional inputs.
with gr.Blocks(title="Technical Q/A Prototype") as demo:
    gr.Markdown("### Technical Q/A Prototype — streaming, system prompt, model switch, simple web tool")

    with gr.Row():
        # Left column: provider / model / sampling / tool / system-prompt controls.
        with gr.Column(scale=1, min_width=320):
            provider = gr.Dropdown(
                label="Provider",
                choices=["openai", "anthropic", "google", "ollama"],
                value="openai",
            )
            model = gr.Textbox(
                label="Model name",
                value=OPENAI_DEFAULT,
                placeholder="e.g., gpt-4o-mini / claude-3-5-sonnet-latest / gemini-1.5-pro / llama3.1",
            )
            temperature = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Temperature")
            use_web_tool = gr.Checkbox(label="Use web search tool (DuckDuckGo)", value=False)
            web_k = gr.Slider(1, 10, value=5, step=1, label="Web results (k)")
            system_prompt = gr.Textbox(
                label="System prompt",
                value=DEFAULT_SYSTEM,
                lines=4,
            )
            gr.Markdown(
                "API keys via `.env`: `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GOOGLE_API_KEY`. "
                "Ollama requires a local server."
            )

        # Right column: the chat itself. The order of additional_inputs must match
        # chat_fn's parameters after (user_message, history).
        with gr.Column(scale=2):
            chat = gr.ChatInterface(
                fn=chat_fn,
                type="messages",  # important for future gradio compatibility
                additional_inputs=[provider, model, temperature, use_web_tool, web_k, system_prompt],
                textbox=gr.Textbox(placeholder="Ask a technical question…", lines=3, autofocus=True, submit_btn=True),
                cache_examples=False,
                
            )

    # Sensible defaults on provider change
    def _set_default_model(p):
        """Return the default model name for provider ``p`` ("" if unrecognised)."""
        p = (p or "").lower()
        if p == "openai":
            return OPENAI_DEFAULT
        if p == "anthropic":
            return ANTHROPIC_DEFAULT
        if p == "google":
            return GOOGLE_DEFAULT
        if p == "ollama":
            return OLLAMA_DEFAULT
        return ""
    # Overwrite the model textbox whenever the provider dropdown changes.
    provider.change(_set_default_model, inputs=provider, outputs=model)

# Start the app; inline=True requests rendering inside the notebook.
demo.launch(inline=True)

* Running on local URL:  http://127.0.0.1:7870
* To create a public link, set `share=True` in `launch()`.




In [29]:
!ollama list

NAME                ID              SIZE      MODIFIED     
deepseek-r1:1.5b    e0979632db5a    1.1 GB    26 hours ago    
llama3.2:latest     a80c4f17acd5    2.0 GB    26 hours ago    
