# Week 2 Exercise — Technical Q&A Prototype (profe-ssor)

Full prototype of the **technical question answerer** from Week 1, with:

- **Gradio UI** — chat with history
- **Streaming** — token-by-token responses
- **System prompt** — technical tutor expertise
- **Model switch** — GPT (OpenAI/OpenRouter) or Llama (Ollama)
- **Tool** — `look_up_definition(term)` for quick definitions

**Run all cells in order**, then use the app. Optional: add audio (Whisper + TTS) for voice.

In [None]:
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
import ollama

In [None]:
# Load variables from a .env file; override=True lets the file's values
# replace any that are already set in the process environment.
load_dotenv(override=True)
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")

# Use OpenRouter when its key is configured; otherwise construct the OpenAI
# client with its defaults. GPT_MODEL is the model id used with whichever
# client was chosen.
if openrouter_api_key:
    openai_client = OpenAI(api_key=openrouter_api_key, base_url="https://openrouter.ai/api/v1")
    GPT_MODEL = "openai/gpt-4o-mini"
    print("GPT: OpenRouter")
else:
    openai_client = OpenAI()
    GPT_MODEL = "gpt-4o-mini"
    print("GPT: OpenAI")

# Model name passed to ollama.chat for the "Llama (Ollama)" option.
OLLAMA_MODEL = "llama3.2"
print("Llama:", OLLAMA_MODEL, "(Ollama)")

## System prompt

In [None]:
# System message sent at the start of every conversation. Three lines, each
# terminated by a newline (including the last one).
SYSTEM_PROMPT = (
    "You are a helpful technical tutor. You answer questions about Python code, "
    "software engineering, data science, and LLMs.\n"
    "Give clear, accurate explanations. If you don't know something, say so.\n"
    "You may call look_up_definition(term) to get a quick definition before explaining.\n"
)

## Tool: look_up_definition

In [None]:
# Built once rather than on every call. Keys are lower-case because lookups
# are normalised with strip().lower().
_GLOSSARY = {
    "lru_cache": "functools.lru_cache: decorator that caches recent calls (LRU = Least Recently Used).",
    "transformer": "Neural network architecture based on self-attention (GPT, BERT).",
    "token": "Unit of text (word or subword) that an LLM processes.",
    "streaming": "Sending model output incrementally as it is generated.",
    "api": "Application Programming Interface: a way for programs to talk to each other.",
}


def look_up_definition(term: str) -> str:
    """Return a short glossary definition for *term*.

    The lookup ignores surrounding whitespace and letter case. ``None`` (which
    a model can send as a JSON ``null`` argument) is treated as an empty term
    instead of raising ``AttributeError``; other non-string values are
    converted with ``str``.

    Args:
        term: Name to look up, e.g. ``"lru_cache"`` or ``" Token "``.

    Returns:
        The definition, or ``"No definition for '<term>'."`` when the term is
        not in the glossary.
    """
    term = "" if term is None else str(term)
    return _GLOSSARY.get(term.strip().lower(), f"No definition for '{term}'.")

# Tool schema passed to the chat completions API: one function,
# look_up_definition, taking a single required string argument "term".
tools = [
    dict(
        type="function",
        function=dict(
            name="look_up_definition",
            description="Get a brief technical definition (e.g. lru_cache, transformer, token).",
            parameters=dict(
                type="object",
                properties=dict(term=dict(type="string")),
                required=["term"],
                additionalProperties=False,
            ),
        ),
    ),
]

In [None]:
def handle_tool_calls(message):
    """Execute the tool calls requested by an assistant message.

    Exactly one ``tool`` message is produced per requested call, because the
    chat completions API rejects a follow-up request in which any
    ``tool_call_id`` is left unanswered. Unknown tool names and malformed
    arguments are reported back to the model as text instead of raising.

    Args:
        message: Object with a ``tool_calls`` attribute (may be ``None``);
            each call exposes ``id``, ``function.name`` and
            ``function.arguments`` (a JSON string).

    Returns:
        A list of ``{"role": "tool", "content": ..., "tool_call_id": ...}``
        dicts, in the same order as the requested calls.
    """
    out = []
    for tc in message.tool_calls or []:
        name = tc.function.name
        if name != "look_up_definition":
            content = f"Unknown tool '{name}'."
        else:
            try:
                args = json.loads(tc.function.arguments or "{}")
            except json.JSONDecodeError:
                args = None
            # Valid JSON that is not an object (e.g. a list) has no "term" key.
            if isinstance(args, dict):
                content = look_up_definition(args.get("term", ""))
            else:
                content = "Invalid arguments: expected a JSON object with a 'term' string."
        out.append({"role": "tool", "content": content, "tool_call_id": tc.id})
    return out

## Chat (streaming + model switch)

In [None]:
def chat_stream(message, history, model_choice):
    """Yield the assistant's reply as a growing string for the chosen model.

    Each yielded value is the full reply text so far (not a delta), which is
    the form a Gradio chat function is expected to yield when streaming.

    Args:
        message: The new user message.
        history: Previous turns as dicts with ``"role"`` and ``"content"``
            keys; any other keys are ignored.
        model_choice: ``"Llama (Ollama)"`` selects the Ollama model; any
            other value uses the GPT client with the glossary tool enabled.

    Yields:
        The cumulative reply text, an error string if the backend call
        fails, or ``"(No text reply.)"`` when the GPT model returns no text.
    """
    # Local import: only needed to adapt streamed tool calls to the
    # attribute-style objects handle_tool_calls reads.
    from types import SimpleNamespace

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend({"role": h["role"], "content": h["content"] or ""} for h in history)
    messages.append({"role": "user", "content": message})

    if model_choice == "Llama (Ollama)":
        reply = ""
        try:
            for chunk in ollama.chat(model=OLLAMA_MODEL, messages=messages, stream=True):
                part = (chunk.get("message") or {}).get("content") or ""
                reply += part
                yield reply
        except Exception as e:
            yield f"Ollama error: {e}. Is Ollama running? Model {OLLAMA_MODEL} pulled?"
        return

    # GPT + tools. Every round is streamed so text reaches the UI as it is
    # generated; tool-call fragments are accumulated alongside the text.
    max_tool_rounds = 5  # guards against a model that keeps requesting tools
    try:
        for round_no in range(max_tool_rounds + 1):
            request = {"model": GPT_MODEL, "messages": messages, "stream": True}
            if round_no < max_tool_rounds:
                request["tools"] = tools
                request["tool_choice"] = "auto"
            # else: final round omits the tools so the model must answer in text.

            text = ""
            pending = {}  # tool-call index -> accumulated id / name / arguments
            for chunk in openai_client.chat.completions.create(**request):
                if not chunk.choices:
                    # Some chunks (e.g. usage-only ones) carry no choices.
                    continue
                delta = chunk.choices[0].delta
                if delta.content:
                    text += delta.content
                    yield text
                for tc in delta.tool_calls or []:
                    slot = pending.setdefault(tc.index, {"id": "", "name": "", "arguments": ""})
                    if tc.id:
                        slot["id"] = tc.id
                    if tc.function is not None:
                        # Name and arguments arrive as string fragments.
                        slot["name"] += tc.function.name or ""
                        slot["arguments"] += tc.function.arguments or ""

            if not pending:
                if not text.strip():
                    yield "(No text reply.)"
                return

            calls = [pending[i] for i in sorted(pending)]
            # The assistant turn that requested the tools must precede the
            # tool results in the conversation.
            messages.append({
                "role": "assistant",
                "content": text or None,
                "tool_calls": [
                    {
                        "id": c["id"],
                        "type": "function",
                        "function": {"name": c["name"], "arguments": c["arguments"]},
                    }
                    for c in calls
                ],
            })
            requested = SimpleNamespace(
                tool_calls=[
                    SimpleNamespace(
                        id=c["id"],
                        function=SimpleNamespace(name=c["name"], arguments=c["arguments"]),
                    )
                    for c in calls
                ]
            )
            messages.extend(handle_tool_calls(requested))
    except Exception as e:
        # UI boundary: report the failure in the chat instead of crashing the
        # handler, mirroring the Ollama branch.
        yield f"GPT error: {e}. Check your API key and network connection."

## Gradio UI

In [None]:
def chat(message, history, model_choice):
    """Stream the assistant's reply for Gradio's ChatInterface.

    A missing or whitespace-only message produces no output. Any other
    message is handed to chat_stream and its partial replies are passed
    through unchanged.
    """
    has_text = bool(message) and bool(message.strip())
    if has_text:
        yield from chat_stream(message, history, model_choice)

# Build the page: a heading, a model selector, and the chat interface.
# The CSS caps the width of elements with class "main" at 700px and centres them.
with gr.Blocks(title="Technical Q&A", theme=gr.themes.Soft(), css=".main { max-width: 700px; margin: auto; }") as demo:
    gr.Markdown("### Technical tutor — Python, ML, LLMs. Ask e.g. *What is lru_cache?*")
    # The "Llama (Ollama)" choice must match the string chat_stream compares
    # model_choice against; the GPT option is the default.
    model_dropdown = gr.Dropdown(
        ["GPT (OpenAI/OpenRouter)", "Llama (Ollama)"],
        value="GPT (OpenAI/OpenRouter)",
        label="Model",
    )
    # The dropdown is supplied as an additional input, so its current value
    # reaches chat() as the model_choice argument.
    gr.ChatInterface(
        fn=chat,
        type="messages",
        additional_inputs=[model_dropdown],
        chatbot=gr.Chatbot(type="messages", height=400),
        textbox=gr.Textbox(placeholder="Ask a technical question...", lines=2, label="Message"),
        submit_btn="Send",
    )
# Start the Gradio app.
demo.launch()