Technical Question Explainer with Voice functionality

A tool that takes a technical question (or code snippet) and returns a structured explanation.  
Supports **OpenAI** and **Ollama** (local models) — switchable from the Gradio UI.

---
### Setup
```bash
uv add openai gradio
```
For **Ollama**, make sure it's running locally and you've pulled a model:
```bash
ollama pull llama3.2
```

In [2]:
import os
import io
import tempfile
import gradio as gr
from openai import OpenAI

In [3]:
# OpenAI API key, read once from the environment; os.getenv returns None when
# the variable is unset, which get_openai_client() reports as a ValueError.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Chat model returned by get_client() for every provider other than Ollama.
OPENAI_MODEL   = "gpt-4o-mini"


In [4]:
# URL passed as base_url to the OpenAI client when the Ollama provider is chosen.
OLLAMA_BASE_URL = "http://localhost:11434/v1"
# Model name returned by get_client() for the Ollama provider.
OLLAMA_MODEL    = "llama3.2" 

In [5]:
# Model used by transcribe_audio() for speech-to-text.
WHISPER_MODEL = "whisper-1"
# Voice and model used by text_to_speech() for speech synthesis.
TTS_VOICE     = "alloy"
TTS_MODEL     = "tts-1"

In [6]:
# System message sent ahead of the user's question in explain_streaming();
# it fixes the four-part structure and markdown formatting of every answer.
SYSTEM_PROMPT = """You are an expert technical educator.
When given a technical question or code snippet, provide:
1. A clear, concise explanation (2-3 sentences)
2. A simple analogy to make it relatable
3. A line-by-line or concept breakdown (especially for code)
4. Key takeaways as bullet points

Keep explanations accessible but accurate. Format your response in clean markdown."""

In [10]:
# Sample question; its stripped form becomes EXAMPLE_QUESTION, the default
# content of the question textbox and the first entry in the examples list.
question = """
Please explain what this code does and why:
yield from {book.get("author") for book in books if book.get("author")}
"""

In [11]:
def get_openai_client() -> OpenAI:
    """Build an OpenAI client from the configured API key.

    Used for chat when the OpenAI provider is selected, and by the
    transcription and text-to-speech helpers.

    Raises:
        ValueError: If OPENAI_API_KEY is empty or unset.
    """
    if OPENAI_API_KEY:
        return OpenAI(api_key=OPENAI_API_KEY)
    raise ValueError("Please set OPENAI_API_KEY in the Configuration cell.")



def get_client(provider: str) -> tuple[OpenAI, str]:
    """Select the chat client and model name for a provider label.

    The label "Ollama (Local)" maps to a client pointed at OLLAMA_BASE_URL
    paired with OLLAMA_MODEL; any other label falls back to the OpenAI
    client paired with OPENAI_MODEL.

    Raises:
        ValueError: Propagated from get_openai_client() when the OpenAI key
            is missing.
    """
    if provider != "Ollama (Local)":
        return get_openai_client(), OPENAI_MODEL

    # The literal "ollama" is passed as api_key; it is not a real credential.
    ollama_client = OpenAI(base_url=OLLAMA_BASE_URL, api_key="ollama")
    return ollama_client, OLLAMA_MODEL

In [12]:
def explain_streaming(question: str, provider: str):
    """Stream a markdown explanation of *question* from the chosen provider.

    Generator intended for Gradio streaming: every yielded value is the full
    markdown accumulated so far (a header naming the model, then the answer),
    so the UI can replace the displayed text on each update.

    Args:
        question: Technical question or code snippet. ``None``, empty, or
            whitespace-only input yields a single prompt message.
        provider: Provider label forwarded to ``get_client``.

    Yields:
        Progressively longer markdown strings. Every failure message starts
        with ``-*-``, the prefix ``text_to_speech`` checks to skip audio.
    """
    # Guard against None as well as blank text so the UI never sees a crash.
    if not question or not question.strip():
        yield "-*- Please enter a question or code snippet."
        return

    try:
        client, model = get_client(provider)
    except ValueError as e:
        # Same "-*-" prefix as the other failures so the message is not spoken.
        yield f"-*- **Error:** {e}"
        return

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user",   "content": question},
    ]

    accumulated = f"*Using **{model}** via {provider}‚Ä¶*\n\n---\n\n"
    try:
        with client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
        ) as stream:
            for chunk in stream:
                # Some chunks carry no choices; indexing them would raise
                # IndexError and abort an otherwise healthy stream.
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta.content or ""
                accumulated += delta
                yield accumulated
    except Exception as e:
        # Boundary handler: surface any provider/network failure in the UI.
        yield f"-*- **API Error:** {e}"

In [13]:
def transcribe_audio(audio_path: str) -> str:
    """Transcribe a recorded audio file to text using OpenAI Whisper."""
    if audio_path is None:
        return ""
    try:
        client = get_openai_client()
        with open(audio_path, "rb") as f:
            transcript = client.audio.transcriptions.create(
                model=WHISPER_MODEL,
                file=f,
            )
        return transcript.text
    except Exception as e:
        return f"[Transcription error: {e}]"

In [14]:
def text_to_speech(text: str) -> str | None:
    """Convert text to speech and return a path to the audio file."""
    if not text or text.startswith("-*-"):
        return None

    import re
    clean = re.sub(r"[#*`_>\-]+", " ", text)   # remove markdown symbols
    clean = re.sub(r"\s+", " ", clean).strip()
    # Limit length to avoid very long TTS calls
    clean = clean[:3000]

    try:
        client = get_openai_client()
        response = client.audio.speech.create(
            model=TTS_MODEL,
            voice=TTS_VOICE,
            input=clean,
        )
        # Save to a temp file and return the path for Gradio
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        tmp.write(response.content)
        tmp.close()
        return tmp.name
    except Exception as e:
        print(f"TTS error: {e}")
        return None


In [15]:
def handle_audio_input(audio_path: str | None, provider: str):
    """Transcribe a recording, stream its explanation, then add spoken audio.

    Args:
        audio_path: Path of the recorded question, or ``None`` if nothing
            was recorded.
        provider: Provider label forwarded to ``explain_streaming``.

    Yields:
        ``(question_text, markdown, audio_path)`` tuples. The audio element
        is ``None`` while text is streaming; the final tuple carries the
        result of ``text_to_speech`` for the complete markdown.
    """
    question = transcribe_audio(audio_path)
    # Match the exact error prefix produced by transcribe_audio: a bare "["
    # check would also reject a genuine transcript that begins with a bracket.
    if not question or question.startswith("[Transcription error:"):
        yield question, "-*- Could not transcribe audio.", None
        return

    full_markdown = ""
    for chunk in explain_streaming(question, provider):
        full_markdown = chunk
        yield question, full_markdown, None   # stream text; audio comes at the end

    audio_out = text_to_speech(full_markdown)
    yield question, full_markdown, audio_out

In [16]:
# Default content of the question box: the sample question without its
# surrounding newlines.
EXAMPLE_QUESTION = question.strip()

# Build the Gradio interface; the Blocks app is bound to `demo` for launching.
with gr.Blocks(
    title="Tech Explainer",
    theme=gr.themes.Base(
        primary_hue="teal",
        neutral_hue="zinc",
        font=[gr.themes.GoogleFont("IBM Plex Mono"), "monospace"],
    ),
    # Centre the header and subtitle, and hide the page footer.
    css="""
        #header { text-align:center; padding:1rem 0 0.25rem; }
        #sub    { text-align:center; color:#71717a; margin-bottom:1.5rem; }
        footer  { display:none !important; }
    """,
) as demo:

    gr.Markdown("# üî¨ Technical Question Explainer", elem_id="header")
    gr.Markdown(
        "Type or speak your question ‚Äî get a structured explanation with audio playback.",
        elem_id="sub",
    )

    # Provider selector; the chosen label is passed to the handlers, and
    # get_client() compares it against "Ollama (Local)".
    with gr.Row():
        provider_radio = gr.Radio(
            choices=["OpenAI (GPT-4o-mini)", "Ollama (Local)"],
            value="OpenAI (GPT-4o-mini)",
            label="LLM Provider",
        )

    with gr.Tabs():

        # Text tab: typed question plus Explain / Clear buttons.
        with gr.TabItem("‚å®Ô∏è  Text"):
            question_box = gr.Textbox(
                label="Question or Code Snippet",
                placeholder="Paste code or ask a technical question‚Ä¶",
                lines=6,
                value=EXAMPLE_QUESTION,
            )
            with gr.Row():
                submit_btn = gr.Button("Explain", variant="primary", scale=4)
                clear_btn  = gr.Button("Clear",  variant="secondary", scale=1)

    
        # Voice tab: microphone recording handed to handle_audio_input as a file path.
        with gr.TabItem("üéôÔ∏è  Voice"):
            gr.Markdown(
                "Record your question. It will be transcribed by **Whisper**, "
                "explained by the selected LLM, and read back to you via **TTS**.  \n"
                "*(Whisper & TTS always use OpenAI regardless of the LLM provider chosen above.)*"
            )
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Record your question",
            )
            voice_btn = gr.Button("Transcribe & Explain", variant="primary")

    # Shared outputs, placed outside the tabs so both tabs write to them.
    answer_box = gr.Markdown(
        label="Explanation",
        value="*Your explanation will appear here‚Ä¶*",
    )
    audio_output = gr.Audio(
        label="Listen to the Explanation",
        type="filepath",
        autoplay=True,
        visible=True,
    )

    # Example questions that load into the question box.
    gr.Examples(
        examples=[
            [EXAMPLE_QUESTION],
            ["What is the difference between a process and a thread?"],
            ["Explain Python's GIL and when it actually matters."],
            ["What is gradient descent and how does backpropagation use it?"],
            ["What does async/await do under the hood in Python?"],
            ["Explain the CAP theorem in distributed systems."],
        ],
        inputs=question_box,
        label="Example Questions ‚Äî click to load",
    )

    def explain_then_speak(question, provider):
        """Stream the text explanation, then yield it once more with audio.

        Yields (markdown, audio_path) pairs: audio_path is None for every
        streamed update, and the final pair carries the text_to_speech()
        result for the last markdown received.
        """
        full_markdown = ""
        for chunk in explain_streaming(question, provider):
            full_markdown = chunk
            yield full_markdown, None
        audio_path = text_to_speech(full_markdown)
        yield full_markdown, audio_path

    # The Explain button and submitting the textbox run the same handler.
    submit_btn.click(
        explain_then_speak,
        inputs=[question_box, provider_radio],
        outputs=[answer_box, audio_output],
    )
    question_box.submit(
        explain_then_speak,
        inputs=[question_box, provider_radio],
        outputs=[answer_box, audio_output],
    )
    # "Clear" restores the example question and the placeholder answer, and
    # empties the audio player.
    clear_btn.click(
        lambda: (EXAMPLE_QUESTION, "*Your explanation will appear here‚Ä¶*", None),
        outputs=[question_box, answer_box, audio_output],
    )


    # The voice flow also writes the transcript into the question box.
    voice_btn.click(
        handle_audio_input,
        inputs=[audio_input, provider_radio],
        outputs=[question_box, answer_box, audio_output],
    )       



In [17]:
# Start the Gradio server for the app; inbrowser=True asks Gradio to open it
# in a browser tab.
demo.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


