# Additional End of Week Exercise - Week 2

Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in the Week 1 Exercise.

This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!

If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.

I will publish a full solution here soon - unless someone beats me to it...

There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI for a course (like this one!). I can't wait to see your results.

In [None]:
#imports
import os
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
import transformers
from transformers import pipeline
import torch
from accelerate import Accelerator
from huggingface_hub import login
from groq import Groq
import tempfile


In [None]:
# Pull credentials from the environment (load_dotenv reads a .env file first)
load_dotenv(override=True)
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
hf_token = os.getenv("HF_TOKEN")
groq_api_key = os.getenv("GROQ_API_KEY")

# Report which credentials are present without echoing their values
print(
    "OPENROUTER_API_KEY is set."
    if openrouter_api_key
    else "OPENROUTER_API_KEY is not set."
)
print("HuggingFace token found." if hf_token else "No HuggingFace token found.")
print("GROQ_API_KEY is set." if groq_api_key else "GROQ_API_KEY is not set.")

In [None]:
# Model identifiers passed as `model=` to the chat-completions client
MODEL_GPT = "openai/gpt-4o-mini"
MODEL_GEMINI = "google/gemini-2.5-flash-lite"

# Base URL handed to the OpenAI client so requests go to OpenRouter
openrouter_url = "https://openrouter.ai/api/v1"

# Choices shown in the model dropdown; the first entry is the default selection
models = [MODEL_GPT, MODEL_GEMINI]

In [None]:
# LLM client: the OpenAI SDK pointed at the OpenRouter base URL
client_llm = OpenAI(
    base_url=openrouter_url,
    api_key=openrouter_api_key,
)

# Groq client, used by text_to_audio for speech synthesis
client_groq = Groq(
    api_key=groq_api_key,
)


In [None]:
# System prompt placed first in the message list sent to the LLM; it fixes the
# assistant's role and the expected answer style (one concise paragraph).
system_prompt = """ 
You are a technical assistant.
Your task is to take a technical question and produce a clear, accurate, and well-structured explanation.
Guidelines:
- Prioritize clarity over complexity.
- If the question lacks necessary details, state assumptions clearly.
- Avoid fluff, marketing language, or unnecessary verbosity.
- Respond in one concise paragraph, using simple language and examples when helpful
"""

In [None]:
def get_user_prompt(question):
    """Wrap *question* in the fixed instruction text used as the user message.

    Args:
        question: The question text to embed (formatted into the template).

    Returns:
        The prompt string: a leading newline, two instruction lines, a
        ``Question: ...`` line, and a trailing indented blank line.
    """
    return (
        "\n"
        "    You are a technical assistant. \n"
        "    Please answer the following question in a clear, concise, and structured manner, following the guidelines provided.\n"
        f"    Question: {question}\n"
        "    "
    )

In [None]:
# Let Accelerate choose the compute device; the result is passed to the
# speech-recognition pipeline as its `device` argument.
# NOTE(review): presumably resolves to GPU/MPS when available, else CPU — confirm.
device = Accelerator().device

In [None]:
# Speech-to-text pipeline (Whisper base, English checkpoint) placed on `device`
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base.en",
    device=device,
)

In [None]:
# Text-to-speech helper backed by the Groq speech API
def text_to_audio(
    text,
    model="canopylabs/orpheus-v1-english",
    voice="troy",
    response_format="wav",
):
    """Synthesize speech for *text* with the Groq client and save it to disk.

    Args:
        text: The text to be spoken.
        model: Speech model identifier sent to the API.
        voice: Voice name sent to the API.
        response_format: Audio format requested; also used as the file extension.

    Returns:
        Path of the written file, ``output.<response_format>`` inside the
        system temp directory. The same path is reused on every call, so a
        later call overwrites the earlier audio.
    """
    audio_path = os.path.join(tempfile.gettempdir(), f"output.{response_format}")

    request = {
        "model": model,
        "voice": voice,
        "input": text,
        "response_format": response_format,
    }
    speech = client_groq.audio.speech.create(**request)
    speech.write_to_file(audio_path)

    return audio_path

In [None]:
# Main handler: transcribe the recording, stream the LLM answer, then speak it
def process_input(audio_file, history, model):
    """Transcribe a recorded question, stream the LLM reply, and synthesize audio.

    This is a generator: every yielded ``(history, audio)`` pair is one update
    for the two outputs wired to it (chat transcript, audio player). Because
    it is a generator, the final result must be *yielded* as well; a
    ``return value`` would be discarded and the audio would never be delivered.

    Args:
        audio_file: Path to the recorded audio, or None/empty if nothing was
            recorded.
        history: Prior chat messages as dicts with ``role`` and ``content``
            keys (None is treated as an empty history).
        model: Model identifier passed to the chat-completions call.

    Yields:
        ``(history, None)`` while the answer is streaming, then a final
        ``(history, audio_path)`` where ``audio_path`` is the synthesized
        speech file, or None if there was no text or synthesis failed.
    """
    # Keep only role/content for the API call.
    # NOTE(review): presumably the UI can attach extra keys to messages — confirm.
    history = [{"role": h["role"], "content": h["content"]} for h in history or []]

    # Submit was pressed without a recording: leave the chat unchanged
    # instead of crashing inside the transcriber.
    if not audio_file:
        print("No audio input received.")
        yield history, None
        return

    transcription = transcriber(audio_file)["text"]
    prompt = get_user_prompt(transcription)
    messages = (
        [{"role": "system", "content": system_prompt}]
        + history
        + [{"role": "user", "content": prompt}]
    )
    stream = client_llm.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
    )

    # Show the raw transcription (not the wrapped prompt) in the transcript,
    # followed by a placeholder that is filled in as chunks arrive.
    history.append({"role": "user", "content": transcription})
    history.append({"role": "assistant", "content": ""})

    full_response = ""
    for chunk in stream:
        # Some chunks (e.g. a trailing usage chunk) carry no choices.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content or ""
        full_response += delta
        history[-1]["content"] = full_response
        yield history, None

    print("LLM Response: ", full_response)

    output_audio = None
    # Skip synthesis when the model produced no text.
    if full_response.strip():
        try:
            output_audio = text_to_audio(full_response)
        except Exception as e:
            # Best effort: the text answer is still shown if speech fails.
            print("Error in text-to-audio conversion: ", e)

    # Final update: must be a yield so the audio path reaches the UI.
    yield history, output_audio

In [None]:
# Gradio UI: chat transcript, model picker, microphone input and spoken reply
with gr.Blocks() as demo:
    gr.Markdown("# Technical Q&A Assistant with Audio")

    # Conversation transcript
    with gr.Row():
        chatbot = gr.Chatbot(height=400)

    # Model selector, defaulting to the first configured model
    with gr.Row():
        model = gr.Dropdown(
            choices=models,
            value=models[0],
            label="Select Model",
            interactive=True,
        )

    # Left column: record and submit; right column: audio playback
    with gr.Row():
        with gr.Column(scale=6):
            audio_input = gr.Audio(
                sources="microphone",
                type="filepath",
                label="Record your question",
            )
            submit_btn = gr.Button("Submit Audio")
        with gr.Column(scale=6):
            audio_output = gr.Audio(
                label="Audio Response",
                streaming=True,
                autoplay=True,
            )

    # Submit feeds the recording, transcript and model choice to process_input
    submit_btn.click(
        fn=process_input,
        inputs=[audio_input, chatbot, model],
        outputs=[chatbot, audio_output],
    )

demo.launch(inbrowser=True)