In [4]:
pip install gtts

Collecting gtts
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting click<8.2,>=7.1 (from gtts)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Downloading click-8.1.8-py3-none-any.whl (98 kB)
Installing collected packages: click, gtts

  Attempting uninstall: click

    Found existing installation: click 8.2.1

    Uninstalling click-8.2.1:

      Successfully uninstalled click-8.2.1

   ---------------------------------------- 0/2 [click]
   -------------------- ------------------- 1/2 [gtts]
   -------------------- ------------------- 1/2 [gtts]
   -------------------- ------------------- 1/2 [gtts]
   ---------------------------------------- 2/2 [gtts]

Successfully installed click-8.1.8 gtts-2.5.4
Note: you may need to restart the kernel to use updated packages.


In [11]:
import base64
import html
import os
import tempfile

import gradio as gr
import requests
import whisper
from dotenv import load_dotenv
from gtts import gTTS

In [12]:
# Load the Whisper model named "base" once, at module level, so that
# voice_teacher can reuse the same instance for every transcription.
whisper_model = whisper.load_model("base")


In [13]:
# Read variables from a local .env file, then look up the OpenRouter key.
# API_KEY is None when OPENROUTER_API_KEY is not defined.
load_dotenv()
API_KEY = os.environ.get("OPENROUTER_API_KEY")

In [14]:
# Conversation seed: one system message that makes the model act like a teacher
def initialize_messages():
    """Build a fresh chat history containing only the teacher system prompt.

    Returns:
        list[dict]: A new one-element list whose single entry has the keys
        ``"role"`` (always ``"system"``) and ``"content"`` (the prompt text).
    """
    teacher_prompt = "".join([
        "You are an AI teacher built for classrooms. Your job is to explain concepts clearly, ",
        "like a kind, knowledgeable, and engaging tutor. Use text, diagrams (describe them in words), ",
        "and analogies to help students understand deeply.",
    ])
    system_message = dict(role="system", content=teacher_prompt)
    return [system_message]


In [15]:
# Module-level conversation history, seeded with the system prompt.
# voice_teacher adds user/assistant turns to this same list, so the history
# persists across calls for as long as the kernel is alive.
messages_prmt = initialize_messages()

In [16]:
# Convert text to audio (base64 mp3)
def text_to_base64_audio(text):
    """Synthesize ``text`` with gTTS and return the MP3 bytes as base64 text.

    Args:
        text: The text to speak.

    Returns:
        str: Base64 (UTF-8 decoded) encoding of the generated MP3, or an
        empty string when ``text`` is ``None``, empty, or whitespace-only
        (there is nothing to speak, so no synthesis is attempted).

    Raises:
        Whatever ``gTTS(...).save`` raises on synthesis/network failure; the
        temporary file is removed in that case too.
    """
    if not text or not text.strip():
        return ""

    # Reserve a unique path, then close our handle before gTTS writes to it,
    # so only one writer ever has the file open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        audio_path = temp_audio.name

    try:
        gTTS(text).save(audio_path)
        # Context manager closes the read handle deterministically.
        with open(audio_path, "rb") as audio_file:
            audio_data = audio_file.read()
    finally:
        # Always clean up, even if synthesis or reading failed.
        os.remove(audio_path)

    return base64.b64encode(audio_data).decode("utf-8")


In [17]:
# Main function
def voice_teacher(audio_file):
    """Answer a spoken question with text plus auto-playing synthesized speech.

    Steps: transcribe the recording with the module-level Whisper model, send
    the conversation so far plus the new question to OpenRouter, turn the
    reply into base64 MP3 audio, and return an HTML fragment showing both
    texts with an embedded audio player.

    The shared ``messages_prmt`` history is only extended after a reply was
    obtained, so a failed request never leaves a dangling user turn behind.

    Args:
        audio_file: Path of the recorded audio to transcribe. A falsy value
            (e.g. ``None`` when nothing was recorded) yields a short notice.

    Returns:
        str: An HTML fragment. On any OpenRouter/network/parsing failure this
        is the fixed apology paragraph.
    """
    if not audio_file:
        return "<p>No audio received. Please record a question and try again.</p>"

    # Step 1: Transcribe using Whisper
    transcription = whisper_model.transcribe(audio_file)["text"].strip()
    if not transcription:
        return "<p>Sorry, I couldn't hear a question. Please try again.</p>"
    user_message = {"role": "user", "content": transcription}

    # Step 2: Get AI reply from OpenRouter
    error_html = "<p>Sorry, something went wrong with OpenRouter!</p>"
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }
    data = {
        "model": "deepseek/deepseek-r1-0528:free",
        # Send history + new question without committing it to the shared list yet.
        "messages": messages_prmt + [user_message]
    }
    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=120,  # without a timeout a stalled connection blocks forever
        )
        result = response.json()
    except (requests.RequestException, ValueError):
        # Connection problems, timeouts, or a non-JSON body.
        return error_html

    try:
        bot_reply = result["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        # Error payloads have no usable "choices" entry.
        return error_html
    if not bot_reply:
        return error_html

    # Commit the completed exchange to the shared history (in-place mutation,
    # so no ``global`` declaration is needed).
    messages_prmt.extend([user_message, {"role": "assistant", "content": bot_reply}])

    # Step 3: Convert bot reply to base64 mp3
    b64_audio = text_to_base64_audio(bot_reply)

    # Step 4: HTML output with auto-playing audio.
    # Both texts are untrusted (speech transcript / model output), so escape
    # them before embedding to avoid broken or injected markup.
    safe_question = html.escape(transcription)
    safe_reply = html.escape(bot_reply)
    audio_html = f"""
    <p><b>You asked:</b> {safe_question}</p>
    <p><b>AI says:</b> {safe_reply}</p>
    <audio autoplay controls style="width: 100%;">
        <source src="data:audio/mp3;base64,{b64_audio}" type="audio/mp3">
        Your browser does not support the audio element.
    </audio>
    """

    return audio_html

In [18]:
# Launch Gradio app: one audio input wired to voice_teacher, whose returned
# HTML fragment is rendered by an HTML output component.
iface = gr.Interface(
    fn=voice_teacher,
    # NOTE(review): type="filepath" presumably hands voice_teacher a path string
    # (it is passed straight to whisper_model.transcribe) — confirm against Gradio docs.
    inputs=gr.Audio(type="filepath", label="Speak your question"),
    outputs=gr.HTML(),
    title="Voice-Enabled AI Teacher",
    description="Speak a question. It replies with voice automatically!",
)

# share=True requests a share link in addition to the local URL. In the
# recorded run below the share link could not be created and only the local
# URL was served.
# NOTE(review): a share link would presumably expose this app (and its shared
# conversation history) beyond this machine — confirm that is intended.
iface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7871

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.




