In [1]:
# Install required packages
!pip install SpeechRecognition pydub numpy

Collecting SpeechRecognition
  Downloading speechrecognition-3.14.3-py3-none-any.whl.metadata (30 kB)
Downloading speechrecognition-3.14.3-py3-none-any.whl (32.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.14.3


In [2]:
from IPython.display import Javascript, display
from google.colab import output
import base64
import numpy as np
import wave

def record_audio_browser(seconds=3):
    """Record audio from the browser microphone and save it as a WebM file.

    Injects a JavaScript ``recordAudio()`` function into the Colab output
    frame, runs it through ``output.eval_js`` and writes the returned
    base64 data URL to ``temp_recording.webm`` in the working directory.

    Args:
        seconds: Recording duration in seconds (converted to whole
            milliseconds for the browser timer).

    Returns:
        The string ``"temp_recording.webm"`` on success, or ``None`` if any
        step (permission prompt, recording, decoding, file write) fails.
    """
    print("🎤 Please allow microphone access when prompted.")

    # NOTE: this template is filled in with the ``%`` operator, so the
    # JavaScript below must not contain any other percent sign.
    js_code = """
    async function recordAudio() {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      const ctx = new AudioContext();
      try {
        const src = ctx.createMediaStreamSource(stream);
        const dest = ctx.createMediaStreamDestination();
        src.connect(dest);

        const streamLeft = new MediaStream([dest.stream.getAudioTracks()[0]]);
        const mediaRecorder = new MediaRecorder(streamLeft);

        const chunks = [];
        mediaRecorder.ondataavailable = e => chunks.push(e.data);

        // Attach the stop handler before stop() is ever called so the
        // event cannot be missed.
        const stopped = new Promise(resolve => { mediaRecorder.onstop = resolve; });
        mediaRecorder.start();

        await new Promise(resolve => setTimeout(resolve, %d));
        mediaRecorder.stop();
        await stopped;

        const blob = new Blob(chunks, { type: 'audio/webm' });
        return await new Promise((resolve, reject) => {
          const reader = new FileReader();
          reader.onload = () => resolve(reader.result);
          reader.onerror = () => reject(reader.error);
          reader.readAsDataURL(blob);
        });
      } finally {
        // Release the microphone and the audio graph so the browser does
        // not keep capturing after the recording is done.
        stream.getTracks().forEach(track => track.stop());
        ctx.close();
      }
    }
    """ % int(seconds * 1000)

    display(Javascript(js_code))
    print("🎙️ Recording will start... Speak now!")

    try:
        webm_data_b64 = output.eval_js("recordAudio()")
        # The result is a data URL: "<header>,<base64 payload>".
        _, encoded = webm_data_b64.split(",", 1)
        webm_bytes = base64.b64decode(encoded)

        # Save WebM file
        with open("temp_recording.webm", "wb") as f:
            f.write(webm_bytes)

        print(" Raw audio saved as WebM.")
        return "temp_recording.webm"
    except Exception as e:
        # Best-effort: report the problem and let the caller handle None.
        print(" Error during recording:", str(e))
        return None

In [3]:
!apt-get -qq install ffmpeg
!pip install pydub
from pydub import AudioSegment

def convert_to_wav(input_path="temp_recording.webm", output_path="output.wav"):
    """Transcode a WebM recording into a mono, 16 kHz WAV file.

    Args:
        input_path: Path of the WebM file to read.
        output_path: Path where the WAV file is written.

    Returns:
        ``output_path`` when the conversion succeeds, otherwise ``None``
        (the failure reason is printed rather than raised).
    """
    try:
        webm_segment = AudioSegment.from_file(input_path, format="webm")
        # Resample first, then downmix: 16 kHz, single channel.
        resampled = webm_segment.set_frame_rate(16000)
        mono_segment = resampled.set_channels(1)
        mono_segment.export(output_path, format="wav")
        print(" Converted to valid WAV.")
        return output_path
    except Exception as err:
        print(" Conversion failed:", str(err))
        return None



In [4]:
import speech_recognition as sr

def recognize_speech(seconds=3):
    """Record from the microphone and transcribe the speech to text.

    Pipeline: ``record_audio_browser`` (WebM) -> ``convert_to_wav`` (WAV)
    -> ``Recognizer.recognize_google``.

    Args:
        seconds: How long to record, in seconds. Defaults to 3, the
            duration that was previously hard-coded.

    Returns:
        The recognized text, or ``None`` if recording, conversion or
        recognition failed (a message is printed in each case).
    """
    webm_path = record_audio_browser(seconds=seconds)
    if webm_path is None:
        return None

    wav_path = convert_to_wav(webm_path)
    if wav_path is None:
        return None

    r = sr.Recognizer()
    with sr.AudioFile(wav_path) as source:
        # No duration/offset given, so the whole file is read.
        audio = r.record(source)

    try:
        text = r.recognize_google(audio)
        print(" Recognized:", text)
        return text
    except sr.UnknownValueError:
        print(" Could not understand audio.")
        return None
    except sr.RequestError:
        print(" Google API unavailable.")
        return None

In [12]:

def _safe_arithmetic_eval(expr_str):
    """Evaluate a string of numbers and ``+ - * /`` without using ``eval``.

    Raises:
        SyntaxError: if ``expr_str`` is not a valid expression.
        ValueError: if it contains anything other than int/float literals,
            binary ``+ - * /`` and unary ``+ -`` (e.g. ``**`` or ``//``).
        ZeroDivisionError: on division by zero.
    """
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def walk(node):
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](walk(node.left), walk(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](walk(node.operand))
        raise ValueError("unsupported expression element: %s" % type(node).__name__)

    return walk(ast.parse(expr_str, mode="eval").body)


def evaluate_expression(text):
    """Turn a spoken arithmetic phrase into a number.

    Operator words ("plus", "minus", "times", "multiplied by",
    "divided by", "into") and the symbols "×" / "÷" are mapped to
    ``+ - * /``; everything that is not a number or an operator is then
    discarded and the remaining expression is evaluated.

    Args:
        text: Transcribed phrase, e.g. ``"what is 2.5 plus 1"``.

    Returns:
        The numeric result (``int``, or ``float`` when decimals or division
        are involved), or ``None`` when ``text`` is empty, contains no
        numbers/operators, is malformed, or divides by zero.
    """
    import re

    if not text:
        return None

    replacements = {
        "plus": "+",
        "minus": "-",
        "times": "*",
        "multiplied by": "*",
        "divided by": "/",
        "into": "*",
        "×": "*",
        "÷": "/",
        "what is": "",
        "?": ""
    }

    expression = text.lower()
    for word, symbol in replacements.items():
        expression = expression.replace(word, symbol)

    # Keep decimal numbers intact ("2.5" is one token, not "2" and "5").
    tokens = re.findall(r'\d+(?:\.\d+)?|[+\-*/]', expression)
    if not tokens:
        return None

    expr_str = ''.join(tokens)
    # Python rejects integer literals with leading zeros ("007"), so strip
    # them; zeros inside a number or right before a decimal point are kept.
    expr_str = re.sub(r'(?<![\d.])0+(?=\d)', '', expr_str)

    try:
        # Speech input is untrusted: evaluate through a restricted AST
        # walker instead of eval().
        return _safe_arithmetic_eval(expr_str)
    except (SyntaxError, ValueError, ZeroDivisionError, OverflowError,
            RecursionError, MemoryError):
        return None

In [13]:
# Demo driver: prompt the user, capture one utterance, print the answer.
print(" Click the play button and allow microphone access when prompted.")
print(" Say something like: 'What is five plus three?'")

spoken_text = recognize_speech()

if not spoken_text:
    # Recording, conversion or recognition produced nothing usable.
    print(" No valid input detected.")
else:
    result = evaluate_expression(spoken_text)
    if result is None:
        print(" Could not parse or calculate the result.")
    else:
        print(f"\n Result of '{spoken_text}': {result}")

 Click the play button and allow microphone access when prompted.
 Say something like: 'What is five plus three?'
🎤 Please allow microphone access when prompted.


<IPython.core.display.Javascript object>

🎙️ Recording will start... Speak now!
 Raw audio saved as WebM.
 Converted to valid WAV.
 Recognized: what is 2 * 5

 Result of 'what is 2 * 5': 10
