In [2]:
!pip install flask

Collecting flask
  Downloading flask-3.1.1-py3-none-any.whl.metadata (3.0 kB)
Collecting blinker>=1.9.0 (from flask)
  Using cached blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting click>=8.1.3 (from flask)
  Downloading click-8.2.1-py3-none-any.whl.metadata (2.5 kB)
Collecting itsdangerous>=2.2.0 (from flask)
  Using cached itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)
Collecting werkzeug>=3.1.0 (from flask)
  Using cached werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)
Downloading flask-3.1.1-py3-none-any.whl (103 kB)
Using cached blinker-1.9.0-py3-none-any.whl (8.5 kB)
Downloading click-8.2.1-py3-none-any.whl (102 kB)
Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Using cached werkzeug-3.1.3-py3-none-any.whl (224 kB)
Installing collected packages: werkzeug, itsdangerous, click, blinker, flask

   ---------------------------------------- 0/5 [werkzeug]
   ---------------------------------------- 0/5 [werkzeug]
   ----------------------------------------

In [4]:
!pip install soundfile

Collecting soundfile
  Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl (1.0 MB)
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------- ----------------------------- 0.3/1.0 MB ? eta -:--:--
   ---------------------------------------- 1.0/1.0 MB 2.8 MB/s eta 0:00:00
Installing collected packages: soundfile
Successfully installed soundfile-0.13.1


In [5]:
# Cell 1: Import libraries
import os
import threading
from flask import Flask, request, jsonify
from IPython.display import display, Javascript
import time
import soundfile as sf
import numpy as np
import tempfile

# Optional: Whisper local setup
import whisper
model = whisper.load_model("base")  # or "small", "medium", etc.


In [6]:
# Cell 2: Flask app setup
app = Flask(__name__)
AUDIO_BUFFER = []  # store chunks of raw bytes

@app.route("/upload", methods=["POST"])
def upload():
    global AUDIO_BUFFER
    audio_chunk = request.data
    AUDIO_BUFFER.append(audio_chunk)
    return jsonify({"status": "received", "length": len(audio_chunk)})


In [7]:
# Cell 3: Run Flask server in background
def run_flask():
    app.run(port=5001)

thread = threading.Thread(target=run_flask)
thread.daemon = True
thread.start()
print("Flask server started on http://localhost:5001")


Flask server started on http://localhost:5001
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5001
Press CTRL+C to quit
127.0.0.1 - - [13/Jun/2025 16:35:01] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:02] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:03] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:04] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:05] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:06] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:07] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:08] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:09] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:10] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:12] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:12] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:14] "POST /upload HTTP/1.1" 200 -
127.0.0.1 - - [13/Jun/2025 16:35:14] "POST /upload HTTP/1.1" 200 -
127.0

In [8]:
# Cell 4: Frontend JavaScript to start recording from browser and send to Flask
display(Javascript("""
const CHUNK_TIME = 1000; // ms

let mediaRecorder;
let audioChunks = [];

async function startRecording() {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    mediaRecorder = new MediaRecorder(stream);

    mediaRecorder.ondataavailable = async e => {
        const blob = e.data;
        const arrayBuffer = await blob.arrayBuffer();
        fetch("http://localhost:5001/upload", {
            method: "POST",
            body: arrayBuffer
        });
    };

    mediaRecorder.start(CHUNK_TIME);
    console.log("Recording started...");
}

startRecording();
"""))


<IPython.core.display.Javascript object>

In [9]:
# Cell 5: Assemble audio chunks into a file and transcribe
def transcribe_loop():
    print("Starting transcription loop...")
    while True:
        if len(AUDIO_BUFFER) > 0:
            print("Transcribing {} chunks...".format(len(AUDIO_BUFFER)))
            raw = b''.join(AUDIO_BUFFER)
            AUDIO_BUFFER.clear()

            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                # Save as WAV
                data = np.frombuffer(raw, dtype=np.int16)
                sf.write(tmp.name, data, samplerate=44100, subtype='PCM_16')
                print(f"Saved audio to {tmp.name}")
                
                # Transcribe
                result = model.transcribe(tmp.name)
                print("Transcription:", result['text'])
                
        time.sleep(5)

# Run this in background
threading.Thread(target=transcribe_loop, daemon=True).start()


Starting transcription loop...
Transcribing 43 chunks...
Saved audio to C:\Users\950207\AppData\Local\Temp\tmpm3qgkxng.wav




Transcription: 
Transcribing 7 chunks...
Saved audio to C:\Users\950207\AppData\Local\Temp\tmpi8k3rhoo.wav
Transcription: 
Transcribing 6 chunks...
Saved audio to C:\Users\950207\AppData\Local\Temp\tmpcg6fll5m.wav
Transcription: 
Transcribing 6 chunks...
Saved audio to C:\Users\950207\AppData\Local\Temp\tmpbnvmc0qs.wav
Transcription:  1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0
Transcribing 11 chunks...
Saved audio to C:\Users\950207\AppData\Local\Temp\tmpeyqrnb03.wav
Transcription:  1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0
Transcribing 11 chunks...
Saved audio to C:\Users\950207\AppData\Local\Temp\tmpfwb2vbhj.wav
Transcription: 

Exception in thread Thread-52 (transcribe_loop):
Traceback (most recent call last):
  File "C:\Users\950207\anaconda3\envs\gptcode\lib\threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "C:\Users\950207\anaconda3\envs\gptcode\lib\site-packages\ipykernel\ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "C:\Users\950207\anaconda3\envs\gptcode\lib\threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\950207\AppData\Local\Temp\ipykernel_18148\1842116349.py", line 12, in transcribe_loop
ValueError: buffer size must be a multiple of element size


Transcribing 10 chunks...


In [10]:
# Cell 6: JavaScript to stop recording
display(Javascript("""
if (mediaRecorder && mediaRecorder.state !== "inactive") {
    mediaRecorder.stop();
    console.log("Recording stopped.");
}
"""))


<IPython.core.display.Javascript object>