<a href="https://colab.research.google.com/github/acrching/WhisperLive/blob/main/Whisper_Streaming_Working.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install faster-whisper
!pip install streamlink
!pip install ffmpeg-python

In [None]:
import os
import time
import numpy as np
import queue
import threading
import ffmpeg
import streamlink
from faster_whisper import WhisperModel

AUDIO_BUFFER_SIZE = 30  # Maximum amount of decoded audio held in the queue, in seconds


class WhisperOnline:
    """Transcribe the audio of an HLS stream with faster-whisper.

    One thread decodes the stream to 16 kHz mono float32 PCM through ffmpeg
    and pushes fixed-size pieces onto ``audio_buffer``; a second thread pulls
    roughly ``CHUNK_SECONDS`` of audio at a time, transcribes it and prints
    each completed sentence.

    Attributes:
        audio_buffer: Bounded queue of float32 numpy arrays; ``None`` marks
            the end of the stream.
        model: The loaded ``WhisperModel``.
        hls_url: URL handed to streamlink.
        partial_transcription: Text accumulated since the last printed
            sentence.
    """

    SAMPLE_RATE = 16000   # Hz; the rate ffmpeg is asked to resample to
    CHUNK_SECONDS = 10    # Audio gathered before each transcribe() call
    READ_SIZE = 4096      # Bytes read from ffmpeg per queue item (int16 => 2 bytes/sample)
    _END_OF_STREAM = None  # Sentinel queued once the downloader is done

    def __init__(self, model_size, hls_url, device="cuda", compute_type="float16"):
        """Load the model and create the audio queue.

        Args:
            model_size: Model name/size passed to ``WhisperModel``.
            hls_url: Stream URL to resolve with streamlink.
            device: Device passed to ``WhisperModel``.
            compute_type: Compute type passed to ``WhisperModel``.
        """
        # The queue holds arrays of READ_SIZE // 2 samples, so its capacity has
        # to be expressed in those units for AUDIO_BUFFER_SIZE to mean seconds.
        samples_per_item = self.READ_SIZE // 2
        max_items = max(1, AUDIO_BUFFER_SIZE * self.SAMPLE_RATE // samples_per_item)
        self.audio_buffer = queue.Queue(maxsize=max_items)
        self.model = WhisperModel(model_size, device=device, compute_type=compute_type)
        self.hls_url = hls_url
        self.partial_transcription = ""

    def download_audio(self):
        """Decode the stream with ffmpeg and enqueue float32 samples in [-1, 1).

        Always enqueues the end-of-stream sentinel before returning or
        raising, so a consumer blocked on the queue is released.

        Raises:
            KeyError: If streamlink reports no ``'best'`` stream for the URL.
        """
        try:
            streams = streamlink.streams(self.hls_url)
            stream_url = streams['best'].url

            process = (
                ffmpeg
                .input(stream_url)
                # Raw PCM rather than WAV: a WAV container would prepend a
                # header that ends up decoded as audio samples.
                .output('pipe:', format='s16le', acodec='pcm_s16le', ac=1, ar=self.SAMPLE_RATE)
                .global_args('-loglevel', 'error')
                # stderr is deliberately not piped: nothing reads it here, and
                # an undrained pipe eventually blocks ffmpeg.
                .run_async(pipe_stdout=True)
            )
            try:
                leftover = b""
                while True:
                    in_bytes = process.stdout.read(self.READ_SIZE)
                    if not in_bytes:
                        break
                    data = leftover + in_bytes
                    # int16 samples are two bytes; carry a dangling byte over
                    # to the next read instead of letting frombuffer raise.
                    usable = len(data) - (len(data) % 2)
                    leftover = data[usable:]
                    if usable:
                        samples = np.frombuffer(data[:usable], np.int16)
                        self.audio_buffer.put(samples.astype(np.float32) / 32768.0)
            finally:
                process.stdout.close()
                if process.poll() is None:
                    process.terminate()
                process.wait()
        finally:
            self.audio_buffer.put(self._END_OF_STREAM)

    def transcribe_audio(self):
        """Transcribe queued audio until the end-of-stream sentinel arrives.

        Blocks on the queue instead of polling it, gathers about
        ``CHUNK_SECONDS`` of samples per model call, prints the accumulated
        text whenever a segment ends a sentence, and prints any remaining
        text once the stream is over.
        """
        target_samples = self.SAMPLE_RATE * self.CHUNK_SECONDS
        stream_open = True

        while stream_open:
            pieces = []
            gathered = 0
            while gathered < target_samples:
                piece = self.audio_buffer.get()
                if piece is self._END_OF_STREAM:
                    stream_open = False
                    break
                pieces.append(piece)
                gathered += len(piece)

            if not pieces:
                continue

            audio_chunk = np.concatenate(pieces)
            segments, _ = self.model.transcribe(audio_chunk)

            for segment in segments:
                self.partial_transcription += segment.text
                if segment.text.rstrip().endswith(('.', '!', '?')):
                    print(self.partial_transcription.strip())
                    self.partial_transcription = ""

        # Do not lose a sentence that was still open when the stream ended.
        remainder = self.partial_transcription.strip()
        if remainder:
            print(remainder)
        self.partial_transcription = ""

    def run(self):
        """Run download and transcription concurrently; return when both finish."""
        # Daemon threads so an interrupted script is not kept alive by workers.
        download_thread = threading.Thread(target=self.download_audio, daemon=True)
        transcribe_thread = threading.Thread(target=self.transcribe_audio, daemon=True)

        download_thread.start()
        transcribe_thread.start()

        download_thread.join()
        transcribe_thread.join()

if __name__ == "__main__":
    # Stream to transcribe -- replace with your own HLS stream URL.
    hls_url = 'https://dcs-live-uc1.mp.lura.live/server/play/5Awwm3GfagVzfpdA/rendition.m3u8?track=video-0&anvsid=m177626920-ndab3eff76c62376fec5de9c0c5b1b864&ts=1722924916&anvtrid=ba41b16c22763280dc45544f34528be9'
    # Model size for faster-whisper.
    model_size = 'large-v2'

    whisper_online = WhisperOnline(
        model_size,
        hls_url,
        device="cuda",
        compute_type="float16",
    )
    whisper_online.run()