In [32]:
import nos
from nos.client import Client
from nos.logging import logger

# Run server through nos serve
# nos.init(runtime="auto", logging_level="DEBUG")

# Init NOS server, wait for it to spin up then confirm its healthy.
client = Client()

logger.debug("Waiting for server to start...")
client.WaitForServer()

logger.debug("Confirming server is healthy...")
if not client.IsHealthy():
    raise RuntimeError("NOS server is not healthy")

In [33]:
from pathlib import Path
import tempfile

def trim_audio(audio_path: Path, duration_s: int = 600) -> Path:
    import ffmpeg
    with tempfile.NamedTemporaryFile(suffix=Path(audio_path).suffix, delete=False) as tmp:
        audio_trimmed = ffmpeg.input(str(audio_path)).audio.filter("atrim", duration=duration_s)
        audio_output = ffmpeg.output(audio_trimmed, tmp.name)
        ffmpeg.run(audio_output, overwrite_output=True)
        return Path(tmp.name)

def download_youtube_url_and_transcribe(url):
    from yt_dlp import YoutubeDL

    ydl_opts = {
        "format": "bestaudio/best",
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "wav",
                "preferredquality": "192",
            }
        ],
    }

    with YoutubeDL(ydl_opts) as ydl:
        # set download location to current directory
        info_dict = ydl.extract_info(url, download=False)
        output_filename = ydl.prepare_filename(info_dict)
        audio_filename = output_filename.replace(".webm", ".wav")
        error_code = ydl.download([url]) 
        assert error_code == 0

    # run transcription
    whisperx = client.Module("m-bain/whisperx-large-v2")
    assert whisperx is not None
    assert whisperx.GetModelInfo() is not None

    with client.UploadFile(trim_audio(audio_filename)) as remote_path:
        response = whisperx.transcribe(path=remote_path, batch_size=96)
    
        return response
    # assert isinstance(response, dict)
    # assert "text" in response
    # return response["text"]

In [None]:
transcription = download_youtube_url_and_transcribe("https://www.youtube.com/watch?v=Tfrrubw7pcE")

[youtube] Extracting URL: https://www.youtube.com/watch?v=Tfrrubw7pcE
[youtube] Tfrrubw7pcE: Downloading webpage
[youtube] Tfrrubw7pcE: Downloading ios player API JSON
[youtube] Tfrrubw7pcE: Downloading android player API JSON
[youtube] Tfrrubw7pcE: Downloading m3u8 information
[youtube] Extracting URL: https://www.youtube.com/watch?v=Tfrrubw7pcE
[youtube] Tfrrubw7pcE: Downloading webpage
[youtube] Tfrrubw7pcE: Downloading ios player API JSON
[youtube] Tfrrubw7pcE: Downloading android player API JSON
[youtube] Tfrrubw7pcE: Downloading m3u8 information
[info] Tfrrubw7pcE: Downloading 1 format(s): 251
[download] Destination: Principles for Success： “Embrace Reality and Deal With It” ｜ Episode 2 [Tfrrubw7pcE].webm
[download] 100% of    4.31MiB in 00:00:01 at 3.60MiB/s   
[ExtractAudio] Destination: Principles for Success： “Embrace Reality and Deal With It” ｜ Episode 2 [Tfrrubw7pcE].wav
Deleting original file Principles for Success： “Embrace Reality and Deal With It” ｜ Episode 2 [Tfrrubw7p

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

In [7]:
transcription 

{'segments': [{'start': 0.229,
   'end': 1.892,
   'text': ' Principles for Success.',
   'words': [{'word': 'Principles',
     'start': 0.229,
     'end': 0.75,
     'score': 0.841,
     'speaker': 'SPEAKER_00'},
    {'word': 'for',
     'start': 0.79,
     'end': 0.931,
     'score': 0.839,
     'speaker': 'SPEAKER_00'},
    {'word': 'Success.',
     'start': 1.011,
     'end': 1.492,
     'score': 0.77,
     'speaker': 'SPEAKER_00'}],
   'speaker': 'SPEAKER_00'},
  {'start': 1.892,
   'end': 7.262,
   'text': 'An ultra-mini-series adventure in 30 minutes and in 8 episodes.',
   'words': [{'word': 'An',
     'start': 1.892,
     'end': 1.973,
     'score': 0.891,
     'speaker': 'SPEAKER_00'},
    {'word': 'ultra-mini-series',
     'start': 2.173,
     'end': 3.415,
     'score': 0.791,
     'speaker': 'SPEAKER_00'},
    {'word': 'adventure',
     'start': 3.455,
     'end': 3.976,
     'score': 0.874,
     'speaker': 'SPEAKER_00'},
    {'word': 'in',
     'start': 4.137,
     'end':