In [1]:
import nos
from nos.client import Client
from nos.logging import logger

# Run server through nos serve
# nos.init(runtime="auto", logging_level="DEBUG")

# Init NOS server, wait for it to spin up then confirm its healthy.
client = Client()

logger.debug("Waiting for server to start...")
client.WaitForServer()
 
logger.debug("Confirming server is healthy...")
if not client.IsHealthy():
    raise RuntimeError("NOS server is not healthy")

In [2]:
from pathlib import Path
import tempfile

def trim_audio(audio_path: Path, duration_s: int = 600) -> Path:
    import ffmpeg
    with tempfile.NamedTemporaryFile(suffix=Path(audio_path).suffix, delete=False) as tmp:
        audio_trimmed = ffmpeg.input(str(audio_path)).audio.filter("atrim", duration=duration_s)
        audio_output = ffmpeg.output(audio_trimmed, tmp.name)
        ffmpeg.run(audio_output, overwrite_output=True)
        return Path(tmp.name)

def download_youtube_url_and_transcribe(url):
    from yt_dlp import YoutubeDL

    ydl_opts = {
        "format": "bestaudio/best", 
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "wav",
                "preferredquality": "192",
            }
        ],
    }

    with YoutubeDL(ydl_opts) as ydl:
        # set download location to current directory
        info_dict = ydl.extract_info(url, download=False)
        output_filename = ydl.prepare_filename(info_dict)
        audio_filename = output_filename.replace(".webm", ".wav")
        error_code = ydl.download([url]) 
        assert error_code == 0

    # run transcription
    whisperx = client.Module("m-bain/whisperx-large-v2")
    assert whisperx is not None
    assert whisperx.GetModelInfo() is not None

    with client.UploadFile(trim_audio(audio_filename)) as remote_path:
        response = whisperx.transcribe(path=remote_path, batch_size=96)
    
        return response
    # assert isinstance(response, dict)
    # assert "text" in response
    # return response["text"]

In [5]:
transcription = download_youtube_url_and_transcribe("https://www.youtube.com/watch?v=Tfrrubw7pcE")

[youtube] Extracting URL: https://www.youtube.com/watch?v=Tfrrubw7pcE
[youtube] Tfrrubw7pcE: Downloading webpage
[youtube] Tfrrubw7pcE: Downloading ios player API JSON
[youtube] Tfrrubw7pcE: Downloading android player API JSON
[youtube] Tfrrubw7pcE: Downloading m3u8 information
[youtube] Extracting URL: https://www.youtube.com/watch?v=Tfrrubw7pcE
[youtube] Tfrrubw7pcE: Downloading webpage
[youtube] Tfrrubw7pcE: Downloading ios player API JSON
[youtube] Tfrrubw7pcE: Downloading android player API JSON
[youtube] Tfrrubw7pcE: Downloading m3u8 information
[info] Tfrrubw7pcE: Downloading 1 format(s): 251
[download] Destination: Principles for Success： “Embrace Reality and Deal With It” ｜ Episode 2 [Tfrrubw7pcE].webm
[download] 100% of    4.31MiB in 00:00:01 at 3.12MiB/s   
[ExtractAudio] Destination: Principles for Success： “Embrace Reality and Deal With It” ｜ Episode 2 [Tfrrubw7pcE].wav
Deleting original file Principles for Success： “Embrace Reality and Deal With It” ｜ Episode 2 [Tfrrubw7p

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

In [9]:
transcription['segments'][0]

{'start': 0.229,
 'end': 1.892,
 'text': ' Principles for Success.',
 'words': [{'word': 'Principles',
   'start': 0.229,
   'end': 0.75,
   'score': 0.841,
   'speaker': 'SPEAKER_00'},
  {'word': 'for',
   'start': 0.79,
   'end': 0.931,
   'score': 0.839,
   'speaker': 'SPEAKER_00'},
  {'word': 'Success.',
   'start': 1.011,
   'end': 1.492,
   'score': 0.77,
   'speaker': 'SPEAKER_00'}],
 'speaker': 'SPEAKER_00'}

In [23]:
def join_segments(segments):
    user_texts = {}
    for segment in segments:
        for word_dict in segment['words']:
            if not 'speaker' in word_dict:
                continue
            speaker = word_dict['speaker']
            if speaker not in user_texts:
                user_texts[speaker] = []
            user_texts[speaker].append(word_dict['word'])
    for speaker in user_texts:
        user_texts[speaker] = ' '.join(user_texts[speaker])
    return user_texts

In [24]:
joined = join_segments(transcription['segments'])

In [26]:
joined

{'SPEAKER_00': "Principles for Success. An ultra-mini-series adventure in minutes and in episodes. Episode Embrace reality and deal with it. The path you take in life is your most important decision. In my case, I wanted my life to be great, and I feared boredom and mediocrity more than I feared failure. Since I didn't start out with money, and I didn't need much more than a bed to sleep in and food to eat, I could skew my decisions to pursue my adventures. So ever since I was a kid, I ran after the things I wanted, crashed, got up and ran again, and crashed again, and each time I crashed, I learned something, got better, and crashed less. By doing that over and over again, I learned to love this process, even the crashing part of it. Through it, I encountered reality. and I learned how to deal with it, which inspired another one of my most fundamental principles, which is that truth is the essential foundation for producing good outcomes. By truth, I don't mean anything more than the 

In [37]:
!pip install openai

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting openai
  Obtaining dependency information for openai from https://files.pythonhosted.org/packages/f7/86/ea99e3f966a639a3256e2f79d78cd63d45d5eb651a5457ccbdcfd9eed24a/openai-1.1.1-py3-none-any.whl.metadata
  Downloading openai-1.1.1-py3-none-any.whl.metadata (16 kB)
Collecting distro<2,>=1.7.0 (from openai)
  Downloading distro-1.8.0-py3-none-any.whl (20 kB)
Downloading openai-1.1.1-py3-none-any.whl (217 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m217.8/217.8 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: distro, openai
Successfully installed distro-1.8.0 openai-1.1.1


In [27]:
import openai
import requests
import json

url = "https://api.openai.com/v1/chat/completions"

# api_key = "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"
# org_id = "org-y63MaxTe2dqrUp2WCT86bK6f"
api_key = "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"

headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer " + api_key,
}

speakers = list(joined.keys())
first_speaker_data = joined[]

data = {
    "model": "gpt-3.5-turbo",
    "messages": [
      {
        "role": "system",
        "content": "You are a summarization bot for youtube videos. you provide two sentence descriptions."
      },
      {
        "role": "user",
        "content": "Summarize the following transcript: " + joined[0]
      }
    ],
    # "prompt": f"Summarize the following text: {transcription['segments']}",
    "max_tokens": 100,
    "temperature": 0.3,
}
    
response = requests.post(url, headers=headers, data=json.dumps(data))    
print(response.content)
summary = response.json()['choices'][0]['message']['content']
print(f"Summary: {summary}")


KeyError: 0

In [66]:
response.json()['choices'][0]['message']['content']

'Hello! How can I assist you today?'