# Transcribe
Download a video from YouTube and use OpenAI's Whisper to create captions with timestamps.

Thanks: [yt-dlp](https://github.com/yt-dlp/yt-dlp), [Whisper](https://github.com/openai/whisper)

In [None]:
# Dependencies
%pip install openai-whisper
%pip install yt-dlp

In [13]:
# Settings for the YouTube downloader.
# Keys must use the yt_dlp.YoutubeDL option names; command-line flag spellings
# such as "extract-audio" / "audio-format" are not recognised by the Python API,
# so audio extraction is configured through the postprocessor entry below.
YDL_OPTS = {
    # Prefer an audio-only stream so a full video file is not downloaded
    # just to be converted; fall back to the best combined format.
    "format": "bestaudio/best",
    "noplaylist": True,
    "youtube_include_dash_manifest": False,
    "postprocessors": [{  # Extract audio using ffmpeg
        "key": "FFmpegExtractAudio",
        "preferredcodec": "mp3",
    }],
}

In [21]:
from yt_dlp import YoutubeDL

def extract_audio(filename: str, file_ext: str, url: str) -> None:
    """Download ``url`` with yt-dlp and extract its audio track.

    Args:
        filename: Value used as yt-dlp's output template (``outtmpl``).
        file_ext: Codec passed to the first postprocessor as
            ``preferredcodec`` (e.g. ``"mp3"``).
        url: Address of the video to download.

    The module-level ``YDL_OPTS`` is treated as a read-only template: a fresh
    options dict is built for each call so that one call's file name and codec
    do not leak into later calls.
    """
    first_pp, *other_pps = YDL_OPTS["postprocessors"]
    opts = {
        **YDL_OPTS,
        "outtmpl": filename,
        # Copy the first postprocessor entry instead of editing it in place.
        "postprocessors": [{**first_pp, "preferredcodec": file_ext}, *other_pps],
    }

    with YoutubeDL(opts) as ydl:
        ydl.download(url_list=[url])

In [20]:
# Download the sample video and extract its audio as an mp3 file.
extract_audio(
    filename="test_audio",
    file_ext="mp3",
    url="https://www.youtube.com/watch?v=z6xslDMimME",
)

[youtube] Extracting URL: https://www.youtube.com/watch?v=z6xslDMimME
[youtube] z6xslDMimME: Downloading webpage
[youtube] z6xslDMimME: Downloading android player API JSON
[info] z6xslDMimME: Downloading 1 format(s): 22
[download] test_audio has already been downloaded
[download] 100% of  382.88MiB
[ExtractAudio] Destination: test_audio.mp3
Deleting original file test_audio (pass -k to keep)


In [4]:
import whisper

# Load the "base" Whisper model and transcribe the extracted audio file.
model = whisper.load_model(name="base")
result = model.transcribe(audio="test_audio.mp3")

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Show which top-level fields the transcription result contains.
result.keys()

dict_keys(['text', 'segments', 'language'])

In [15]:
from datetime import timedelta
import csv

# Write one CSV row per transcribed segment: start time, end time, caption text.
# newline="" lets the csv module control line endings (otherwise extra blank
# rows can appear on Windows), and the explicit UTF-8 encoding keeps non-ASCII
# captions writable regardless of the platform's default encoding.
with open("output.csv", "w", newline="", encoding="utf-8") as file:
    w = csv.writer(file)
    for seg in result.get("segments", []):
        # Segment boundaries are passed to timedelta as seconds, so they are
        # written in timedelta's H:MM:SS[.ffffff] string form.
        start = timedelta(seconds=seg["start"])
        end = timedelta(seconds=seg["end"])
        # Drop leading whitespace from the caption text.
        text = seg["text"].lstrip()
        row = [start, end, text]
        w.writerow(row)