## Minimal transcript → .txt (no timestamps)

- Edit `VIDEO_INPUT` and `LANGUAGES` (and, optionally, `OUTPUT_PATH`) in the cell below.
- Run the cell to write `transcript.txt` in the current working directory.

Reference: https://pypi.org/project/youtube-transcript-api/


In [2]:
# If not installed, uncomment the next line to install
# %pip install youtube-transcript-api

import re
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled, CouldNotRetrieveTranscript

# --- Inputs ---
# Video to transcribe; passed through extract_video_id() before fetching.
VIDEO_INPUT = "https://www.youtube.com/watch?v=NXYrIEP1LRs"  # YouTube URL or raw video ID
# Language codes handed to the transcript fetch as its `languages` argument.
LANGUAGES = ["en"]  # Try these in order
# Destination file; it is opened in "w" mode, so an existing file is overwritten.
OUTPUT_PATH = "transcript.txt"


def extract_video_id(url_or_id: str) -> str:
    """Return the YouTube video ID contained in a URL, or the input itself.

    Recognised URL forms, tried in this order:

    * a ``v=<id>`` query parameter (``watch?v=<id>``, ``...&v=<id>``)
    * ``youtu.be/<id>`` short links
    * ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and
      ``/v/<id>`` paths, including the ``youtube-nocookie.com`` host

    Args:
        url_or_id: A YouTube URL or a raw video ID. Surrounding whitespace
            is ignored.

    Returns:
        The captured ID when one of the patterns matches; otherwise the
        stripped input unchanged, on the assumption that it already is a
        raw video ID.
    """
    s = url_or_id.strip()
    # Order matters: an explicit ``v=`` query parameter wins over path forms.
    patterns = (
        r"[?&]v=([A-Za-z0-9_-]{6,})",
        r"youtu\.be/([A-Za-z0-9_-]{6,})",
        r"youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/([A-Za-z0-9_-]{6,})",
    )
    for pattern in patterns:
        m = re.search(pattern, s)
        if m:
            return m.group(1)
    # No URL form matched: treat the input as a raw ID.
    return s


# Fetch the transcript for VIDEO_INPUT and write it to OUTPUT_PATH as plain
# text, one snippet per line. Every failure is reported with print() instead
# of being raised, so running the cell never ends in a traceback.
try:
    vid = extract_video_id(VIDEO_INPUT)
    fetched = YouTubeTranscriptApi().fetch(vid, languages=LANGUAGES)
    # Join snippet texts (no timestamps), skipping whitespace-only snippets
    text = "\n".join(snippet.text for snippet in fetched if snippet.text.strip())
    if not text:
        # Reported by the generic `except Exception` handler below.
        raise RuntimeError("Transcript fetched but empty.")
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        f.write(text)
    print(f"Saved: {OUTPUT_PATH}")
except NoTranscriptFound:
    print("No transcript found for requested languages:", LANGUAGES)
except TranscriptsDisabled:
    print("Transcripts are disabled for this video.")
except CouldNotRetrieveTranscript as err:
    # Show the library's own explanation rather than assuming a network
    # problem. NOTE(review): per the library docs this type also covers
    # causes such as an invalid ID or an unavailable video - confirm.
    print("Could not retrieve transcript:", err)
except Exception as e:
    print("Error:", e)


Saved: transcript.txt
