<a href="https://colab.research.google.com/github/aaryapandya12/youtube-transcription/blob/main/transcription.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import os
import tempfile
from yt_dlp import YoutubeDL
import whisper

# Name of the Whisper model passed to whisper.load_model() in transcribe_audio().
MODEL_SIZE = "base"
# Audio container/codec: used as the temp-file suffix and as the
# FFmpegExtractAudio "preferredcodec" in download_audio().
AUDIO_FORMAT = "wav"

def download_audio(url: str) -> str:
    """Download the audio of *url* and return the path of the converted file.

    The audio is fetched with yt-dlp and converted to ``AUDIO_FORMAT`` by the
    ``FFmpegExtractAudio`` post-processor. The result lives in the system
    temporary directory; the caller is responsible for deleting it.

    Args:
        url: Address of the video to download.

    Returns:
        Path of the temporary audio file, ending in ``.{AUDIO_FORMAT}``.

    Raises:
        Any exception raised by ``YoutubeDL`` propagates unchanged, after the
        reserved temporary file has been removed.
    """
    suffix = f".{AUDIO_FORMAT}"

    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, which another process could claim before it is used.
    fd, audio_path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)

    # Drop only the trailing suffix. str.replace would also rewrite any
    # earlier ".wav" occurring in the directory part of the path.
    base_path = audio_path[:-len(suffix)]

    ydl_opts = {
        "format": "bestaudio/best",
        # '%' introduces a field in yt-dlp output templates, so literal
        # percent signs in the path have to be doubled.
        "outtmpl": base_path.replace("%", "%%") + ".%(ext)s",
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": AUDIO_FORMAT,
            "preferredquality": "192",
        }],
        # The reserved placeholder already exists on disk; make sure a
        # download that resolves to the same name is not skipped.
        "overwrites": True,
        "quiet": True,
        "no_warnings": True,
    }

    try:
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except BaseException:
        # The caller never receives the path on failure, so remove the
        # placeholder here (best effort) before re-raising.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise

    return audio_path

def transcribe_audio(path: str, with_timestamps: bool = False) -> str:
    """Run Whisper on the audio file at *path* and return the transcript.

    Each segment reported by the model becomes one line of the result. When
    *with_timestamps* is true, every line is prefixed with the segment's
    ``[start-end]`` values formatted to two decimals.
    """
    segments = whisper.load_model(MODEL_SIZE).transcribe(path, verbose=False)['segments']

    def render(segment) -> str:
        # Format one segment as a single transcript line.
        body = segment['text'].strip()
        if not with_timestamps:
            return body
        return f"[{segment['start']:.2f}-{segment['end']:.2f}] {body}"

    return "\n".join(render(segment) for segment in segments)

def main() -> None:
    """Interactively download, transcribe, print and optionally save a video.

    Prompts for a YouTube URL and whether to include timestamps, prints the
    transcript, and offers to write it to ``transcript.txt`` in the current
    directory. Any error is reported on stdout instead of propagating, and
    the temporary audio file is removed whether or not the run succeeded.
    """
    video_url = input("YouTube URL: ").strip()
    # Strip before comparing so answers such as "y " or " Y" still count as yes.
    with_timestamps = input("⏱️ Include timestamps? (y/n): ").strip().lower() == "y"

    # Explicit sentinel: stays None if the download fails before a path is
    # returned, so the cleanup below needs no locals() introspection.
    audio_path = None
    try:
        print("Downloading audio...")
        audio_path = download_audio(video_url)

        print("Transcribing...")
        transcript = transcribe_audio(audio_path, with_timestamps)

        print("\n TRANSCRIPTION:\n")
        print(transcript)

        if input("Save to .txt? (y/n): ").strip().lower() == "y":
            with open("transcript.txt", "w", encoding="utf-8") as f:
                f.write(transcript)
            print("Saved to transcript.txt")

    except Exception as e:
        # Top-level boundary of an interactive script: report and fall through
        # to cleanup rather than showing a traceback.
        print(f"Error: {e}")
    finally:
        if audio_path is not None and os.path.exists(audio_path):
            os.remove(audio_path)

# Start the interactive workflow only when this module is run as the main
# program, not when it is imported.
if __name__ == "__main__":
    main()

YouTube URL: https://youtu.be/OVAMb6Kui6A?si=WQlDIo0INb1-vADd
⏱️ Include timestamps? (y/n): y
Downloading audio...
Transcribing...




Detected language: English


100%|██████████| 82098/82098 [03:18<00:00, 412.65frames/s]



 TRANSCRIPTION:

[0.00-7.00] Hi, Chris Sauer. Hi, Kate Johnson. Nice to meet you. Nice to meet you, Kate. Chris, please tell me a little bit about yourself.
[7.00-13.84] Well, I'm currently finishing my Masters of Education program at Lake Erie College and working on transitioning from a northeastern Ohio
[13.84-26.00] winner to being a member of the Jacksonville area community. Why, why the move? Well, my wife's job is moving down south and I'm coming along with her.
[26.00-34.00] It's very noble and admirable about you at the same time. How did you hear about the position open with our company here at RRI?
[34.00-43.00] Well, when I thought about the opportunity of changing locations and moving from one physical location to another, I thought about making a career move as well.
[43.00-53.00] And I was starting with my main interests and passions. And I love being outside. I love doing outdoor activities. And I love working with people.
[53.00-62.00] And I thought if I could use my e