# This notebook takes a video link as input, extracts the video's audio with yt-dlp, and then uses OpenAI's Whisper to generate an accurate transcript.

In [1]:
!pip install yt-dlp

Collecting yt-dlp
  Downloading yt_dlp-2025.6.30-py3-none-any.whl.metadata (174 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/174.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m174.1/174.3 kB[0m [31m6.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m174.3/174.3 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading yt_dlp-2025.6.30-py3-none-any.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: yt-dlp
Successfully installed yt-dlp-2025.6.30


In [2]:
!pip install -U openai-whisper

Collecting openai-whisper
  Downloading openai_whisper-20250625.tar.gz (803 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/803.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m368.6/803.2 kB[0m [31m10.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.2/803.2 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda

# Extract the audio from the video

In [4]:
import os

import yt_dlp

# Video to transcribe.
url = "https://youtu.be/G6de8L7cVvM?si=jvSZx0r2JL7S4I-o"

# Base name and codec of the audio file; also used below to build `audio_path`
# so the path always matches the file that is actually written.
audio_basename = "audio"
audio_codec = "mp3"

ydl_opts = {
    # Prefer an audio-only stream; fall back to the best available format.
    'format': 'bestaudio/best',
    # NOTE(review): the conversion itself is configured by the postprocessor
    # below — confirm whether the Python API reads this key at all.
    'extractaudio': True,
    # Written relative to the current working directory; %(ext)s is filled in
    # by yt-dlp and becomes the target codec after post-processing.
    'outtmpl': f'{audio_basename}.%(ext)s',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': audio_codec,
        'preferredquality': '192',
    }],
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download([url])

# Absolute path of the extracted audio. Resolving it against the working
# directory (instead of hard-coding "/content/...") keeps it correct wherever
# the notebook runs; on Colab this is still /content/audio.mp3.
audio_path = os.path.abspath(f"{audio_basename}.{audio_codec}")

[youtube] Extracting URL: https://youtu.be/G6de8L7cVvM?si=jvSZx0r2JL7S4I-o
[youtube] G6de8L7cVvM: Downloading webpage
[youtube] G6de8L7cVvM: Downloading tv client config
[youtube] G6de8L7cVvM: Downloading tv player API JSON
[youtube] G6de8L7cVvM: Downloading ios player API JSON
[youtube] G6de8L7cVvM: Downloading m3u8 information
[info] G6de8L7cVvM: Downloading 1 format(s): 251
[download] Destination: audio.webm
[download] 100% of    3.10MiB in 00:00:00 at 10.51MiB/s  
[ExtractAudio] Destination: audio.mp3
Deleting original file audio.webm (pass -k to keep)


# Transcribe the audio into text

In [5]:
import os
import whisper

# Transcribe the audio at `audio_path` (set by the download cell) with Whisper,
# print the text, and save it as transcript.txt next to the audio file.

# Check if the audio file exists
if not os.path.exists(audio_path):
    print(f"Error: Audio file not found at {audio_path}")
    print("Please ensure you've downloaded the audio correctly and set the 'audio_path' variable to the right location.")
else:
    print(f"Audio file found at: {audio_path}")

    # Load the Whisper model (this may take a moment the first time).
    model = whisper.load_model("base") # You can change "base" to "small", "medium", etc.

    # Transcribe the audio
    print(f"Transcribing audio from {audio_path}...")
    result = model.transcribe(audio_path)

    # Print the full transcript
    transcript_text = result["text"]
    print("\n--- Full Transcription ---")
    print(transcript_text)

    # Save the transcript alongside the audio file rather than at a hard-coded
    # location, so the cell also works outside /content.
    output_transcript_path = os.path.join(os.path.dirname(audio_path), "transcript.txt")

    # Explicit UTF-8: transcripts can contain non-ASCII characters, and the
    # platform default encoding is not guaranteed to be able to encode them.
    with open(output_transcript_path, "w", encoding="utf-8") as f:
        f.write(transcript_text)
    print(f"\nTranscript saved to: {output_transcript_path}")


Audio file found at: /content/audio.mp3
Transcribing audio from /content/audio.mp3...





--- Full Transcription ---
 Smile and learn! Well done Sparky! Hi, how are you? Do you want to play with me and my friend Sparky? Sparky is a very special robot dog because he uses something called artificial intelligence, also known as AI. Have you ever heard of AI? If not, don't worry. I'll tell you a little bit about it. Artificial intelligence allows machines to do things similar to what humans do. Such as having a conversation, solving problems, or even drawing. Let's try this artificial intelligence thing with Sparky. Sparky! Fetch the yellow ball! Now bring it back. Sparky has been trained to follow orders. He can also dig holes and shake, just like a normal dog. Best of all, I can teach him new tricks thanks to artificial intelligence. Tomorrow, I'm going to teach him to jump over obstacles. What else do you think he can learn? AI is not just in robots like Sparky. Have you ever used one of those voice assistants that help us look things up on the internet? They also use artif