In [1]:
import yt_dlp
import whisper
from transformers import pipeline
import os

# 1. Download YouTube video audio or subtitles
def download_youtube_audio(url):
    ydl_opts = {
        'format': 'bestaudio/best',  # download the best audio quality
        'outtmpl': 'audio.%(ext)s',  # save file as audio.{extension}
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',  # Use FFmpegExtractAudio
            'preferredcodec': 'mp3',  # convert to mp3
            'preferredquality': '192',
        }],
        'quiet': True,  # Disable progress output for faster execution
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        return 'audio.mp3'  # Return the audio file path

# 2. Transcribe audio using Whisper (smaller model for faster speed)
def transcribe_audio(audio_path):
    print("🔄 Transcribing audio...")  # Added log
    model = whisper.load_model("tiny")
    result = model.transcribe(audio_path)  # Transcribe the audio
    print(f"Transcription completed: {len(result['text'])} characters.")  # Log the transcription length
    return result['text']  # Return the transcription text

# 3. Summarize transcription using a Hugging Face model (T5 or BART)
def summarize_text(text):
    print("🔍 Summarizing transcription...")  # Added log
    # Summarize in smaller chunks if text is too large
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    # Split text into chunks to avoid too large inputs
    chunk_size = 1000  # You can adjust this size
    text_chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]

    summary = ""
    for chunk in text_chunks:
        summary += summarizer(chunk, max_length=200, min_length=50, do_sample=False)[0]['summary_text'] + " "

    return summary.strip()

# 4. Full function to handle everything
def summarize_youtube_video(url):
    print("🔄 Downloading audio from YouTube video...")
    audio_path = download_youtube_audio(url)  # Step 1: Download audio

    print("🧠 Transcribing audio to text...")
    transcription = transcribe_audio(audio_path)  # Step 2: Transcribe audio

    if not transcription.strip():
        print("⚠️ No transcription was generated!")
        return "Error: No transcription was generated."

    print("🔍 Summarizing the transcription...")
    summary = summarize_text(transcription)  # Step 3: Summarize text

    # Clean up the audio file after processing
    os.remove(audio_path)

    return summary

# 5. Run the summarization
if __name__ == "__main__":
    video_url = input("Enter YouTube video URL: ")
    print("\n🔄 Summarizing video, please wait...\n")
    summary = summarize_youtube_video(video_url)  # Call the function to summarize the video

    if summary:
        print("\n🔍 Video Summary:")
        print(summary)
    else:
        print("⚠️ Something went wrong with the summarization.")


Enter YouTube video URL: https://youtu.be/cVsyJvxX48A

🔄 Summarizing video, please wait...

🔄 Downloading audio from YouTube video...
🧠 Transcribing audio to text...
🔄 Transcribing audio...


100%|██████████████████████████████████████| 72.1M/72.1M [00:00<00:00, 189MiB/s]


Transcription completed: 2455 characters.
🔍 Summarizing the transcription...
🔍 Summarizing transcription...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu
Your max_length is set to 200, but your input_length is only 140. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=70)



🔍 Video Summary:
Dean Wooder calls his son, Chris, to apologize for missing their appointment the other day. "How did you get so smart? It's just smart," his son asks. "I just wanted to take this opportunity to say, thank  thank you," Wooder says. I'm taking my son, Tim, my 12th year old. We're just leaving. Tim? Listen, we'll get out of your way. Do you want to sit in the box? No. It's not actually a box. "I think I got stuck on my big. You all right? Oh, yeah. Ah, I can get this on fire. You're not allergic to anything. Where do you get your... It's like, wake me up. That can my head"
