# Summarize a long YouTube Video

In [2]:
# Link of the YouTube video to summarize; read by main().
YOUTUBE_VIDEO_LINK = "https://www.youtube.com/watch?v=8l8fpR7xMEQ"

In [27]:
import os
import sys
import yt_dlp
import whisper   # correct package: pip install openai-whisper
from utility.llm_factory2 import LLMFactory

# Create the working directory for the downloaded audio and the cached
# transcript (same path as YouTubeTranscripter.destination). exist_ok=True
# makes this a no-op when the directory is already there.
os.makedirs("./data/temp/", exist_ok=True)


class YouTubeTranscripter:
    destination = "./data/temp/"

    # ------------------------------
    # Download audio from YouTube
    # ------------------------------
    @staticmethod
    def __download_video(url):
        """Download the audio track of a YouTube video as an MP3 file.

        The audio is stored as ``temp.mp3`` inside
        ``YouTubeTranscripter.destination``. When that file already exists the
        download is skipped and the existing file is reused.

        NOTE: the cache file name does not depend on ``url``, so a leftover
        ``temp.mp3`` from a different video is reused as-is. Delete the file
        to force a fresh download.

        Args:
            url: Link of the YouTube video whose audio should be downloaded.

        Returns:
            Path of the MP3 file.
        """
        audio_path = os.path.join(YouTubeTranscripter.destination, "temp.mp3")

        # Check the cache first so no download options are built needlessly.
        if os.path.exists(audio_path):
            print(f"{audio_path} exists. Skipping download.")
            return audio_path

        ydl_opts = {
            "ffmpeg_location": "/usr/bin/ffmpeg",  # ensure ffmpeg is installed
            # Extractor argument values must be lists; a bare string would be
            # iterated character by character by yt-dlp.
            "extractor_args": {"youtube": {"player_client": ["default"]}},
            "format": "bestaudio/best",
            # Convert whatever container was downloaded into a 192 kbps MP3.
            "postprocessors": [{
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192",
            }],
            # "%(ext)s" is filled in by yt-dlp; after the MP3 post-processing
            # step the resulting file is "temp.mp3", i.e. audio_path.
            "outtmpl": os.path.join(YouTubeTranscripter.destination, "temp.%(ext)s"),
            "quiet": True,
            "noprogress": True,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            print(f"Downloaded audio for: {info.get('title')}")

        return audio_path

    # ------------------------------
    # Transcribe audio file
    # ------------------------------
    @staticmethod
    def __generate_scripts_from_audio(audio_path):
        """Transcribe an audio file with Whisper, caching the text on disk.

        The transcript is cached as ``temp.txt`` inside
        ``YouTubeTranscripter.destination``. When that file exists its content
        is returned and no transcription is performed.

        Args:
            audio_path: Path of the audio file to transcribe.

        Returns:
            The transcript text.

        Raises:
            AttributeError: If the imported ``whisper`` module does not provide
                ``load_model`` (for example when a different distribution that
                is also named ``whisper`` is installed).
        """
        script_file = os.path.join(YouTubeTranscripter.destination, "temp.txt")

        if os.path.exists(script_file):
            print(f"{script_file} exists. Using cached transcript.")
            # Context manager closes the handle; explicit UTF-8 keeps the
            # cache readable regardless of the platform's default encoding.
            with open(script_file, encoding="utf-8") as f:
                return f.read()

        try:
            load_model = whisper.load_model
        except AttributeError as exc:
            # Same exception type as before, but with an actionable message.
            raise AttributeError(
                "The imported 'whisper' module has no 'load_model'. A different "
                "package named 'whisper' is probably installed; install "
                "'openai-whisper' instead (pip install openai-whisper)."
            ) from exc

        # Whisper base model
        model = load_model("base")
        result = model.transcribe(audio_path)
        transcript = result["text"]

        with open(script_file, "w", encoding="utf-8") as f:
            f.write(transcript)

        return transcript

    # ------------------------------
    # Rate the generated summary
    # ------------------------------
    @staticmethod
    def rate_the_summary(summary_response, transcripts):
        system_message = """
        You are tasked with rating AI-generated summaries of YouTube transcripts.
        Rate the summary from 0 to 5 based on accuracy and completeness.
        """

        user_message = f"""
### Transcript
{transcripts}

### Summary
{summary_response}
"""

        client = LLMFactory.get_llm("openai")

        from langchain_core.output_parsers import StrOutputParser
        from langchain_core.prompts import ChatPromptTemplate

        parser = StrOutputParser()

        prompt = ChatPromptTemplate.from_messages([
            ("system", system_message),
            ("user", user_message),
        ])

        chain = prompt | client | parser

        response = chain.invoke(
            {},
            config={
                "temperature": 0,
                "max_tokens": 5,
                "model_name": "gpt-4o"
            }
        )

        return response

    # ------------------------------
    # End-to-end Summarization
    # ------------------------------
    @staticmethod
    def summarize(video_link, system_message):
        """Download the video's audio and obtain its transcript.

        NOTE(review): the body visible here stops at ``client = LLMFactory.get``
        and contains no ``return`` statement, so the definition appears to be
        truncated. ``main()`` unpacks the result into
        ``(summary_response, transcripts)``, which this visible body cannot
        provide. Restore the missing part before relying on this method.

        Args:
            video_link: Link of the YouTube video to process.
            system_message: System prompt for the summarization; not used in
                the visible part of the body.
        """
        # Audio is downloaded (or reused from the on-disk cache) first ...
        audio_path = YouTubeTranscripter.__download_video(video_link)
        # ... then transcribed (or the cached transcript is loaded).
        transcripts = YouTubeTranscripter.__generate_scripts_from_audio(audio_path)

        # NOTE(review): this only references the attribute ``get`` on
        # LLMFactory; nothing is called. The statement looks cut off.
        client = LLMFactory.get


In [28]:
# Main script to run the summarization
def main():
    """Summarize the configured video with two prompting styles and rate each.

    For the zero-shot prompt and then the chain-of-thought prompt, this asks
    ``YouTubeTranscripter.summarize`` for a summary, prints it, has
    ``YouTubeTranscripter.rate_the_summary`` rate it, and prints the rating.
    """
    video_link = YOUTUBE_VIDEO_LINK

    # Zero-shot prompt: one direct instruction.
    zero_shot_prompt = """You are helpful assistant. Summarize the following youtube transcript in 5-10 lines.
        Keep it concise and include all important points. Mention all the topics covered in the transcript."""

    # Chain-of-thought prompt: asks the model to reason step by step.
    cot_prompt = """
        You are a helpful assistant that summarizes YouTube transcripts.
        Think step-by-step, focus on the main ideas, and summarize in clear and concise language.
        Ensure the summary is coherent, includes essential details, and reflects the original meaning.
    """

    # (label shown in the output, system prompt) in the order they are run.
    strategies = (
        ("zero-shot-prompting", zero_shot_prompt),
        ("chain-of-thought prompting", cot_prompt),
    )

    for label, system_prompt in strategies:
        summary, transcript_text = YouTubeTranscripter.summarize(video_link, system_prompt)

        print(f"YouTube Video: {video_link}")
        print("=================================================================================")
        print(f"Summarization using {label}:\n{summary}")

        # Have the LLM grade the summary against the transcript.
        rating = YouTubeTranscripter.rate_the_summary(summary, transcript_text)
        print("---------------------------------------------------------------------------------")
        print(f"Rating of the summarization: {rating}")

In [29]:
# Run the zero-shot and chain-of-thought summarization defined in main().
main()

./data/temp/temp.mp3 exists. Skipping download.


AttributeError: module 'whisper' has no attribute 'load_model'

In [32]:
# Diagnostic for the AttributeError above: print the file that the `whisper`
# import actually resolves to, to see which installed package is being used.
import whisper
print(whisper.__file__)

/home/azureuser/ws/agenticaiprojects/.venv/lib/python3.12/site-packages/whisper.py
