# Developing an Intelligent YouTube Transcript Agent




In [None]:
#pip install langchain langchain_ollama youtube-transcript-api


testando

In [4]:
# Import required libraries for the agent's functionality
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain.tools import Tool
from langchain.agents import initialize_agent, AgentType

class YouTubeTranscriptionAgent:
    """
    Agent that turns a YouTube transcript into summaries, topics, and quotes.

    Workflow:
      1. Extract the video ID from a URL and download the transcript.
      2. Split the transcript into smaller, overlapping chunks.
      3. Run a language model over the chunks to produce a summary,
         the main topics, or important quotes.
      4. Expose those three operations as tools to a LangChain agent that
         answers free-form user queries.
    """

    # Prompt templates. The text is sent to the model verbatim, so wording and
    # line breaks are part of the program's behavior.
    _SUMMARY_TEMPLATE = (
        "Text: {text}\n"
        "Objective: Summarize the provided text.\n"
        "Answer:"
    )
    _FINAL_SUMMARY_TEMPLATE = (
        "Multiple summaries: {summaries}\n"
        "Objective: Create a single, coherent summary in English.\n"
        "Answer:"
    )
    _TOPICS_TEMPLATE = (
        "Text: {text}\n"
        "Objective: Extract the main topics from the provided text.\n"
        "Answer: List the topics separated by commas."
    )
    _QUOTES_TEMPLATE = (
        "Text: {text}\n"
        "Objective: Extract the most important quote from the text.\n"
        "Answer: Provide the quote as plain text."
    )

    def __init__(self, model: str = "llama3"):
        """
        Stores the name of the language model used by every LLM call.

        Args:
            model (str): The name of the LLM (default: "llama3").
        """
        self.model = model

    def extract_video_id(self, url: str) -> str:
        """
        Extracts the video ID from a YouTube URL.

        Supported inputs:
          - https://www.youtube.com/watch?v=VIDEO_ID (extra parameters such
            as "&t=10s" or "&feature=youtu.be" are ignored)
          - https://youtu.be/VIDEO_ID (a trailing "?t=5" is ignored)
          - https://www.youtube.com/embed/VIDEO_ID, /shorts/, /live/, /v/
          - the same forms without a scheme
          - a bare video ID, which is returned unchanged

        Args:
            url (str): The full video URL or a bare video ID.

        Returns:
            str: The extracted video ID. Input that matches none of the forms
            above falls back to the text after the last "=", or to the
            stripped input itself.
        """
        # Local import keeps this method independent of the file's import header.
        from urllib.parse import parse_qs, urlparse

        candidate = url.strip()
        try:
            # Without a scheme urlparse treats the host as part of the path;
            # a leading "//" makes it parse the host correctly.
            parsed = urlparse(candidate if "://" in candidate else f"//{candidate}")
            host = parsed.hostname or ""
            path_parts = [part for part in parsed.path.split("/") if part]
            v_values = parse_qs(parsed.query).get("v", [])
        except ValueError:
            # urlparse rejects some malformed input (e.g. unbalanced brackets).
            host, path_parts, v_values = "", [], []

        # The "v" query parameter wins regardless of its position in the query.
        if v_values:
            return v_values[0]

        if host == "youtu.be" or host.endswith(".youtu.be"):
            if path_parts:
                return path_parts[0]
        elif host.endswith(("youtube.com", "youtube-nocookie.com")):
            if len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
                return path_parts[1]

        # Legacy fallback for anything that is not a recognised URL shape.
        if "=" in candidate:
            return candidate.split("=")[-1]
        return candidate

    def get_transcript(self, url: str) -> str:
        """
        Retrieves the transcript of the YouTube video as one continuous text.

        Steps:
          1. Extract the video ID using 'extract_video_id'.
          2. Request the transcript via the API.
          3. Concatenate the text segments and perform basic cleaning.

        Args:
            url (str): The YouTube video URL.

        Returns:
            str: The transcript text, or a message starting with
            "Failed to get the transcript:" if retrieval fails. Errors are
            reported through the return value, not raised.
        """
        video_id = self.extract_video_id(url)
        try:
            # The result is iterated as a sequence of dicts with a "text" key.
            # NOTE(review): newer youtube-transcript-api releases appear to
            # replace this static call with an instance-level fetch() -
            # confirm against the installed version.
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            text = " ".join(item["text"] for item in transcript)
            # Clean up: remove line breaks and single quotes
            return text.replace("\n", " ").replace("'", "")
        except Exception as e:
            # Deliberately broad: any failure becomes a message for the caller.
            return f"Failed to get the transcript: {str(e)}"

    def create_chunks(self, text: str, chunk_size: int = 1000, overlap: int = 100) -> list:
        """
        Splits the text into smaller chunks for easier processing.

        Args:
            text (str): The complete text to be split.
            chunk_size (int): Value passed as the splitter's chunk_size (default: 1000).
            overlap (int): Value passed as the splitter's chunk_overlap (default: 100).

        Returns:
            list: A list of text chunks.
        """
        splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap)
        return splitter.split_text(text)

    def _run_on_chunks(self, template: str, chunks: list) -> list:
        """
        Runs one prompt template over every chunk.

        Args:
            template (str): A prompt template containing a "{text}" placeholder.
            chunks (list): A list of text chunks.

        Returns:
            list: The model output for each chunk, in chunk order.
        """
        chain = ChatPromptTemplate.from_template(template) | OllamaLLM(model=self.model)
        return [chain.invoke({"text": chunk}) for chunk in chunks]

    def generate_summary(self, chunks: list) -> str:
        """
        Generates a unified summary from multiple text chunks.

        Steps:
          1. For each chunk, generate a mini-summary using the model.
          2. Combine all mini-summaries into a single string.
          3. Ask the model to merge them into one coherent English summary.

        Args:
            chunks (list): A list of text chunks.

        Returns:
            str: The final summary, or an empty string when 'chunks' is empty.
        """
        if not chunks:
            # Nothing to summarize; avoid prompting the model with empty input.
            return ""

        combined_summary = " ".join(self._run_on_chunks(self._SUMMARY_TEMPLATE, chunks))

        final_prompt = ChatPromptTemplate.from_template(self._FINAL_SUMMARY_TEMPLATE)
        final_chain = final_prompt | OllamaLLM(model=self.model)
        return final_chain.invoke({"summaries": combined_summary})

    def extract_topics(self, chunks: list) -> list:
        """
        Extracts the main topics from the text chunks.

        Each model response is split on commas; items are stripped, empty
        items are dropped, and exact duplicates are removed.

        Args:
            chunks (list): A list of text chunks.

        Returns:
            list: Unique topics in the order they were first produced.
        """
        responses = self._run_on_chunks(self._TOPICS_TEMPLATE, chunks)
        topics = (
            item.strip()
            for response in responses
            for item in response.split(",")
        )
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is deterministic (a plain set would not be).
        return list(dict.fromkeys(topic for topic in topics if topic))

    def extract_quotes(self, chunks: list) -> list:
        """
        Extracts important quotes from the text chunks.

        One quote is requested per chunk. Quotes that are equal after
        stripping and lower-casing count as duplicates; the first occurrence
        is kept.

        Args:
            chunks (list): A list of text chunks.

        Returns:
            list: Unique, stripped quotes in chunk order.
        """
        unique_quotes = []
        seen = set()
        for quote in self._run_on_chunks(self._QUOTES_TEMPLATE, chunks):
            stripped = quote.strip()
            normalized = stripped.lower()
            if normalized and normalized not in seen:
                unique_quotes.append(stripped)
                seen.add(normalized)
        return unique_quotes

    def process_user_query(self, query: str, chunks: list):
        """
        Answers a user query by letting a LangChain agent pick a tool
        (summary, topics, or quotes).

        Steps:
          1. Define wrapper functions so each tool runs on 'chunks' whatever
             input string the agent supplies.
          2. Create a list of Tools with name, function, and description.
          3. Initialize a LangChain agent with the defined tools.
          4. Invoke the agent with the user's query.
          5. If invocation raises, return an error message instead.

        Args:
            query (str): The user's question or instruction.
            chunks (list): A list of text chunks from the transcript.

        Returns:
            The unmodified result of 'agent.invoke' on success (presumably a
            dict with "input" and "output" keys - confirm against the
            installed LangChain version), or a string starting with
            "Agent error:" on failure.
        """
        llm = OllamaLLM(model=self.model)

        # The agent's tool input is ignored; the tools always run on 'chunks'.
        def summary_wrapper(_=""):
            return self.generate_summary(chunks)

        def topics_wrapper(_=""):
            return self.extract_topics(chunks)

        def quotes_wrapper(_=""):
            return self.extract_quotes(chunks)

        tools = [
            Tool(
                name="generate_summary",
                func=summary_wrapper,
                description="Generates a detailed summary of the transcript."
            ),
            Tool(
                name="extract_topics",
                func=topics_wrapper,
                description="Extracts the main topics of the transcript."
            ),
            Tool(
                name="extract_quotes",
                func=quotes_wrapper,
                description="Extracts important quotes from the transcript."
            )
        ]

        # handle_parsing_errors is an executor option, so it is set here;
        # passing it to invoke() has no effect.
        agent = initialize_agent(
            tools=tools,
            llm=llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,  # Enables detailed debug messages
            handle_parsing_errors=True
        )

        try:
            return agent.invoke(input=query)
        except Exception as e:
            return f"Agent error: {e}"

# Script entry point: fetch one example transcript and ask the agent for a summary
if __name__ == "__main__":
    # Build the agent around the chosen model
    agent = YouTubeTranscriptionAgent(model="llama3")

    # Video used for the demonstration
    example_url = "https://www.youtube.com/watch?v=1aA1WGON49E"

    # get_transcript reports failures through its return value
    transcript_text = agent.get_transcript(example_url)

    if not transcript_text.startswith("Failed to get"):
        # Break the transcript into chunks the model can handle
        text_chunks = agent.create_chunks(transcript_text)

        # Sample request asking for a summary
        user_query = "Can you provide a summary of this video?"

        # Let the agent choose a tool and answer
        result = agent.process_user_query(user_query, text_chunks)
        print("Response:", result)
    else:
        # Retrieval failed: show the message instead of processing it
        print(transcript_text)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLet's get started.

Thought: The question is asking for a summary of a video, which means we'll need to use the `generate_summary` tool to create a detailed transcript of the video.

Action: generate_summary
Action Input: (empty string, since no text has been provided)[0m
Observation: [36;1m[1;3mVictor Borges, the speaker, emphasizes that while presenters focus on engaging their audience during a talk, they often overlook the broader impact of reaching a wider online audience through social media and internet sharing. He humorously notes that attention spans have become extinct due to instant-gratification online content. To illustrate this point, he keeps his own TED talk concise, aiming to finish within a minute, and shares a witty joke about why balloons are expensive (inflation).[0m
Thought:[32;1m[1;3mThought: This is the start of the process.

Action: generate_summary
Action Input: (empty string, since no text has 

In [None]:
--------------

In [3]:
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain.tools import Tool
from langchain.agents import initialize_agent, AgentType


def parse_url(url: str) -> str:
    """
    Extract the video ID from a YouTube URL.

    Supported inputs:
      - https://www.youtube.com/watch?v=VIDEO_ID (extra parameters such as
        "&t=10s" or "&feature=youtu.be" are ignored)
      - https://youtu.be/VIDEO_ID (a trailing "?t=5" is ignored)
      - https://www.youtube.com/embed/VIDEO_ID, /shorts/, /live/, /v/
      - the same forms without a scheme
      - a bare video ID, which is returned unchanged

    Args:
        url (str): YouTube video URL or bare video ID

    Returns:
        The video ID. Input that matches none of the forms above falls back
        to the text after the last "=", or to the stripped input itself.
    """
    # Local import keeps this function independent of the file's import header.
    from urllib.parse import parse_qs, urlparse

    candidate = url.strip()
    try:
        # Without a scheme urlparse treats the host as part of the path;
        # a leading "//" makes it parse the host correctly.
        parsed = urlparse(candidate if "://" in candidate else f"//{candidate}")
        host = parsed.hostname or ""
        path_parts = [part for part in parsed.path.split("/") if part]
        v_values = parse_qs(parsed.query).get("v", [])
    except ValueError:
        # urlparse rejects some malformed input (e.g. unbalanced brackets).
        host, path_parts, v_values = "", [], []

    # The "v" query parameter wins regardless of its position in the query.
    if v_values:
        return v_values[0]

    if host == "youtu.be" or host.endswith(".youtu.be"):
        if path_parts:
            return path_parts[0]
    elif host.endswith(("youtube.com", "youtube-nocookie.com")):
        if len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
            return path_parts[1]

    # Legacy fallback for anything that is not a recognised URL shape.
    if "=" in candidate:
        return candidate.split("=")[-1]

    return candidate

def get_text_from_video(url: str) -> str:
    """
    Fetch a YouTube video's transcript and flatten it into one string.

    Args:
        url(str): YouTube video URL; the video ID is resolved with parse_url

    Returns:
        The transcript segments joined by spaces, with newlines replaced by
        spaces and single quotes removed. If fetching or joining raises, a
        message starting with "Failed to retrieve transcript:" is returned
        instead.
    """
    video_id = parse_url(url)

    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        joined = " ".join(segment["text"] for segment in segments)
        # Flatten line breaks first, then drop single quotes
        flattened = joined.replace("\n", " ")
        return flattened.replace("'", "")
    except Exception as error:
        return f"Failed to retrieve transcript: {error!s}"

def create_chunks(transcript_text: str) -> list:
    """
    Break the transcript into chunks for per-chunk LLM processing.

    The splitter is configured with chunk_size=1000 and chunk_overlap=100.

    Args:
        transcript_text (str): transcript text of the YouTube video

    Returns:
        The list of chunks produced by the splitter
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    ).split_text(transcript_text)

def get_summary(chunks: list) -> str:
    """
    Summarize every chunk, then merge the partial summaries into one.

    Args:
        chunks (list): transcript chunks to summarize

    Returns:
        The model's merged summary of all chunks
    """
    model = OllamaLLM(model="llama3")

    # Stage 1: one summary per chunk. The four-space indentation inside the
    # prompt is part of the text sent to the model.
    chunk_prompt = ChatPromptTemplate.from_template(
        "Text: {text}\n"
        "    Goal: Summarize given text.\n"
        "    Answer: "
    )
    summarize_chunk = chunk_prompt | model
    partial_summaries = []
    for chunk in chunks:
        partial_summaries.append(summarize_chunk.invoke({"text": chunk}))

    # Stage 2: merge the space-joined partial summaries into one answer
    merge_prompt = ChatPromptTemplate.from_template(
        "Multiple summaries: {summaries}\nGoal: Create a coherent single summary.\nAnswer: "
    )
    merge_chain = merge_prompt | model
    return merge_chain.invoke({"summaries": " ".join(partial_summaries)})

def extract_topics(chunks:list) -> list:
    """
    Collect the distinct topics the model reports across all chunks.

    Each model response is split on commas; items are stripped and empty
    items are dropped.

    Args:
        chunks (list): transcript chunks to analyze

    Returns:
        List of unique topics (built from a set, so the order is unspecified)
    """
    # The four-space indentation inside the prompt is part of the text sent
    # to the model.
    topic_prompt = ChatPromptTemplate.from_template(
        "Text: {text}\n"
        "    Goal: Extract main topics from the given text.\n"
        "    Answer: List the key topics separated by commas."
    )
    topic_chain = topic_prompt | OllamaLLM(model="llama3")

    responses = [topic_chain.invoke({"text": chunk}) for chunk in chunks]

    # Gather every comma-separated item from every response
    collected = set()
    for response in responses:
        collected.update(piece.strip() for piece in response.split(","))

    # Discard empty strings left behind by blank items
    non_empty = {topic for topic in collected if topic}
    return list(non_empty)

def extract_quotes(chunks:list) -> list:
    """
    Ask the model for the key quote of each chunk and drop repeats.

    Quotes that are equal after stripping and lower-casing count as
    duplicates; the first occurrence is kept.

    Args:
        chunks (list): transcript chunks to analyze

    Returns:
        List of unique, stripped quotes in chunk order
    """
    # The four-space indentation inside the prompt is part of the text sent
    # to the model.
    quote_prompt = ChatPromptTemplate.from_template(
        "Text: {text}\n"
        "    Goal: Extract the most important quote from this text.\n"
        "    Answer: Provide the quote as plain text."
    )
    quote_chain = quote_prompt | OllamaLLM(model="llama3")

    responses = [quote_chain.invoke({"text": chunk}) for chunk in chunks]

    # Map each normalized quote to its first stripped spelling; dict insertion
    # order keeps the quotes in chunk order.
    first_seen = {}
    for response in responses:
        cleaned = response.strip()
        key = cleaned.lower()
        if key and key not in first_seen:
            first_seen[key] = cleaned

    return list(first_seen.values())

def process_user_query(query, chunks, url):
    """
    Let a LangChain agent pick a tool for the user query and answer it.

    Args:
        query: the user's question or instruction
        chunks: transcript chunks every tool operates on
        url: not used by this function; kept so existing callers that pass
            it keep working

    Returns:
        The unmodified result of agent.invoke (presumably a dict with
        "input" and "output" keys - confirm against the installed LangChain
        version). Exceptions raised by the agent propagate to the caller.
    """

    llm = OllamaLLM(model="llama3")

    # Tool functions receive one string from the agent; it is ignored because
    # every tool always works on the full list of chunks.
    def get_summary_wrapper(input_str=""):
        return get_summary(chunks)

    def extract_topics_wrapper(input_str=""):
        return extract_topics(chunks)

    def extract_quotes_wrapper(input_str=""):
        return extract_quotes(chunks)

    # Create tools with wrapper functions
    tools = [
        Tool(
            name="get_summary",
            func=get_summary_wrapper,
            description="Provides a detailed summary of the transcript."
        ),
        Tool(
            name="extract_topics",
            func=extract_topics_wrapper,
            description="Extracts main topics from the transcript."
        ),
        Tool(
            name="extract_quotes",
            func=extract_quotes_wrapper,
            description="Extracts important quotes from the transcript."
        )
    ]

    # handle_parsing_errors is an executor option, so it is set here;
    # passing it to invoke() has no effect.
    agent = initialize_agent(
        tools=tools,
        llm=llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
        handle_parsing_errors=True
    )

    # Call agent with user query
    response = agent.invoke(input=query)

    return response


if __name__ == "__main__":
    # Initialize LLM model
    # NOTE(review): not used in this block; kept in case later notebook cells
    # rely on the module-level name.
    llm = OllamaLLM(model="llama3")

    # Example YouTube URL
    url = "https://www.youtube.com/watch?v=1aA1WGON49E"

    transcript_text = get_text_from_video(url)

    # get_text_from_video reports failures as a message string, not an
    # exception; without this check the message would be summarised as if it
    # were the transcript.
    if transcript_text.startswith("Failed to retrieve transcript:"):
        print(transcript_text)
    else:
        chunks = create_chunks(transcript_text)

        user_query = "Can you give me a summary of this video?"

        result = process_user_query(user_query, chunks, url)
        print(f"Response: {result}")
    



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLet's get started.

Thought: The input is likely a transcript of a video, and we need to summarize it.

Action: get_summary

Action Input: [transcript of the video]
[0m
Observation: [36;1m[1;3mHere's a concise summary:

Victor Borges emphasizes the importance of tailoring presentations to appeal to online audiences, acknowledging that attention spans are short and fleeting. He highlights the need for concise and shareable content that can engage people on social media. Using humor, he illustrates his point with an inflation joke and concludes by sharing another joke about balloons being expensive due to "inflation" in a clever wrap-up of his own TED talk, which is promised to be under a minute long.[0m
Thought:[32;1m[1;3mThought: I understand that the goal is to provide a summary of the video transcript.

Action: get_summary

Action Input: [transcript of the video]
[0m
Observation: [36;1m[1;3mVictor Borges emphasizes