In [None]:
from langchain_community.document_loaders import YoutubeLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain_ollama import ChatOllama
from langchain_text_splitters import RecursiveCharacterTextSplitter

from .utils._logger import logger
from .utils._validation import config_args

In [None]:
from typing import LiteralString

from langchain_core.prompts.chat import (
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)


def extract_and_summarize_youtube_content(video_url) -> None | str:
    """
    Extracts text from a YouTube video and generates a short summary using Ollama.

    Args:
        video_url (str): The URL of the YouTube video.

    Returns:
        str: A short summary of the video content.  Returns None if an error occurs.
    """
    try:
        # Load the YouTube document
        loader = YoutubeLoader.from_youtube_url(video_url, add_video_info=False)
        docs = loader.load()

        if not docs:
            logger.warning(f"No documents loaded from {video_url}")
            return None

        doc = docs[0]  # Assuming only one document is loaded

        # Split the text into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=100
        )
        chunks = text_splitter.split_text(doc.page_content)

        system: SystemMessagePromptTemplate = SystemMessagePromptTemplate.from_template(
            """You are a helpful AI assistant specializing in concise summarization of text content."""
        )
        prompt = """Create a short summarization of the provided context!
            ### Context:
            {context}

            ### Answer:"""

        llm = ChatOllama(model=config_args.model, base_url="url")
        prompt = HumanMessagePromptTemplate.from_template(prompt)
        template = ChatPromptTemplate(messages=[system, prompt])
        qna_chain = template | llm | StrOutputParser()

        sums = []
        for chunk in chunks:
            sum = qna_chain.invoke({"context": chunk})
            sums.append(sum)

        # Combine summaries and final prompt
        sums_join: LiteralString = ". ".join([sum.split("\n\n")[-1] for sum in sums])
        result: str = qna_chain.invoke({"context": sums_join})
        return result.strip()

    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return None

In [None]:
# Run the summarizer once and print whatever it returns (a summary string or None).
video_summary = extract_and_summarize_youtube_content("url")
print(video_summary)