In [1]:
# !pip install gradio openai langchain langchain-community youtube_transcript_api tiktoken transformers langchain-ollama

# YouTube Summarizer by Case Done
- This app will get YouTube info and transcript, and allow you to summarize it.
- It is based on the LangChain map-reduce method, powered by Llama 3.2 via Ollama.
- Start by providing a valid YouTube URL in the textbox.

In [2]:
import html
import json
import re
from urllib.request import urlopen

import gradio as gr
import tiktoken
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_ollama import ChatOllama
from tqdm import tqdm
from youtube_transcript_api import YouTubeTranscriptApi

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Default prompt for the mapping step; `{text}` is the placeholder the
# summarizers fill with one transcript chunk.
MAP_TEMPLATE_TXT = """Write a detail summary of this text section in bullet points. 
Use '-' for bullet points and answer only the bullet points.
Text:
{text}

SUMMARY:"""
    
# Default prompt for the combine step; `{text}` is filled with the
# newline-joined per-chunk summaries.
COMBINE_TEMPLATE_TXT = """Combine these summaries into a final summary in bullet points.
Use '-' for bullet points and answer only the bullet points.
Text:
{text}

FINAL SUMMARY:"""

# Lower-case aliases of the map-reduce defaults.
map_prompt_txt = MAP_TEMPLATE_TXT
combine_prompt_txt = COMBINE_TEMPLATE_TXT


# Default "question" prompt for the refine approach; `{text}` is the document
# placeholder (the refine chain is configured with document_variable_name="text").
QUESTION_TEMPLATE_TXT = """Write a detailed summary (in bullet points, using "-" for bullets) of the following:

"{text}"

CONCISE SUMMARY:"""

# Default "refine" prompt; `{existing_answer}` carries the running summary and
# `{text}` the new text (matching initial_response_name="existing_answer").
REFINE_TEMPLATE_TXT = """Your job is to produce a final summary in bullet points (using "-" for bullets).
You are provided an existing summary here:
<existing_summary>
{existing_answer}
</existing_summary>

You are provided new text.
<new_text>
{text}
</new_text>

Given the new text, refine the original summary.
If the context isn't useful, return the original summary. Answer your summary only, not other texts.
"""

# Lower-case aliases of the refine defaults.
refine_prompt_txt = REFINE_TEMPLATE_TXT
question_prompt_txt = QUESTION_TEMPLATE_TXT

# Configuration Settings

In [4]:
# with open("sample-text.txt", "r") as f:
#     transcript = f.read()

# Ollama model and server used by every summarizer.
model = "llama3.2"
base_url = "http://localhost:11434"

# Text-splitting settings.
chunk_size = 2000 # this is in tokens
overlap_size = 0 # this is in tokens
temperature = 0.5

# Generation budgets, one per summarization step.
# num_predict = number of tokens to predict, Default: 128, -1 = infinite generation, -2 = fill context
mapreduce_num_predict = 512
map_num_predict = 512
combine_num_predict = 2048
refine_num_predict = 2048

# Shared settings dictionary read by the summarizer functions.
# (No `global` statement is needed here: names assigned at module level are
# already module globals.)
config = {
    "model": model,
    "base_url": base_url,
    "chunk_size": chunk_size,
    "overlap_size": overlap_size,
    "temperature": temperature,
    "mapreduce_num_predict": mapreduce_num_predict,
    "map_num_predict": map_num_predict,
    "combine_num_predict": combine_num_predict,
    "refine_num_predict": refine_num_predict,
}

# Text shown in the transcript box before anything has been loaded.
text_to_summarize = ""


# Helper functions

In [11]:
def _first_json_string(content: str, pattern: str, default: str) -> str:
    """Return the first string captured by ``pattern`` in ``content``, decoded.

    The captured text is treated as the inside of a JSON string literal, so
    escapes such as ``\\n``, ``\\"`` and ``\\u0026`` are decoded. If decoding
    fails the raw capture is used. HTML entities are unescaped in both cases.
    ``default`` is returned when the pattern does not match.
    """
    found = re.search(pattern, content)
    if not found:
        return default
    raw = found.group(1)
    try:
        decoded = json.loads(f'"{raw}"')
    except ValueError:
        # Not a valid JSON string body; fall back to the text as captured.
        decoded = raw
    return html.unescape(decoded)


def get_youtube_info(url: str):
    """Get video title and description.

    Args:
        url: YouTube video URL.

    Returns:
        tuple: ``(title, description)``. Each falls back to a placeholder
        ("Unknown Title" / "No description available") when it cannot be
        found in the page.

    Raises:
        ValueError: if no video ID can be extracted from ``url``.
        OSError: if the page cannot be fetched (network errors, timeout).
    """
    video_id = extract_video_id(url)
    if not video_id:
        raise ValueError("Invalid YouTube URL")

    # Get video page content; the context manager closes the connection and
    # the timeout keeps the UI from hanging on a stalled request.
    video_url = f"https://youtube.com/watch?v={video_id}"
    with urlopen(video_url, timeout=15) as response:
        content = response.read().decode('utf-8', errors='replace')

    # Body of a JSON string literal: any run of non-quote characters where a
    # backslash always consumes the next character (so \" does not end it).
    json_string_body = r'((?:[^"\\]|\\.)+)'

    title = _first_json_string(
        content, r'"title":"' + json_string_body + r'"', "Unknown Title"
    )
    description = _first_json_string(
        content,
        r'"description":{"simpleText":"' + json_string_body + r'"',
        "No description available",
    )

    return title, description
    
    
def extract_video_id(url):
    """Extract YouTube video ID from URL.

    The patterns are tried in order and the first one that matches wins.

    Returns:
        The 11-character ID captured by the first matching pattern, or None
        when no pattern matches.
    """
    id_patterns = (
        r'(?:v=|\/)([0-9A-Za-z_-]{11}).*',
        r'(?:embed\/)([0-9A-Za-z_-]{11})',
        r'(?:youtu\.be\/)([0-9A-Za-z_-]{11})',
    )
    attempts = (re.search(id_pattern, url) for id_pattern in id_patterns)
    return next((hit.group(1) for hit in attempts if hit), None)

def get_text_splitter(chunk_size: int, overlap_size: int):
    """Build a RecursiveCharacterTextSplitter via its tiktoken-encoder constructor.

    Args:
        chunk_size: passed to the splitter as ``chunk_size``.
        overlap_size: passed to the splitter as ``chunk_overlap``.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": overlap_size}
    return RecursiveCharacterTextSplitter.from_tiktoken_encoder(**splitter_options)

def convert_text_to_tokens(text, encoder="gpt-3.5-turbo"):
    """Encode ``text`` with the tiktoken encoding registered for model ``encoder``.

    Returns:
        The value returned by the encoding's ``encode`` call.
    """
    return tiktoken.encoding_for_model(encoder).encode(text)

def get_larger_context_size(token_count):
    """Return the smallest multiple of 1024 that is strictly greater than ``token_count``.

    Args:
        token_count: number of tokens the context must be able to hold.

    Returns:
        int: context size in tokens; never less than 1024.
    """
    # Computed arithmetically, so there is no upper limit on token_count
    # (a fixed candidate list would raise StopIteration once exhausted).
    blocks = int(token_count // 1024) + 1
    # Clamp so zero or negative counts still yield the minimum context of 1024.
    return 1024 * max(blocks, 1)


def get_youtube_transcript(url):
    """
    Fetch the transcript of a YouTube video and count its tokens.

    Args:
        url (str): YouTube video URL

    Returns:
        tuple: (full transcript text, token count) on success, or
        ("Error: <message>", 0) when any step raises.
    """
    try:
        video_id = extract_video_id(url)
        if not video_id:
            raise ValueError("Invalid YouTube URL")

        # Fetch the transcript entries and join their text with single spaces.
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        pieces = [segment['text'] for segment in segments]
        transcript_text = ' '.join(pieces)

        # Token count uses the tiktoken encoding registered for "gpt-4".
        token_ids = tiktoken.encoding_for_model("gpt-4").encode(transcript_text)
    except Exception as e:
        return f"Error: {str(e)}", 0
    else:
        return transcript_text, len(token_ids)
    
    
def get_llm(model: str, base_url: str, temperature: float, num_ctx: int=2048, num_predict: int=256, verbose: bool=False):
    """Create a ChatOllama client.

    Args:
        model: Ollama model name.
        base_url: URL of the Ollama server.
        temperature: sampling temperature.
        num_ctx: context size passed to ChatOllama.
        num_predict: number of tokens to predict, passed to ChatOllama.
        verbose: forwarded to ChatOllama; defaults to False so existing
            positional/keyword callers behave as before.

    Returns:
        ChatOllama: the configured chat model.
    """
    return ChatOllama(
        model=model,
        base_url=base_url,
        temperature=temperature,
        num_ctx=num_ctx,
        num_predict=num_predict,
        verbose=verbose,
    )


def convert_text_to_split_docs(text, chunk_size, overlap_size):
    """Wrap ``text`` in a single Document and split it into chunks.

    Args:
        text: text to split.
        chunk_size: forwarded to get_text_splitter.
        overlap_size: forwarded to get_text_splitter.

    Returns:
        The result of the splitter's ``split_documents`` call.
    """
    splitter = get_text_splitter(chunk_size=chunk_size, overlap_size=overlap_size)
    whole_document = Document(page_content=text)
    return splitter.split_documents([whole_document])
    
    
def get_summary_map_reduce_langchain(text_to_summarize: str, map_prompt_txt: str, combine_prompt_text:str):
    """Summarize text with LangChain's "map_reduce" summarize chain.

    Settings (model, base URL, temperature, chunk/overlap size and
    ``mapreduce_num_predict``) are read from the module-level ``config`` dict.

    Args:
        text_to_summarize: the text to summarize.
        map_prompt_txt: prompt template for the map step; uses ``{text}``.
        combine_prompt_text: prompt template for the combine step; uses ``{text}``.

    Returns:
        The chain's ``output_text``.
    """
    chunk_size = config["chunk_size"]
    overlap_size = config["overlap_size"]
    mapreduce_num_predict = config["mapreduce_num_predict"]

    split_docs = convert_text_to_split_docs(text_to_summarize, chunk_size, overlap_size)

    # Context must hold one chunk plus the tokens generated for it.
    num_ctx = get_larger_context_size(mapreduce_num_predict + chunk_size)

    # Built directly (as the other summarizers in this file do) so that
    # `verbose` can be set on the model itself.
    llm = ChatOllama(
        model=config["model"],
        base_url=config["base_url"],
        temperature=config["temperature"],
        num_ctx=num_ctx,
        num_predict=mapreduce_num_predict,
        verbose=True,
    )

    map_prompt = PromptTemplate(
        template=map_prompt_txt,
        input_variables=["text"]
    )

    combine_prompt = PromptTemplate(
        template=combine_prompt_text,
        input_variables=["text"]
    )

    chain = load_summarize_chain(llm,
                                 chain_type="map_reduce",
                                 map_prompt=map_prompt,
                                 combine_prompt=combine_prompt,
                                 verbose=True
                                 )

    output = chain.invoke(split_docs)

    return output['output_text']

def get_summary_map_reduce_manual(text_to_summarize: str, map_prompt_txt: str, combine_prompt_text:str):
    """Summarize text with a hand-written map step followed by a combine step.

    Each chunk is summarized with the map prompt, the chunk summaries are
    joined with newlines, and the combine prompt produces the final summary.
    Settings are read from the module-level ``config`` dict.

    Args:
        text_to_summarize: the text to summarize.
        map_prompt_txt: prompt template for the map step; uses ``{text}``.
        combine_prompt_text: prompt template for the combine step; uses ``{text}``.

    Returns:
        The ``content`` of the combine model's response.
    """
    # Snapshot all settings up front.
    chunk_size = config["chunk_size"]
    overlap_size = config["overlap_size"]
    model = config["model"]
    base_url = config["base_url"]
    temperature = config["temperature"]
    map_num_predict = config["map_num_predict"]
    combine_num_predict = config["combine_num_predict"]

    def build_llm(num_ctx, num_predict):
        # Both steps use the same model settings; only the sizes differ.
        return ChatOllama(
            model=model,
            base_url=base_url,
            temperature=temperature,
            num_ctx=num_ctx,
            num_predict=num_predict,
            format='',
            verbose=True
        )

    chunks = convert_text_to_split_docs(text_to_summarize, chunk_size, overlap_size)

    map_prompt = PromptTemplate(template=map_prompt_txt, input_variables=["text"])
    combine_prompt = PromptTemplate(template=combine_prompt_text, input_variables=["text"])

    # Map step: context sized for one chunk plus its generated summary.
    mapper = build_llm(get_larger_context_size(map_num_predict+chunk_size), map_num_predict)
    chunk_summaries = [
        mapper.invoke(map_prompt.format_prompt(text=chunk.page_content).text).content
        for chunk in tqdm(chunks, desc="Mapping...")
    ]

    # Combine step: context sized from the token count of the formatted prompt.
    combine_input = combine_prompt.format_prompt(text="\n".join(chunk_summaries)).text
    prompt_token_count = len(convert_text_to_tokens(combine_input))
    combiner = build_llm(
        get_larger_context_size(prompt_token_count+combine_num_predict),
        combine_num_predict,
    )

    return combiner.invoke(combine_input).content

def get_summary_refine_langchain(text_to_summarize: str, refine_prompt_txt: str, question_prompt_text:str):
    """Summarize text with LangChain's "refine" summarize chain.

    Settings are read from the module-level ``config`` dict.

    Args:
        text_to_summarize: the text to summarize.
        refine_prompt_txt: refine prompt template; the chain is configured to
            fill ``{text}`` and ``{existing_answer}``.
        question_prompt_text: question prompt template; uses ``{text}``.

    Returns:
        The chain's ``output_text``.
    """
    chunk_size = config["chunk_size"]
    refine_num_predict = config["refine_num_predict"]

    chunks = convert_text_to_split_docs(text_to_summarize, chunk_size, config["overlap_size"])

    refine_prompt = PromptTemplate.from_template(template=refine_prompt_txt)
    question_prompt = PromptTemplate.from_template(template=question_prompt_text)

    # Context is sized at twice (generation budget + chunk size).
    context_tokens = get_larger_context_size(int(2*(refine_num_predict+chunk_size)))

    refine_llm = ChatOllama(
        model=config["model"],
        base_url=config["base_url"],
        temperature=config["temperature"],
        num_ctx=context_tokens,
        num_predict=refine_num_predict,
        format='',
        verbose=True
    )

    chain_options = {
        "chain_type": "refine",
        "question_prompt": question_prompt,
        "refine_prompt": refine_prompt,
        "document_variable_name": "text",
        "initial_response_name": "existing_answer",
        "verbose": True,
    }
    refine_chain = load_summarize_chain(refine_llm, **chain_options)

    result = refine_chain.invoke(chunks)
    return result['output_text']

def get_summary():
    """Placeholder summarizer: always returns the same fixed string."""
    placeholder_text = "yo yo"
    return placeholder_text

In [12]:
# for debugging: preload a local sample transcript when one is available.
# A missing file must not break "Restart & Run All", so fall back to an empty
# transcript instead of raising.
try:
    with open("sample-text.txt", "r", encoding="utf-8") as f:
        text_to_summarize = f.read()
except FileNotFoundError:
    text_to_summarize = ""

In [13]:
# Shut down the app left over from a previous run of this cell so the port is
# freed before a new one is launched.
try:
    demo.close()
except NameError:
    # First run in this kernel: there is no previous `demo` to close.
    pass
except Exception as close_error:
    # Best effort only, but report the problem instead of hiding it. Unlike a
    # bare `except:`, this lets KeyboardInterrupt/SystemExit propagate.
    print(f"Could not close the previous demo: {close_error}")


# Gradio UI: a "YouTube" tab to fetch video info and transcript, and a
# "Summarize" tab with model settings and three summarization approaches.
# NOTE: several component variables below (model, base_url, temperature,
# mapreduce_num_predict, ...) reuse names that are also assigned as plain
# settings elsewhere in this notebook; after this cell runs they refer to the
# Gradio components. The summarizers read their settings from `config`.
with gr.Blocks() as demo:
    gr.Markdown("""# YouTube Summarizer by Case Done
- This app will get YouTube info and transcript, and allow you to summarize it.
- It is based on LangChain map-reduce method powered by Llama 3.2 via Ollama.
- Start by providing a valid YouTube URL in the textbox.
                """)
    
    with gr.Row():
        with gr.Column(scale=4):
            pass
        with gr.Column(scale=1, min_width=25):
            bttn_clear = gr.ClearButton(interactive=True, variant='stop')
    
    with gr.Tab(label="YouTube") as tab1:
        
        gr.Markdown("## Input YouTube Link Here:")
        url = gr.Textbox(label='YouTube URL', value="https://youtu.be/bvPDQ4-0LAQ")
        
        gr.Markdown("## YouTube Info")
        with gr.Row(equal_height=False):
            with gr.Column(scale=4):
                with gr.Accordion("YouTube Information"):
                    title = gr.Textbox(label='Title', lines=2, max_lines=5, show_copy_button=True)
                    desc = gr.Textbox(label='Description', lines=10, max_lines=20, 
                                      autoscroll=False, show_copy_button=True)
            with gr.Column(scale=1, min_width=25):
                bttn_info_get = gr.Button('Get Info', variant='primary', )
        
        gr.Markdown("## Transcript")
        with gr.Row(equal_height=False):              
            with gr.Column(scale=4):
                trns_raw = gr.Textbox(label='Transcript', show_copy_button=True, autoscroll=True,
                                      lines=10, max_lines=500,
                                      value=text_to_summarize[:2000],
                                      interactive=True)
            with gr.Column(scale=1, min_width=25):
                bttn_trns_get = gr.Button("Get Transcript", variant='primary')
                tkncount = gr.Number(label='Token Count (~)', interactive=False)
        
    with gr.Tab(label="Summarize") as tab2:
        gr.Markdown("## Model Parameters")
        with gr.Group():
            with gr.Row():
                with gr.Column(scale=1, min_width=100):
                    model = gr.Dropdown(choices=['llama3.2'], value='llama3.2', label='Ollama models', interactive=True)
                with gr.Column(scale=1, min_width=100):
                    base_url = gr.Textbox(label='Base URL', value='http://localhost:11434', interactive=True)
                with gr.Column(scale=1, min_width=100):
                    # precision=2 matches step=0.01; a negative precision would
                    # round the value to the nearest hundred.
                    temperature = gr.Number(label='Temperature', minimum=0.0, step=0.01, precision=2,
                                            value=config["temperature"])

        gr.Markdown("## Text Splitting Parameters")
        with gr.Group():
            with gr.Row():
                with gr.Column(scale=1, min_width=100):
                    chunk = gr.Number(label='Chunk Size', minimum=200, step=100, value=config["chunk_size"])
                with gr.Column(scale=1, min_width=100):
                    overlap = gr.Number(label='Overlap Size', minimum=0, step=10, value=config["overlap_size"])

        gr.Markdown("## Approaches")
        with gr.Tab(label="Map-Reduce LangChain") as tab_mrlc:
            with gr.Row():
                with gr.Column(scale=1, min_width=25):
                    mapreduce_num_predict = gr.Number(label='Number of tokens to predict', minimum=128, step=128, value=config["mapreduce_num_predict"], interactive=True)
                with gr.Column(scale=4):
                    with gr.Accordion(label="Prompt Templates", open=False):
                        map_prompt_txt_mrlc = gr.Textbox(label="Prompt for the mapping step", value=MAP_TEMPLATE_TXT,
                                                        lines=10, max_lines=50, show_copy_button=True, interactive=True)
                        combine_prompt_txt_mrlc = gr.Textbox(label="Prompt for the combine step", value=COMBINE_TEMPLATE_TXT,
                                                        lines=10, max_lines=50, show_copy_button=True, interactive=True)
            with gr.Row():
                with gr.Column(scale=1, min_width=25):
                    bttn_summ_mrlc = gr.Button("Summarize with Map-Reduce LangChain", variant='primary')
        with gr.Tab(label="Map-Reduce Manual") as tab_mrmn:
            with gr.Row():
                with gr.Column(scale=1, min_width=25):
                    map_num_predict = gr.Number(label='Number of tokens to predict at mapping step', minimum=128, step=128, value=config["map_num_predict"], interactive=True)
                with gr.Column(scale=1, min_width=25):
                    # This budget applies to the combine step of the manual map-reduce.
                    combine_num_predict = gr.Number(label='Number of tokens to predict at combine step', minimum=128, step=128, value=config["combine_num_predict"], interactive=True)
                with gr.Column(scale=4):
                    with gr.Accordion(label="Prompt Templates", open=False):
                        map_prompt_txt_mrmn = gr.Textbox(label="Prompt for the mapping step", value=MAP_TEMPLATE_TXT,
                                                        lines=10, max_lines=50, show_copy_button=True)
                        combine_prompt_txt_mrmn = gr.Textbox(label="Prompt for the combine step", value=COMBINE_TEMPLATE_TXT,
                                                        lines=10, max_lines=50, show_copy_button=True)
            with gr.Row():
                with gr.Column(scale=1, min_width=25):
                    bttn_summ_mrmn = gr.Button("Summarize with Map-Reduce Manual", variant='primary')
        with gr.Tab(label="Refine LangChain") as tab_rflc:
            with gr.Row():
                with gr.Column(scale=1, min_width=25):
                    refine_num_predict = gr.Number(label='Number of tokens to predict', minimum=128, step=128, value=config["refine_num_predict"], interactive=True)
                with gr.Column(scale=4):
                    with gr.Accordion(label="Prompt Templates", open=False):
                        question_prompt_txt_rl = gr.Textbox(label="Prompt for the each split doc", value=QUESTION_TEMPLATE_TXT,
                                                        lines=10, max_lines=50, show_copy_button=True, interactive=True)
                        refine_prompt_txt_rl = gr.Textbox(label="Prompt for the refine step", value=REFINE_TEMPLATE_TXT,
                                                        lines=10, max_lines=50, show_copy_button=True, interactive=True)
            with gr.Row():
                with gr.Column(scale=1, min_width=25):
                    bttn_summ_rflc = gr.Button("Summarize with Refine LangChain", variant='primary')
                    
                
        gr.Markdown("## Summary")
        with gr.Row():
            with gr.Column() as r3c2:
                    trns_sum = gr.Textbox(label="Summary", show_copy_button=True)

    ### events

    def _bind_to_config(component, key, cast=None):
        """Store the component's value in config[key] whenever it changes."""
        def _store(value):
            if value is None:
                # e.g. a cleared number field: keep the last valid setting
                return
            config[key] = cast(value) if cast else value
        component.change(fn=_store, inputs=component, outputs=None)

    # The summarizer functions read their settings from `config`, so the
    # parameter widgets must write into it to have any effect.
    _bind_to_config(model, "model")
    _bind_to_config(base_url, "base_url")
    _bind_to_config(temperature, "temperature", float)
    # Sizes and token budgets are cast to int: gr.Number may deliver floats.
    _bind_to_config(chunk, "chunk_size", int)
    _bind_to_config(overlap, "overlap_size", int)
    _bind_to_config(mapreduce_num_predict, "mapreduce_num_predict", int)
    _bind_to_config(map_num_predict, "map_num_predict", int)
    _bind_to_config(combine_num_predict, "combine_num_predict", int)
    _bind_to_config(refine_num_predict, "refine_num_predict", int)

    bttn_info_get.click(fn=get_youtube_info,
                        inputs=url,
                        outputs=[title, desc],
                        api_name="get_youtube_info"
                        )
        
    bttn_trns_get.click(fn=get_youtube_transcript,
                        inputs=url,
                        outputs=[trns_raw, tkncount]
                        )

    bttn_summ_mrmn.click(fn=get_summary_map_reduce_manual,
                         inputs=[trns_raw, map_prompt_txt_mrmn, combine_prompt_txt_mrmn],
                         outputs=trns_sum
                         )

    bttn_summ_mrlc.click(fn=get_summary_map_reduce_langchain,
                         inputs=[trns_raw, map_prompt_txt_mrlc, combine_prompt_txt_mrlc],
                         outputs=trns_sum
                         )

    # Input order matches get_summary_refine_langchain(text, refine prompt, question prompt).
    bttn_summ_rflc.click(fn=get_summary_refine_langchain,
                         inputs=[trns_raw, refine_prompt_txt_rl, question_prompt_txt_rl],
                         outputs=trns_sum
                         )
    
    # Components reset by the Clear button.
    bttn_clear.add([url, title, desc, trns_raw, trns_sum, tkncount])

demo.launch(share=False)

Closing server running on port: 7860
* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Mapping...: 100%|██████████| 1/1 [00:02<00:00,  2.51s/it]




[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a detailed summary (in bullet points, using "-" for bullets) of the following:

"think it's possible that physics has exploits and we should be trying to find them arranging some kind of a crazy quantum mechanical system that somehow gives you buffer overflow somehow gives you a rounding error in the floating Point synthetic intelligences are kind of like the next stage of development and I don't know where it leads to like at some point I suspect the universe is some kind of a puzzle these synthetic AIS will uncover that puzzle and solve it the following is a conversation with Andre capothy previously the director of AI at Tesla and before that at open Ai and Stanford he is one of the greatest scientists engineers and Educators in the history of artificial intelligence this is the Lex Friedman podcast to support it please check out our sponsors 