<a href="https://colab.research.google.com/github/axen1/E-ticaret-e-commerce/blob/master/YouTube_Video_Summarizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# First, install required packages
!pip install -q youtube-transcript-api transformers torch gradio

import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from transformers import pipeline
import re

# A single pattern covers every supported URL shape: the ID is the
# 11-character token that directly follows either "v=" (watch URLs) or a "/"
# (youtu.be/<id>, /embed/<id>, /shorts/<id>, ...). The trailing negative
# lookahead rejects longer tokens (channel IDs, long path slugs) whose first
# 11 characters would otherwise be returned as a bogus video ID.
_VIDEO_ID_PATTERN = re.compile(r'(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])')


def get_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Args:
        url: The URL text to inspect (watch, youtu.be, embed, ... forms).

    Returns:
        The video ID as a string, or ``None`` when *url* is not a string or
        contains no token that looks like a video ID.
    """
    # Guard against None / non-text input instead of letting `re` raise.
    if not isinstance(url, str):
        return None

    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None

# Summarization pipeline shared by every request. It is created lazily on the
# first successful transcript fetch so the model is loaded once rather than
# on each call.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline("summarization", model="facebook/bart-large-cnn")
    return _SUMMARIZER


def _fetch_transcript_text(video_id):
    """Download the transcript for *video_id* and join it into one string."""
    if hasattr(YouTubeTranscriptApi, "get_transcript"):
        entries = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(entry["text"] for entry in entries)

    # NOTE(review): newer youtube-transcript-api releases appear to replace the
    # static helper with an instance-level fetch() yielding snippet objects
    # with a `.text` attribute -- confirm against the installed version.
    fetched = YouTubeTranscriptApi().fetch(video_id)
    return " ".join(snippet.text for snippet in fetched)


def _split_into_chunks(text, max_chars):
    """Split *text* into chunks of at most *max_chars*, breaking between words."""
    chunks = []
    current_words = []
    current_len = 0
    for word in text.split():
        # A single token longer than the limit is hard-split so that no chunk
        # ever exceeds max_chars.
        for start in range(0, len(word), max_chars):
            piece = word[start:start + max_chars]
            needed = len(piece) + (1 if current_words else 0)
            if current_words and current_len + needed > max_chars:
                chunks.append(" ".join(current_words))
                current_words = [piece]
                current_len = len(piece)
            else:
                current_words.append(piece)
                current_len += needed
    if current_words:
        chunks.append(" ".join(current_words))
    return chunks


def summarize_video(url, progress=gr.Progress()):
    """Summarize a YouTube video's transcript and format the result as Markdown.

    Args:
        url: YouTube URL entered by the user.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        A Markdown string with an executive summary, up to five key points
        and a conclusion. Failures are reported as a human-readable error
        string rather than raised, so the UI can display them directly.
    """
    try:
        # Validate the URL before doing anything expensive.
        video_id = get_video_id(url) if url else None
        if not video_id:
            return "Error: Invalid YouTube URL. Please check the URL and try again."

        # Get transcript
        progress(0.1, desc="Fetching video transcript...")
        try:
            text = _fetch_transcript_text(video_id)
        except Exception as e:
            return f"Error: Could not fetch video transcript. Make sure the video has English subtitles enabled. Error details: {str(e)}"

        # Clean transcript: drop bracketed annotations, then collapse every
        # run of whitespace (including newlines) into a single space.
        progress(0.3, desc="Processing transcript...")
        text = re.sub(r'\[.*?\]', '', text)
        text = ' '.join(text.split())
        if not text:
            return "Error: The transcript for this video is empty, so there is nothing to summarize."

        # Split into chunks (BART has a max input length) without cutting
        # words in half at the chunk borders.
        max_chunk = 1000
        chunks = _split_into_chunks(text, max_chunk)

        progress(0.5, desc="Loading summarization model...")
        summarizer = _get_summarizer()

        # Summarize each chunk
        progress(0.6, desc="Generating summary...")
        summaries = []
        for i, chunk in enumerate(chunks):
            # Cap the requested summary length by the chunk's word count so a
            # short chunk is not asked for a summary longer than its input
            # (the pipeline warns about exactly that). The floor of 10 keeps
            # generation workable for tiny inputs.
            word_count = len(chunk.split())
            max_len = max(min(150, word_count), 10)
            min_len = min(40, max_len // 2)
            summary = summarizer(chunk, max_length=max_len, min_length=min_len, do_sample=False)
            summaries.append(summary[0]['summary_text'])
            progress(0.6 + (0.3 * (i + 1) / len(chunks)))

        # Combine summaries
        final_summary = " ".join(summaries)

        # Extract key points (sentences)
        sentences = [s.strip() for s in final_summary.split('.') if s.strip()]
        key_points = sentences[:5]  # Take up to 5 key points

        # Format output
        progress(0.9, desc="Formatting output...")
        lines = [
            "# Video Summary",
            "",
            "## Executive Summary",
            final_summary,
            "",
            "## Key Points",
        ]
        lines.extend(f"{i}. {point}." for i, point in enumerate(key_points, 1))
        lines.extend([
            "",
            "## Conclusion",
            sentences[-1] if sentences else 'No conclusion available.',
        ])
        output = "\n".join(lines)

        progress(1.0, desc="Done!")
        return output

    except Exception as e:
        # Top-level UI boundary: surface the failure as text instead of
        # letting the request crash.
        return f"An error occurred: {str(e)}\nPlease try again with a different video."

# Assemble the Gradio UI: a single URL textbox in, rendered Markdown out.
url_input = gr.Textbox(
    label="YouTube URL",
    placeholder="Paste YouTube URL here...",
    info="Note: Video must have English subtitles/captions available",
)
summary_output = gr.Markdown(label="Summary")

iface = gr.Interface(
    fn=summarize_video,
    inputs=url_input,
    outputs=summary_output,
    title="YouTube Video Summarizer",
    description="Get a quick summary of any YouTube video with English subtitles.",
    # One clickable sample URL shown beneath the input box.
    examples=[["https://www.youtube.com/watch?v=SZnyZONfIZ8"]],
    theme=gr.themes.Soft(),
)

# share=True exposes the app through a public URL; debug=True keeps the
# notebook cell running so errors and logs stay visible.
iface.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://d432cda05465ce34eb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Your max_length is set to 130, but your input_length is only 106. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=53)
Your max_length is set to 150, but your input_length is only 99. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=49)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://ce4dc8c625532e719d.gradio.live
Killing tunnel 127.0.0.1:7861 <> https://d432cda05465ce34eb.gradio.live


