In [8]:
# STEP 1: Install required libraries
!pip install -U google-generativeai pytube

# STEP 2: Import required libraries
import google.generativeai as genai
from pytube import YouTube
import re

# STEP 3: Set your Gemini API key
# SECURITY: the key is read from the GEMINI_API_KEY environment variable, or
# prompted for without echo, so it is never stored in the notebook. Any key
# that has ever been saved in a notebook or committed to version control
# should be revoked in Google AI Studio and replaced.
import os
from getpass import getpass

GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or getpass("Gemini API key: ")
genai.configure(api_key=GEMINI_API_KEY)

# Optional: Print available models (for debugging)
models = genai.list_models()
for m in models:
    print(m.name)

# STEP 4: Extract transcript from YouTube
def get_youtube_transcript(url):
    """Return the English caption text of a YouTube video as a single string.

    The captions are fetched through pytube as SRT and reduced to plain text:
    cue numbers, timestamp lines and blank separator lines are dropped and the
    remaining lines are joined with single spaces.

    Args:
        url: URL of the YouTube video.

    Returns:
        The transcript text, or ``None`` when the video has no caption track
        with language code ``'en'`` or when any error occurs (the error is
        printed rather than raised).

    Note:
        A caption line made up only of digits is indistinguishable from an SRT
        cue number here and is dropped as well.
    """
    try:
        yt = YouTube(url)
        caption = yt.captions.get_by_language_code('en')
        if not caption:
            print("No English captions found.")
            return None
        srt_captions = caption.generate_srt_captions()

        text_lines = []
        # splitlines() + strip() also copes with CRLF line endings, which
        # would otherwise let "1\r"-style cue numbers slip past isdigit().
        for raw_line in srt_captions.splitlines():
            line = raw_line.strip()
            # Skip blank separators, cue numbers and "start --> end" lines.
            if not line or line.isdigit() or '-->' in line:
                continue
            text_lines.append(line)
        # Joining once avoids repeated string concatenation and guarantees
        # exactly one space between caption lines.
        return " ".join(text_lines)
    except Exception as e:
        print(f"Error: {e}")
        return None

# STEP 5: Chunk the transcript (if long)
def chunk_transcript(text, max_len=1500, overlap=200):
    """Split ``text`` into consecutive character windows that overlap.

    Args:
        text: The string to split.
        max_len: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters each chunk shares with the previous
            one; must satisfy ``0 <= overlap < max_len``.

    Returns:
        A list of substrings covering ``text`` from start to end. An empty
        ``text`` yields an empty list.

    Raises:
        ValueError: If ``max_len``/``overlap`` would prevent the window from
            advancing (which previously caused an endless loop).
    """
    if max_len <= 0 or not 0 <= overlap < max_len:
        raise ValueError("max_len must be positive and 0 <= overlap < max_len")

    step = max_len - overlap
    chunks = []
    start = 0
    while start < len(text):
        end = min(start + max_len, len(text))
        chunks.append(text[start:end])
        # Once a window reaches the end of the text, any further window would
        # only repeat characters already contained in this one.
        if end == len(text):
            break
        start += step
    return chunks

# STEP 6: Summarize each chunk using Gemini Pro
def summarize_with_gemini(text, model_name="models/gemini-2.0-flash"):
    """Ask a Gemini model to summarize ``text``.

    Args:
        text: Transcript text to summarize.
        model_name: Identifier of the model to use. The default is one of the
            names printed by this notebook's ``genai.list_models()`` call; the
            previously hardcoded ``models/gemini-pro`` does not appear in that
            list.

    Returns:
        The summary text returned by the model, or a string starting with
        ``"Error during summarization:"`` if the request fails for any reason
        (errors are reported in the return value, never raised).
    """
    try:
        model = genai.GenerativeModel(model_name)
        response = model.generate_content(f"Please summarize this transcript:\n\n{text}")
        return response.text
    except Exception as e:
        return f"Error during summarization: {e}"

# STEP 7: Main pipeline to summarize YouTube video
def summarize_youtube_video(url):
    """Summarize a YouTube video from its captions.

    Fetches the transcript with ``get_youtube_transcript``, splits it with
    ``chunk_transcript``, summarizes each piece with ``summarize_with_gemini``
    and returns the per-chunk summaries separated by blank lines. Progress is
    printed along the way.

    Args:
        url: URL of the YouTube video.

    Returns:
        The joined chunk summaries, or the message
        ``"Transcript not found or unavailable."`` when no transcript text
        could be obtained.
    """
    print("🔍 Fetching transcript...")
    transcript = get_youtube_transcript(url)
    if not transcript:
        return "Transcript not found or unavailable."

    print("📦 Chunking transcript...")
    pieces = chunk_transcript(transcript)
    total = len(pieces)

    print("🧠 Generating summary...")
    partials = []
    for number, piece in enumerate(pieces, start=1):
        print(f"→ Summarizing chunk {number}/{total}")
        partials.append(summarize_with_gemini(piece))

    return "\n\n".join(partials)

# STEP 8: Example run
# Sample video; swap in any URL whose video has captions.
youtube_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
summary = summarize_youtube_video(youtube_url)

# Header and summary are emitted on separate lines, header padded by blank lines.
print("\n✅ FINAL SUMMARY:\n", summary, sep="\n")


models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.5-pro-exp-03-25
models/gemini-2.5-pro-preview-03-25
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01

In [9]:
!pip install google-generativeai langchain faiss-cpu openai requests

!pip install google-generativeai langchain faiss-cpu openai requests youtube-transcript-api

!pip install -q langchain youtube-transcript-api gradio tiktoken faiss-cpu openai

import requests
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
import re

# 🔑 Gemini API key
# SECURITY: the key is read from the GEMINI_API_KEY environment variable, or
# prompted for without echo, so it is never stored in the notebook. Any key
# that has ever been saved in a notebook or committed to version control
# should be revoked in Google AI Studio and replaced.
import os
from getpass import getpass

GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or getpass("Gemini API key: ")

# Gemini API URL (for gemini-2.0-flash); the key travels as the `key` query parameter.
GEMINI_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"

# ✂️ Helper to chunk text
def chunk_text(text, max_chars=1000, overlap=200):
    """Split ``text`` into consecutive character windows that overlap.

    Args:
        text: The string to split.
        max_chars: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters each chunk shares with the previous
            one; must satisfy ``0 <= overlap < max_chars``.

    Returns:
        A list of substrings covering ``text`` from start to end. An empty
        ``text`` yields an empty list.

    Raises:
        ValueError: If ``max_chars``/``overlap`` would prevent the window from
            advancing (which previously caused an endless loop).
    """
    if max_chars <= 0 or not 0 <= overlap < max_chars:
        raise ValueError("max_chars must be positive and 0 <= overlap < max_chars")

    step = max_chars - overlap
    chunks = []
    start = 0
    while start < len(text):
        end = min(start + max_chars, len(text))
        chunks.append(text[start:end])
        # Once a window reaches the end of the text, any further window would
        # only repeat characters already contained in this one.
        if end == len(text):
            break
        start += step
    return chunks

# 🧠 Call Gemini to summarize
def gemini_summarize(prompt, timeout=60):
    """Send ``prompt`` to the Gemini REST endpoint and return the reply text.

    Args:
        prompt: Full prompt text, sent as a single user message.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises a timeout error. Without a timeout a stalled connection
            would block the caller indefinitely.

    Returns:
        The text of the first part of the first candidate on success;
        ``"⚠️ Error parsing response"`` when a 200 response does not have the
        expected JSON structure; or a ``"❌ API Error: ..."`` string carrying
        the status code and body for any non-200 response.

    Raises:
        requests.exceptions.RequestException: For connection failures or
            timeouts; these propagate to the caller.
    """
    headers = {"Content-Type": "application/json"}
    data = {
        "contents": [
            {"role": "user", "parts": [{"text": prompt}]}
        ]
    }
    response = requests.post(GEMINI_URL, headers=headers, json=data, timeout=timeout)
    if response.status_code != 200:
        return f"❌ API Error: {response.status_code} - {response.text}"

    try:
        return response.json()['candidates'][0]['content']['parts'][0]['text']
    except (KeyError, IndexError, TypeError, ValueError):
        # Only structural/decoding problems are treated as a parse failure;
        # a bare `except:` would also have hidden KeyboardInterrupt and
        # programming errors.
        return "⚠️ Error parsing response"

# 🔁 Main summarizer
def _extract_video_id(video_url):
    """Return the YouTube video ID found in ``video_url``, or ``None``."""
    patterns = (
        # Original pattern, tried first so every URL that was accepted before
        # still resolves to the same ID.
        r"(?:https?://(?:www\.)?youtube\.com(?:/[^/]+)*\?v=|youtu\.be/)([A-Za-z0-9_-]+)",
        # `v` preceded by other query parameters, on any *.youtube.com host
        # (e.g. m.youtube.com/watch?app=desktop&v=ID).
        r"youtube\.com/[^?#\s]*\?(?:[^#\s]*&)?v=([A-Za-z0-9_-]+)",
        # Path-style IDs: /shorts/ID, /embed/ID, /live/ID.
        r"youtube\.com/(?:shorts|embed|live)/([A-Za-z0-9_-]+)",
    )
    for pattern in patterns:
        match = re.search(pattern, video_url)
        if match:
            return match.group(1)
    return None


def summarize_youtube_video(video_url):
    """Summarize a YouTube video from its transcript using Gemini.

    The transcript is fetched with ``YouTubeTranscriptApi``, split with
    ``chunk_text``, each chunk is summarized with ``gemini_summarize`` and the
    partial summaries are then combined by one more ``gemini_summarize`` call.

    Args:
        video_url: URL of the video (watch, short-link, shorts, embed or live
            form).

    Returns:
        The combined summary, or a string starting with ``"❌ Error:"`` when
        the URL contains no video ID, the transcript is empty, or any
        exception is raised along the way. Exceptions are never propagated,
        so the function can be used directly as a UI callback.
    """
    try:
        # 1. Extract video ID from URL
        video_id = _extract_video_id(video_url)
        if not video_id:
            return "❌ Error: Invalid YouTube URL"

        # 2. Extract transcript
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        full_text = " ".join(entry["text"] for entry in transcript)
        if not full_text.strip():
            # Nothing to summarize; avoid sending an empty "combine" prompt.
            return "❌ Error: Transcript is empty"

        # 3. Chunk text
        chunks = chunk_text(full_text)

        # 4. Summarize each chunk with Gemini
        partial_summaries = [
            gemini_summarize(f"Summarize this video transcript segment:\n\n{chunk}")
            for chunk in chunks
        ]

        # 5. Final summary from all chunks
        final_input = "Combine and summarize the following partial summaries:\n\n" + "\n\n".join(partial_summaries)
        return gemini_summarize(final_input)

    except Exception as e:
        return f"❌ Error: {str(e)}"

# 🎛️ Gradio UI
# Build a one-textbox-in, one-textbox-out app around summarize_youtube_video
# and start it immediately.
gr.Interface(
    title="🎥 YouTube Summarizer with Gemini API",
    description="Paste a YouTube video URL with captions to generate a summary using Gemini 2.0 Flash.",
    fn=summarize_youtube_video,
    inputs=gr.Textbox(label="YouTube Video URL"),
    outputs=gr.Textbox(label="Summary", lines=10),
).launch()

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.9/46.9 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.2/322.2 kB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m49.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.4/11.4 MB[0m [31m93.5 MB/s[0m eta [36m0:00:00[0m
[?25hIt looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Autom

