In [1]:
# ✅ Step 1: Install Dependencies
!pip install langchain langchain-community youtube-transcript-api google-generativeai faiss-cpu langchain-google-genai

# ✅ Step 2: Import Libraries
import os
import re
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
import google.generativeai as genai

# ✅ Step 3: Set Your Gemini API Key
# SECURITY: never hard-code the key in the notebook. A key that has been saved
# or shared in source must be treated as leaked and revoked/rotated.
# The key is read from the GOOGLE_API_KEY environment variable; when that is
# unset or blank, the user is prompted for it without echoing it on screen.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "").strip()
if not GOOGLE_API_KEY:
    from getpass import getpass  # local import: only the interactive fallback needs it

    GOOGLE_API_KEY = getpass("Enter your Gemini API key: ").strip()
if not GOOGLE_API_KEY:
    raise RuntimeError("GOOGLE_API_KEY is not set; cannot call the Gemini API.")
# Export the key so code that looks it up from the environment can find it
# (the embeddings/chat clients below are constructed without an explicit key).
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
genai.configure(api_key=GOOGLE_API_KEY)

# ✅ Step 4: Extract YouTube Video ID (supports youtube.com and youtu.be)
def extract_video_id(url):
    """Return the video ID contained in a YouTube URL, or ``None``.

    Recognised forms (scheme and ``www.``/``m.`` style sub-domains optional):

    * ``youtube.com/watch?v=ID`` -- ``v`` may appear anywhere in the query
    * ``youtube.com/embed/ID``, ``/shorts/ID``, ``/live/ID``, ``/v/ID``
    * ``youtu.be/ID``

    The captured ID is limited to ``[A-Za-z0-9_-]`` so that trailing
    whitespace, ``#fragment`` parts or extra query parameters never leak
    into the result.

    Args:
        url: The URL text to inspect. Non-string values yield ``None``.

    Returns:
        The ID as a string, or ``None`` when no supported pattern matches.
    """
    if not isinstance(url, str):
        return None

    # The negative look-behind keeps look-alike hosts such as
    # "notyoutube.com" from matching while still allowing sub-domains.
    patterns = (
        r"(?<![\w-])youtube(?:-nocookie)?\.com/watch\?(?:[^#\s]*?&)?v=([A-Za-z0-9_-]+)",
        r"(?<![\w-])youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/([A-Za-z0-9_-]+)",
        r"(?<![\w-])youtu\.be/([A-Za-z0-9_-]+)",
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None

# ✅ Step 5: Get Transcript from YouTube
def get_transcript(youtube_url):
    """Fetch the transcript of a YouTube video as a single text string.

    Args:
        youtube_url: URL of the video; its ID is obtained with
            ``extract_video_id``.

    Returns:
        * the transcript snippets joined with single spaces, or
        * ``None`` when no video ID can be extracted or the library reports
          ``TranscriptsDisabled`` / ``NoTranscriptFound``, or
        * for any other failure, a message beginning with "❌" that carries
          the error text (callers distinguish it by that prefix).
    """
    try:
        video_id = extract_video_id(youtube_url)
        if not video_id:
            return None
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # youtube-transcript-api >= 1.0: instance API returning snippet
            # objects. The static get_transcript() is deprecated there and
            # absent from newer releases, so prefer fetch() when it exists.
            snippets = YouTubeTranscriptApi().fetch(video_id)
            texts = [snippet.text for snippet in snippets]
        else:
            # Older releases: static call returning a list of dicts.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
            texts = [entry["text"] for entry in entries]
        return " ".join(texts)
    except (TranscriptsDisabled, NoTranscriptFound):
        return None
    except Exception as e:
        # Deliberate best-effort: report the failure as text instead of raising.
        return f"❌ Error while fetching transcript: {str(e)}"

# ✅ Step 6: Split transcript into chunks
def split_text_into_docs(text, chunk_size=1000, chunk_overlap=100):
    """Split *text* into overlapping document chunks for embedding.

    Args:
        text: The full text to split.
        chunk_size: Maximum size of each chunk, as measured by the
            splitter's length function. Defaults to 1000.
        chunk_overlap: Amount shared between neighbouring chunks so that
            context is not lost at chunk boundaries. Defaults to 100.

    Returns:
        The documents produced by
        ``RecursiveCharacterTextSplitter.create_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.create_documents([text])

# ✅ Step 7: Build RAG pipeline
def build_rag_chain(
    docs,
    embedding_model="models/embedding-001",
    llm_model="models/gemini-1.5-pro-latest",
    temperature=0.2,
):
    """Build a retrieval-augmented QA chain over *docs*.

    The documents are embedded into an in-memory FAISS index, and a
    ``RetrievalQA`` chain of type "stuff" is created on top of its retriever.

    Args:
        docs: Documents to index (e.g. the output of
            ``split_text_into_docs``).
        embedding_model: Name of the Google embedding model.
        llm_model: Name of the Gemini chat model. Model names are retired
            over time, so callers can override the default without editing
            this function.
        temperature: Sampling temperature passed to the chat model.

    Returns:
        The ``RetrievalQA`` chain; source documents are not included in its
        output.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model)
    vectorstore = FAISS.from_documents(docs, embeddings)
    retriever = vectorstore.as_retriever()

    llm = ChatGoogleGenerativeAI(model=llm_model, temperature=temperature)

    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        return_source_documents=False,
    )

# ✅ Step 8: Main Summary Function
def summarize_youtube_video(
    youtube_url,
    query="Give a clear and concise summary of this video.",
):
    """Summarise a YouTube video from its transcript.

    Args:
        youtube_url: URL of the video to summarise.
        query: Prompt sent to the QA chain. Defaults to a request for a
            clear and concise summary.

    Returns:
        The chain's answer text, or a message starting with "❌" when the
        transcript is unavailable or fetching it failed.
    """
    transcript = get_transcript(youtube_url)
    if not transcript:
        return "❌ Transcript not available or invalid video URL."
    if transcript.startswith("❌"):
        # get_transcript reports unexpected failures as "❌ ..." text.
        return transcript

    docs = split_text_into_docs(transcript)
    rag_chain = build_rag_chain(docs)

    # NOTE(review): the chain answers from the chunks its retriever selects,
    # so presumably only part of a long transcript reaches the model -- verify
    # whether that is acceptable for summaries.
    # Chain.run() is deprecated; invoke() returns a dict whose "result" key
    # holds the answer text for RetrievalQA.
    response = rag_chain.invoke({"query": query})
    return response["result"]

# ✅ Step 9: Get user input and run the summarizer
# Ask for the video URL, then run the full summarisation pipeline on it.
video_url = input("Enter the YouTube video URL: ")
summary = summarize_youtube_video(video_url)

# Heading and summary in one call; the newline separator reproduces the
# output of two consecutive print() calls.
print("\n🎯 Video Summary:\n", summary, sep="\n")

Enter the YouTube video URL: https://youtu.be/VTs8wnMsh0k?si=mILQ1xCgjb5ilku5

🎯 Video Summary:

❌ Error while fetching transcript: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=VTs8wnMsh0k! This is most likely caused by:

YouTube is blocking requests from your IP. This usually is due to one of the following reasons:
- You have done too many requests and your IP has been blocked by YouTube
- You are doing requests from an IP belonging to a cloud provider (like AWS, Google Cloud Platform, Azure, etc.). Unfortunately, most IPs from cloud providers are blocked by YouTube.

There are two things you can do to work around this:
1. Use proxies to hide your IP address, as explained in the "Working around IP bans" section of the README (https://github.com/jdepoix/youtube-transcript-api?tab=readme-ov-file#working-around-ip-bans-requestblocked-or-ipblocked-exception).
2. (NOT RECOMMENDED) If you authenticate your requests using cookies, you will be able to continu