<a href="https://colab.research.google.com/github/Vijaykumar-VT/SDC_GENAI/blob/main/youtube.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Auto-install missing packages
import os

import importlib
import subprocess
import sys

# Install through the interpreter that is running this code
# (`python -m pip`) instead of whatever `pip` happens to be first on PATH,
# so the packages land in the environment the imports below will search.
# check_call raises CalledProcessError when pip fails, which surfaces the
# real cause instead of a later, less informative ImportError.
try:
    from youtube_transcript_api import YouTubeTranscriptApi
except ImportError:
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "youtube-transcript-api"]
    )

try:
    from langchain_community.vectorstores import FAISS
    from langchain_community.embeddings import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.chat_models import ChatOpenAI
    from langchain.chains import RetrievalQA
except ImportError:
    subprocess.check_call(
        [
            sys.executable, "-m", "pip", "install", "-U",
            "langchain", "langchain-community", "faiss-cpu", "openai",
        ]
    )

# Make packages installed a moment ago visible to the import system before
# the unconditional imports that follow.
importlib.invalidate_caches()
# Re-import after installation
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Set your OpenAI API key.
# A key that is already present in the environment is left alone; the
# placeholder below is only written when no key is configured, so a real key
# exported beforehand is never overwritten by the placeholder text.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = "your-openai-api-key"  # Replace with your actual key

def _extract_video_id(video_url):
    """Return the video ID found in *video_url*.

    Handles ``watch?v=ID`` links (with or without extra query parameters),
    ``youtu.be/ID`` short links, and ``/embed/ID``, ``/shorts/ID``,
    ``/live/ID`` and ``/v/ID`` paths. Input that matches none of these
    (for example a bare video ID) falls back to the text after the last
    ``v=``, cut at the first ``&``.
    """
    from urllib.parse import parse_qs, urlparse

    candidate = video_url.strip()
    # urlparse only fills in the host when a scheme is present.
    if "://" not in candidate and "/" in candidate:
        candidate = "https://" + candidate

    parsed = urlparse(candidate)
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    if (host == "youtu.be" or host.endswith(".youtu.be")) and segments:
        return segments[0]

    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    return candidate.split("v=")[-1].split("&")[0]


def get_transcript(video_url):
    """Fetch the transcript of a YouTube video as one space-joined string.

    Args:
        video_url: A YouTube URL (watch, short, embed, shorts, live) or a
            bare video ID.

    Returns:
        The transcript text, or a message starting with "❌" when anything
        goes wrong. Failures are reported through the return value rather
        than raised.
    """
    try:
        video_id = _extract_video_id(video_url)
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # Newer youtube-transcript-api releases dropped the static
            # helper in favour of an instance-based fetch(); to_raw_data()
            # yields the same list-of-dicts shape used below.
            transcript = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        return " ".join(entry["text"] for entry in transcript)
    except Exception as e:
        return f"❌ Error retrieving transcript: {e}"

def split_text(text):
    """Break *text* into chunks with a RecursiveCharacterTextSplitter.

    The splitter is configured with chunk_size=1000 and chunk_overlap=100;
    whatever its ``split_text`` method produces is returned unchanged.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    ).split_text(text)

def create_vectorstore(chunks):
    """Build a FAISS store from *chunks* using default OpenAIEmbeddings."""
    return FAISS.from_texts(chunks, OpenAIEmbeddings())

def create_rag_chain(vstore):
    """Assemble a RetrievalQA chain on top of *vstore*.

    The store's default retriever is paired with a ChatOpenAI model created
    with temperature=0.
    """
    doc_retriever = vstore.as_retriever()
    chat_model = ChatOpenAI(temperature=0)
    return RetrievalQA.from_chain_type(retriever=doc_retriever, llm=chat_model)

def summarize_youtube_video(url):
    """Summarize the YouTube video at *url* in simple terms.

    The transcript is fetched, split into chunks, indexed in a vector store
    and queried through a retrieval QA chain.

    Args:
        url: Passed unchanged to ``get_transcript``.

    Returns:
        The chain's answer to the summary prompt, or a message starting
        with "❌" when the transcript cannot be retrieved or is empty.
    """
    transcript = get_transcript(url)
    # get_transcript reports failures as a "❌"-prefixed string.
    if transcript.startswith("❌"):
        return transcript

    chunks = split_text(transcript)
    if not chunks:
        # An empty transcript produces no chunks, and there is nothing to
        # index or summarize; report it in the same style as fetch errors
        # instead of failing while the vector store is built.
        return "❌ Error retrieving transcript: transcript is empty"

    vstore = create_vectorstore(chunks)
    qa = create_rag_chain(vstore)

    question = "Summarize this YouTube video in simple terms."
    if hasattr(qa, "invoke"):
        # Chain.run is deprecated in favour of invoke; RetrievalQA reads the
        # question from "query" and returns the answer under "result".
        return qa.invoke({"query": question})["result"]
    return qa.run(question)

# === Run it ===
if __name__ == "__main__":
    # Ask for a video link, then print whatever the summarizer returns
    # (either the summary or its "❌" error message).
    video_link = input("🎥 Enter YouTube video URL: ")
    print("\n📄 Summary:\n", summarize_youtube_video(video_link))
