In [None]:
import os
from getpass import getpass

# Never hardcode credentials in a notebook: anything saved, shared, or committed
# leaks the key. Any key that was previously embedded in this file should be
# treated as compromised and revoked.
# Reuse GOOGLE_API_KEY when it is already set in the environment; otherwise
# prompt for it so the value never lives in the notebook source.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ")



# **Install libraries**



In [None]:
!pip install -q youtube-transcript-api langchain-community langchain-google-genai \
                faiss-cpu tiktoken python-dotenv

In [None]:
!pip install -q langchain-google-genai


In [None]:
# Import from the packages that actually own each symbol: the `langchain.text_splitter`
# and `langchain.prompts` shims are legacy paths, while `langchain_text_splitters` and
# `langchain_core.prompts` are the canonical locations.
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

## **Step 1 - Indexing (Document Ingestion)**

In [None]:
video_id = "Gfr50f6ZBvo"  # only the ID, not full URL

# Fetch the English transcript for the video.
try:
    api = YouTubeTranscriptApi()
    transcript = api.fetch(video_id, languages=["en"])
except TranscriptsDisabled as err:
    # Only printing here would leave `full_text` undefined and make every later
    # cell fail with a confusing NameError, so stop with an explicit error instead.
    raise RuntimeError("No captions available for this video.") from err

# Flatten the transcript snippets into one plain-text string for splitting.
full_text = " ".join(snippet.text for snippet in transcript.snippets)

# Show a short preview only; dumping the whole transcript floods the notebook output.
print(f"Transcript length: {len(full_text)} characters")
print(full_text[:500])


## **Step 2 - Indexing (Text Splitting)**

In [None]:
# Cut the transcript into overlapping chunks; the overlap keeps sentences that
# straddle a chunk boundary retrievable from either side.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
splitter = RecursiveCharacterTextSplitter(**splitter_options)
chunks = splitter.create_documents(texts=[full_text])

In [None]:
# Sanity check: how many chunks the transcript was split into.
len(chunks)

## **Step 3 - Indexing (Embedding Generation and Storing in Vector Store)**

In [None]:
# Import from langchain_community (the package installed above and already used for
# FAISS) instead of the legacy `langchain.embeddings` shim.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Local sentence-transformers model used to embed both the chunks and the queries.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

In [None]:
# Embed every chunk and index the resulting vectors in a FAISS store.
vectore_store = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)

In [None]:
# Inspect the mapping from FAISS index position to docstore document ID.
vectore_store.index_to_docstore_id

In [None]:
# Document IDs are generated afresh each time the store is built, so an ID copied
# from an earlier session matches nothing after a kernel restart. Take one from
# the current index instead so this cell works under Restart & Run All.
first_doc_id = next(iter(vectore_store.index_to_docstore_id.values()))
vectore_store.get_by_ids([first_doc_id])

## **Step 4 - Retrieval**

In [None]:
# Wrap the vector store as a retriever: plain similarity search, top 4 chunks per query.
retriever = vectore_store.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=4),
)

In [None]:
# Display the retriever object to confirm its configuration.
retriever

In [None]:
# Smoke-test retrieval with a sample query; the cell output lists the matched chunks.
sample_query = "what is deepmind"
retriever.invoke(sample_query)

## **Step 5 - Augmentation**

In [None]:
# Prompt that restricts the model to the retrieved transcript context.
# `{context}` receives the joined chunk text and `{question}` the user's query.
# The instruction text is sent verbatim to the model, so its spelling is kept clean.
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Answer only from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """,
    input_variables=["context", "question"]
)

In [None]:
# The question is embedded for similarity search and later sent to the model,
# so it is spelled correctly to avoid degrading retrieval.
question = "is the topic of aliens discussed in this video? if yes then what was discussed"
retriever_docs = retriever.invoke(question)

In [None]:
# Inspect the chunks retrieved for the question.
retriever_docs

In [None]:
# Join the text of the retrieved chunks, blank-line separated, into one context string.
page_texts = [doc.page_content for doc in retriever_docs]
context_text = "\n\n".join(page_texts)

In [None]:
# Fill the template's two placeholders with the retrieved context and the question.
prompt_inputs = dict(context=context_text, question=question)
final_prompt = prompt.invoke(prompt_inputs)

## **Step 6 - Generation**

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Never hardcode the API key in the notebook: read it from the environment and
# only prompt interactively when it has not been provided. Any key previously
# embedded in this file should be treated as compromised and revoked.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ")

# Chat model used for answer generation; it picks up GOOGLE_API_KEY from the environment.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

In [None]:
# Send the filled-in prompt to the model; the cell output is the text of its reply.
answer = llm.invoke(final_prompt)
answer.content

In [None]:
!pip -q install langchain-core


### **Building a Chain**

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [None]:
def format_docs(retrieved_docs):
    """Concatenate the text of the retrieved documents into one context string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined with a blank line between them;
        an empty string when no documents are given.
    """
    page_texts = [doc.page_content for doc in retrieved_docs]
    return "\n\n".join(page_texts)

In [None]:
# Two branches run on the same input question:
#   context  -> retrieve matching chunks and join their text with format_docs
#   question -> forward the input unchanged
context_branch = retriever | RunnableLambda(format_docs)
question_branch = RunnablePassthrough()

parallel_chain = RunnableParallel(
    {
        'context': context_branch,
        'question': question_branch,
    }
)

In [None]:
# Check the intermediate output: a dict with the retrieved context and the original question.
parallel_chain.invoke("Who is Demis")

In [None]:
# Output parser used as the last step of the chain to return the model reply as a string.
parser = StrOutputParser()

In [None]:
# End-to-end RAG pipeline: build the prompt inputs, fill the template,
# call the model, then parse the reply.
main_chain = (
    parallel_chain
    | prompt
    | llm
    | parser
)

In [None]:
# Run the full chain. The query is embedded for retrieval and sent to the model,
# so it is spelled correctly. Only the top retrieved chunks reach the model, so the
# answer reflects those chunks rather than the entire transcript.
main_chain.invoke("can you summarize the video")