In [84]:
from youtube_transcript_api import  YouTubeTranscriptApi,YouTubeRequestFailed
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAI,GoogleGenerativeAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableParallel,RunnableLambda,RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import os
from dotenv import load_dotenv
load_dotenv()

True

In [28]:
def get_transcript(video_id:list) -> str:
    try:
        transcript_LIST=YouTubeTranscriptApi.get_transcript(video_id=video_id,languages=['hi','en'])
        transcript="".join([item['text'] for item in transcript_LIST])
        return transcript
    except YouTubeRequestFailed as e:
        print(f"An error occurred: {e}")

In [91]:
splitted=get_transcript('3Cv4Vhb8msI')

In [92]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks=text_splitter.create_documents([splitted])

In [93]:
embeddings= GoogleGenerativeAIEmbeddings(google_api_key=os.getenv('GOOGLE_API'),model="models/embedding-001")
vector_store=FAISS.from_documents(embedding=embeddings,documents=chunks)

In [94]:
vector_store.index_to_docstore_id

{0: '3ae05e27-d1af-4e48-ba9f-1074bf1f6053',
 1: 'da2378ed-5a6a-4e3c-94b7-3dea989fc527',
 2: '883855ff-020e-4b99-b6d8-888899d08b2e',
 3: '61dd61ed-e50c-45a5-98f7-06a3a9b87e95',
 4: '9d1670c2-0488-4b09-80b6-d4030735dd8c',
 5: '1aebe88d-b86c-4717-b8e4-2a30d94cfa4e',
 6: 'b1be7cb4-023f-4144-98ed-f93f6d603ae0',
 7: 'bf73170a-a046-4435-bfe2-d23e37752467',
 8: 'a4b44d69-d36e-4bf7-a985-9b35bc41d38e',
 9: '7c557650-9132-40c4-a6cd-6f5d12905f6f',
 10: '90201a60-0bf9-4e57-90df-9b1cd659028f',
 11: 'ff09239f-b552-4f2b-9193-5c345508718b',
 12: 'f7d0323a-22be-41df-a8c2-dbf37efb1cd0',
 13: '00b5e601-2682-476d-ae6d-2168b3fc4433',
 14: '52fdd52a-6896-46f6-8e1d-e885e1a4bcba',
 15: 'd0b5b2e0-1131-4444-ba03-e91c20ffc254',
 16: 'ab355224-f583-4b60-a57a-f9bd5b1383c6',
 17: 'f749f5ff-a6c3-429c-8cd7-11eedacb055d',
 18: 'f24da694-3bd6-4907-8d06-b88768448d5b',
 19: '573afaa2-b43d-4586-b958-e3132311482a'}

In [95]:
vector_store.get_by_ids(['d533610c-3659-410e-b164-192fa0327f3e'])

[]

In [96]:
retriever=vector_store.as_retriever(search_type="mmr",search_kwargs={"k":2})

In [97]:
retriever.invoke('whats deepmind doing' )[1]

Document(id='573afaa2-b43d-4586-b958-e3132311482a', metadata={}, page_content='estate in the heart ofAtlanta has so much to offer from anindoor basketball court to a full-on mancave. However, the house was prettydirty. So, if I were to live here, Iwould definitely need a deep clean,specifically from 21 Savage. 21, can youclean the house for me?And then I would consider living there.Two stars.')

In [98]:
llm=GoogleGenerativeAI(google_api_key=os.getenv('GOOGLE_API'),model='gemini-2.5-flash',temperature=0.2)

In [99]:
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """,
    input_variables = ['context', 'question']
)


In [101]:
def format_context(response):
        contxt='\n\n'.join([doc.page_content for doc in response])
        return contxt

In [102]:
final_pro=prompt.invoke(input={'context':contxt,'question':query})

In [103]:
answer=llm.invoke(final_pro)
print(answer)

I cannot answer your question as the question itself is missing. Please provide the question you would like me to answer based on the transcript.


In [104]:
parallel_chain=RunnableParallel(
    {'context': retriever| RunnableLambda(format_context),'question':RunnablePassthrough()}
)
parser=StrOutputParser()

In [109]:
main_chain=parallel_chain|prompt|llm|parser
main_chain.invoke('how many times she says hii' )


'Based on the provided transcript, she says "hii" one time:\n\n"Hi, hungry. I\'m Lexi."'