In [93]:
import os
from dotenv import load_dotenv
load_dotenv()



True

In [56]:
%pip install youtube-transcript-api


Note: you may need to restart the kernel to use updated packages.


In [73]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate

In [74]:
# Download the English captions for one video and flatten them into a single
# string that the later cells split, embed and index.
video_id = "LPZh9BOjkQs"
api = YouTubeTranscriptApi()
try:
    transcript_list = api.fetch(video_id, languages=["en"])
except TranscriptsDisabled as exc:
    # Every later cell needs `transcript`; stop here with a clear message
    # rather than printing and leaving the name undefined (which would only
    # surface later as an unrelated NameError).
    raise RuntimeError(f"No captions available for video {video_id!r}") from exc

# Each fetched snippet exposes its caption text via `.text`; join with spaces.
transcript = " ".join(snippet.text for snippet in transcript_list)
    

In [75]:
# import youtube_transcript_api
# print(youtube_transcript_api.__file__)

In [76]:
# %pip show youtube_transcript_api

In [77]:
# dir(YouTubeTranscriptApi)

In [78]:
# Inspect the raw fetch result: a sequence of snippets carrying text, start and duration.
transcript_list

FetchedTranscript(snippets=[FetchedTranscriptSnippet(text='Imagine you happen across a short movie script that', start=1.14, duration=2.836), FetchedTranscriptSnippet(text='describes a scene between a person and their AI assistant.', start=3.976, duration=3.164), FetchedTranscriptSnippet(text="The script has what the person asks the AI, but the AI's response has been torn off.", start=7.48, duration=5.58), FetchedTranscriptSnippet(text='Suppose you also have this powerful magical machine that can take', start=13.06, duration=3.92), FetchedTranscriptSnippet(text='any text and provide a sensible prediction of what word comes next.', start=16.98, duration=3.98), FetchedTranscriptSnippet(text='You could then finish the script by feeding in what you have to the machine,', start=21.5, duration=4.006), FetchedTranscriptSnippet(text="seeing what it would predict to start the AI's answer,", start=25.506, duration=2.862), FetchedTranscriptSnippet(text='and then repeating this over and over with 

In [79]:
# Cut the transcript into overlapping pieces (at most 100 characters each,
# 10 characters shared between neighbours) wrapped as Document objects.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=10,
)
chunks = splitter.create_documents(texts=[transcript])

In [80]:
# How many documents did the splitter produce?
len(chunks)

85

In [81]:
# Peek at the first chunk to see where the splitter cut the text.
chunks[0]

Document(metadata={}, page_content='Imagine you happen across a short movie script that describes a scene between a person and their AI')

In [66]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embed every chunk with the Gemini embedding model and index the vectors in FAISS.
# The index reflects whatever `chunks` holds when this cell runs, so re-run it
# after any change to the splitting cell.
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")
vector_store = FAISS.from_documents(documents=chunks, embedding=embeddings)

In [82]:
# Show how FAISS index positions map to the docstore's document ids.
vector_store.index_to_docstore_id

{0: '4b628bd0-7352-437b-bf22-b1d14329247d',
 1: '0c8b6cb1-6153-48f2-a0df-6fd32346001f',
 2: '53dc211b-b6e5-4dcf-9d6d-06c760085218',
 3: 'df0517bb-0556-4f55-be38-958d0ae9c830',
 4: '3d895249-73a5-45c8-9f7f-6b5a52684399',
 5: '9de5c074-207c-451d-a19f-179839141db6',
 6: '2b0785a6-74a9-4333-8175-16c0d3e9c357',
 7: '18bfd52c-c9c7-455d-837d-d797aeccf37e',
 8: '815c1748-6a31-46ef-be6e-63c4b7c8e5e4',
 9: 'ebfdb1ee-6011-44f8-9df6-b63737558ac8',
 10: 'b1f7a135-9840-41cf-8051-8bdae26ded55',
 11: 'b7a20fe5-a910-46d2-8b23-3cf077ab9ccc',
 12: '3e37b680-3ad9-45ed-a29d-c20bee086d17',
 13: '8e253b46-28f5-4750-b938-01a0ebb3b18a',
 14: '20035df5-6fc5-494f-9c1d-ea79cc580762',
 15: 'f65f422c-535c-4819-9d89-6a5dd47cd3e7',
 16: 'c841569b-75a5-44cd-8916-22b4ec4eda14',
 17: 'ede3c742-7824-4172-b5b1-60ff2e710c3e',
 18: '745504a9-aa21-4e8a-b86e-aab50e08ffb0',
 19: 'f3947215-c3e2-4aab-acfe-e4684b7a38bc',
 20: '21978bab-e2ed-4212-8b1a-38755dacc154',
 21: 'd73461a5-4423-4214-a81e-b7b5533f16f9',
 22: '4a0b579c-fcdc-

In [83]:
# Expose the vector store as a retriever that returns the 4 most similar chunks.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [84]:
# Display the retriever's repr to confirm its configuration.
retriever

VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000029F0A3F5A90>, search_kwargs={'k': 4})

In [85]:
# Smoke-test retrieval with a sample query and look at the returned chunks.
retriever.invoke("what is LLM")

[Document(id='18bfd52c-c9c7-455d-837d-d797aeccf37e', metadata={}, page_content='language model is a sophisticated mathematical function that predicts what word comes next for any'),
 Document(id='2b0785a6-74a9-4333-8175-16c0d3e9c357', metadata={}, page_content="the dialogue. When you interact with a chatbot, this is exactly what's happening. A large language"),
 Document(id='5eb8a396-2766-48d2-bea9-15b4be4c532b', metadata={}, page_content='large in large language model is how they can have hundreds of billions of these parameters. No'),
 Document(id='815c1748-6a31-46ef-be6e-63c4b7c8e5e4', metadata={}, page_content='for any piece of text. Instead of predicting one word with certainty, though, what it does is')]

In [95]:

# Chat model client. The API key and endpoint URL are read from environment
# variables; a missing variable yields None here rather than an error.
llm = ChatOpenAI(
    base_url=os.environ.get("OPENAI_API_BASE"),
    api_key=os.environ.get("OPENROUTER_API_KEY"),
    model="gpt-4o-mini",
    temperature=0.2,
)

In [96]:
# Prompt that restricts the model to the retrieved transcript context.
# It has two placeholders: {context} (retrieved chunks) and {question}.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
    You are a helpful assisstant.
    Answer Only from the provided transcript context.
    If the context is insufficient, just say you don't know.
    {context}
    Question: {question}
""",
)

In [97]:
# Manual walk-through, step 1: pick a question and fetch the matching chunks.
question = (
    " Is the topic of LLM discussed in this video, if yes then what was discussed"
)
retrieved_docs = retriever.invoke(question)

In [98]:
# Step 2: merge the retrieved chunks into one context string, separated by blank lines.
page_texts = [doc.page_content for doc in retrieved_docs]
context_text = "\n\n".join(page_texts)

In [99]:
# Step 3: fill the template's placeholders to get the prompt value for the model.
final_prompt = prompt.invoke(
    {
        "context": context_text,
        "question": question,
    }
)

In [100]:
# Show the fully rendered prompt text before it is sent to the model.
final_prompt

StringPromptValue(text="\n    You are a helpful assisstant.\n    Answer Only from the provided transcript context.\n    If the context is insufficient, just say you don't know.\n    Also, on my second channel I just posted a talk I gave a couple months ago about this topic for the\n\ndo all of the operations involved in training the largest language models? Do you think it would\n\nthe dialogue. When you interact with a chatbot, this is exactly what's happening. A large language\n\nlarge in large language model is how they can have hundreds of billions of these parameters. No\n    Question:  Is the topic of LLM discussed in this video, if yes then what was discussed\n")

In [101]:


# Step 4: send the rendered prompt to the chat model and print only the reply text.
answer = llm.invoke(final_prompt)
print(answer.content)



Yes, the topic of large language models (LLMs) is discussed in the video. It mentions the operations involved in training the largest language models and highlights that these models can have hundreds of billions of parameters.


In [102]:
from langchain_core.runnables import RunnableParallel, RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [103]:
def format_docs(retrieved_docs):
    """Concatenate the text of the retrieved documents into one context string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined by a blank line (``"\\n\\n"``);
        an empty string when no documents are given.
    """
    return "\n\n".join([doc.page_content for doc in retrieved_docs])

In [105]:
# Fan the incoming question out into the two inputs the prompt expects:
#   context  -> retrieve chunks for the question, then format them as one string
#   question -> the original input, passed through unchanged
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

In [106]:
# Check the dict produced by the parallel step: formatted context plus the question as given.
parallel_chain.invoke("what is LLM")

{'context': "language model is a sophisticated mathematical function that predicts what word comes next for any\n\nthe dialogue. When you interact with a chatbot, this is exactly what's happening. A large language\n\nlarge in large language model is how they can have hundreds of billions of these parameters. No\n\nfor any piece of text. Instead of predicting one word with certainty, though, what it does is",
 'question': 'what is LLM'}

In [107]:
# End-to-end RAG chain: gather inputs -> render prompt -> call model -> plain string.
parser = StrOutputParser()
main_chain = (
    parallel_chain
    | prompt
    | llm
    | parser
)
# The answer is based only on the few chunks retrieved for this query text.
main_chain.invoke("Summarize this video")

"I don't know."