In [44]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate


In [45]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain.embeddings import HuggingFaceEmbeddings

In [46]:
# Reduce the full watch URL to the bare video ID used by the transcript request.
# NOTE(review): extract_youtube_video_id is not defined in any visible cell —
# confirm it is defined earlier in the notebook.
video_id = extract_youtube_video_id(
    "https://www.youtube.com/watch?v=BwziJOvqFE0"
)
print(video_id)

BwziJOvqFE0


In [47]:
from youtube_transcript_api import NoTranscriptFound

# Start from an explicit "no transcript" state so that a failed fetch cannot
# leave a value from an earlier run of this cell in `transcript`.
transcript = None

try:
    # Only English captions are requested (languages=["en"]).
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])

    # Flatten the caption segments to plain text
    transcript = " ".join(chunk["text"] for chunk in transcript_list)
    print(transcript)

except (TranscriptsDisabled, NoTranscriptFound):
    # Captions are disabled or there is no English track. Every other error
    # (network failure, KeyboardInterrupt, ...) is allowed to propagate instead
    # of being misreported as "no captions".
    print("No captions available for this video.")

- [Narrator] Imagine a
small rural village in Iraq in the middle of the night in 2003. A man is hiding inside
an underground hole. Its entrance covered with styrofoam. And suddenly, the streetlights
in the entire village go out and hundreds of U.S. soldiers begin sweeping through the town. As the troops reach the farmhouse, a soldier starts to unearth a rope, discovering that it leads
to the hidden trap door. They open it and prepare to
throw a grenade into the hole just to be safe. But just before the pin is pulled, everyone freezes as a pair of
hands emerge from the hole, followed by a bearded man. With his hands in the air, he
declares to those around him. "My name is Saddam Hussein. I am the president of Iraq
and I want to negotiate." - Ladies and gentlemen, we got him. (gentle music) - [Narrator] Whether
you've heard the story of Saddam's hiding place because of the countless
memes circulating online, or whether you lived through this time and think you remember it, I doubt you ev

In [48]:
# Echo the string itself (not print) so the repr, including embedded "\n", is shown.
transcript

'- [Narrator] Imagine a\nsmall rural village in Iraq in the middle of the night in 2003. A man is hiding inside\nan underground hole. Its entrance covered with styrofoam. And suddenly, the streetlights\nin the entire village go out and hundreds of U.S. soldiers begin sweeping through the town. As the troops reach the farmhouse, a soldier starts to unearth a rope, discovering that it leads\nto the hidden trap door. They open it and prepare to\nthrow a grenade into the hole just to be safe. But just before the pin is pulled, everyone freezes as a pair of\nhands emerge from the hole, followed by a bearded man. With his hands in the air, he\ndeclares to those around him. "My name is Saddam Hussein. I am the president of Iraq\nand I want to negotiate." - Ladies and gentlemen, we got him. (gentle music) - [Narrator] Whether\nyou\'ve heard the story of Saddam\'s hiding place because of the countless\nmemes circulating online, or whether you lived through this time and think you remember it, I

In [49]:
# Cut the transcript into overlapping pieces and wrap each piece in a Document.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = splitter.create_documents([transcript])

In [50]:
# How many chunks the splitter produced.
len(chunks)

22

In [51]:
# Check what kind of object create_documents returned for each chunk.
type(chunks[0])

langchain_core.documents.base.Document

In [52]:
# Embed every chunk with a Hugging Face model and build a FAISS index from them.
# Alternatives left here for reference:
#   - OpenAIEmbeddings(model="text-embedding-3-small")
#   - model_name = "Qwen/Qwen3-Embedding-0.6B"
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)
vector_store = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)


In [53]:
# Inspect the mapping from index position to the stored document's ID.
vector_store.index_to_docstore_id

{0: '84b20fcc-871f-4cce-9e19-5b38d70990f8',
 1: 'ee9c277e-423c-476d-b725-262c26706588',
 2: 'e6054623-8411-441c-94da-7d1545b781da',
 3: '4ab5ba26-5d74-43d4-a2ac-c7f20de07fe4',
 4: '1ee48b4b-59cf-4334-8f2d-3fc787e2e041',
 5: 'ec07d3f2-0309-4b74-ab0b-d5dceb963b8b',
 6: 'f8fe9c7b-7c67-4ccb-9644-0c730a6d8284',
 7: '0b8cbd8c-43fe-4233-8ef9-13db87c491ee',
 8: 'de789edd-8f07-4afe-b2d7-90858f282b12',
 9: '8d0d9f7f-124f-4160-a31b-b37068062b3b',
 10: 'd1fcbc98-4e92-409f-a66b-52f16eab807b',
 11: '511006dd-6880-462b-8aea-b188864965c2',
 12: '1e9a5dcb-9e70-4c07-aa19-2a4d578cf4ae',
 13: 'bdace5b4-4065-4219-bd50-b2518daad9c8',
 14: 'b988f813-eb33-44d3-ad90-3269c2214192',
 15: '25782068-9629-49c5-a630-8d3e0df96842',
 16: '3109c4dd-753f-45ff-b4dd-b9e4bbb72234',
 17: '8091e8eb-b84b-45b4-962d-70ad6a3bb87a',
 18: '4595e678-7d2d-481f-882c-0ed5bd9e97b4',
 19: '783911f1-5e89-4e23-8853-977a772cc5b2',
 20: '1c59ac74-73c4-4b8e-a0b7-f3896b179636',
 21: 'c44484b6-5e56-40d9-80bd-cb02855ab2b1'}

In [54]:
# Expose the vector store as a retriever: similarity search, k=4 results per query.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [55]:
from dotenv import load_dotenv

# Load variables from a .env file before the endpoint client is created.
# NOTE(review): presumably this supplies the Hugging Face API token — confirm.
load_dotenv()

# Hosted text-generation endpoint for the instruct model.
llm = HuggingFaceEndpoint(
    model="meta-llama/Llama-3.1-8B-Instruct",
    task="text-generation",
    temperature=0.7,
)

# Chat-style wrapper around the endpoint; this is what the rest of the notebook invokes.
model = ChatHuggingFace(llm=llm)

In [56]:
# Prompt for answering a question from transcript text only.
#   {context}  - transcript excerpts the answer must be based on
#   {question} - the user's question
# The instruction sentence previously ended in a dangling "to answer ."; it now
# states what is to be answered.
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Use the provided context of video transcript to answer the question.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """,
    input_variables=['context', 'question']
)

In [57]:
# Run the question through the retriever to get the matching chunks.
question = "summarize the context"
retrieved_docs = retriever.invoke(question)

In [58]:
# Show the Document objects the retriever returned for the question.
retrieved_docs

[Document(id='84b20fcc-871f-4cce-9e19-5b38d70990f8', metadata={}, page_content='- [Narrator] Imagine a\nsmall rural village in Iraq in the middle of the night in 2003. A man is hiding inside\nan underground hole. Its entrance covered with styrofoam. And suddenly, the streetlights\nin the entire village go out and hundreds of U.S. soldiers begin sweeping through the town. As the troops reach the farmhouse, a soldier starts to unearth a rope, discovering that it leads\nto the hidden trap door. They open it and prepare to\nthrow a grenade into the hole just to be safe. But just before the pin is pulled, everyone freezes as a pair of\nhands emerge from the hole, followed by a bearded man. With his hands in the air, he\ndeclares to those around him. "My name is Saddam Hussein. I am the president of Iraq\nand I want to negotiate." - Ladies and gentlemen, we got him. (gentle music) - [Narrator] Whether\nyou\'ve heard the story of Saddam\'s hiding place because of the countless'),
 Document(id

In [59]:
# Concatenate the text of every retrieved chunk, with a blank line between chunks.
context_text = "\n\n".join(
    [doc.page_content for doc in retrieved_docs]
)
context_text

'- [Narrator] Imagine a\nsmall rural village in Iraq in the middle of the night in 2003. A man is hiding inside\nan underground hole. Its entrance covered with styrofoam. And suddenly, the streetlights\nin the entire village go out and hundreds of U.S. soldiers begin sweeping through the town. As the troops reach the farmhouse, a soldier starts to unearth a rope, discovering that it leads\nto the hidden trap door. They open it and prepare to\nthrow a grenade into the hole just to be safe. But just before the pin is pulled, everyone freezes as a pair of\nhands emerge from the hole, followed by a bearded man. With his hands in the air, he\ndeclares to those around him. "My name is Saddam Hussein. I am the president of Iraq\nand I want to negotiate." - Ladies and gentlemen, we got him. (gentle music) - [Narrator] Whether\nyou\'ve heard the story of Saddam\'s hiding place because of the countless\n\npalaces in the Middle East. - [Narrator] They publicized\nphotos of his arrest, showing him

In [60]:
# Fill the template's two placeholders with the retrieved context and the question.
final_prompt = prompt.invoke(
    {
        "context": context_text,
        "question": question,
    }
)

In [61]:
# Inspect the rendered prompt value before sending it to the model.
final_prompt

StringPromptValue(text='\n      You are a helpful assistant.\n      Use the provided context of video transcript to answer .\n      If the context is insufficient, just say you don\'t know.\n\n      - [Narrator] Imagine a\nsmall rural village in Iraq in the middle of the night in 2003. A man is hiding inside\nan underground hole. Its entrance covered with styrofoam. And suddenly, the streetlights\nin the entire village go out and hundreds of U.S. soldiers begin sweeping through the town. As the troops reach the farmhouse, a soldier starts to unearth a rope, discovering that it leads\nto the hidden trap door. They open it and prepare to\nthrow a grenade into the hole just to be safe. But just before the pin is pulled, everyone freezes as a pair of\nhands emerge from the hole, followed by a bearded man. With his hands in the air, he\ndeclares to those around him. "My name is Saddam Hussein. I am the president of Iraq\nand I want to negotiate." - Ladies and gentlemen, we got him. (gentle 

In [62]:
# Send the rendered prompt to the chat model and print only the reply's text content.
answer = model.invoke(final_prompt)
print(answer.content)

The context is about the story of how Saddam Hussein was found and captured by US soldiers in 2003. The video transcript describes the events leading up to his capture, including the U.S. invasion of Iraq, the search for Saddam, and the efforts to erode his cult of personality.

The key points in the context are:

1. Saddam was hiding in a secret underground hole in a rural village in Iraq.
2. US soldiers discovered the hole and were preparing to throw a grenade in when Saddam emerged and asked to negotiate.
3. The capture of Saddam was a significant event, but the U.S. military's actions and the role of media in shaping narratives are also explored in the context.

The transcript also delves into the power of different narratives, including the U.S. narrative of finding weapons of mass destruction and Saddam's own cult of personality. The story highlights the investigation that led to Saddam's capture, including the decision to focus on his bodyguards rather than the officials from hi

In [23]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [24]:
def format_docs(retrieved_docs):
    """Merge retrieved documents into a single context string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined in order, separated by one blank
        line ("\\n\\n"). An empty iterable yields an empty string.
    """
    return "\n\n".join([doc.page_content for doc in retrieved_docs])

In [25]:
# Fan the incoming question out into the two inputs the prompt needs:
#   context  - retrieved chunks, flattened to text by format_docs
#   question - the original input, passed through unchanged
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

In [26]:
# Smoke-test the retrieval half of the chain on its own.
# NOTE(review): the saved output for this cell does not match the transcript
# fetched earlier in the notebook, and this cell's execution count (26) is lower
# than the cells above it (44-62) — presumably it was run against a different
# vector store. Restart the kernel and run all cells to confirm.
parallel_chain.invoke('who is Demis')

{'context': "to get world peace because there's also other corrupting things like wanting power over people and this kind of stuff which is not necessarily satisfied by by just abundance but i think it will help um and i think uh but i think ultimately ai is not going to be run by any one person or one organization i think it should belong to the world belong to humanity um and i think maybe many there'll be many ways this will happen and ultimately um everybody should have a say in that do you have advice for uh young people in high school and college maybe um if they're interested in ai or interested in having a big impact on the world what they should do to have a career they can be proud of her to have a life they can be proud of i love giving talks to the next generation what i say to them is actually two things i i think the most important things to learn about and to find out about when you're when you're young is what are your true passions is first of all there's two things on

In [27]:
# Output parser used as the last step of the chain below.
parser = StrOutputParser()

In [30]:
# Full pipeline: build {context, question} -> render prompt -> call model -> parse output.
main_chain = (
    parallel_chain
    | prompt
    | model
    | parser
)

In [31]:
# Run the full chain end to end on a single question.
# NOTE(review): the saved output describes a different video from the transcript
# fetched earlier in the notebook, and this cell's execution count (31) is lower
# than the cells above it (44-62) — re-run from a fresh kernel to confirm the result.
main_chain.invoke('Can you summarize the video')

"The conversation is about explaining complex topics in a simple way. The speaker thinks that explaining things clearly and simply is a sign of intelligence. They mention Richard Feynman as an example of someone who could explain complex topics in a simple way. The conversation also touches on the topic of artificial intelligence and how it can be used to enhance human intelligence. The speaker shares a personal anecdote about being impressed by Garry Kasparov's mind when he beat Deep Blue, a chess-playing computer."