# Youtube Video Summerizer using Langchain

In [None]:
import os 
os.environ['GROQ_API_KEY'] = '' # groq api key 

In [46]:
# !pip install -q youtube-transcript-api langchain-community langchain-openai faiss-cpu tiktoken python-dotenv

# Importing Libraries

In [27]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate 
from langchain_huggingface import HuggingFaceEmbeddings

# Document Ingestion

In [67]:
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound

# YouTube video ID only, not full URL
video_id = "n5bY1gIq2gU"

try:
    # Create API client
    api = YouTubeTranscriptApi()
    
    # Fetch transcript data for the video
    transcript = api.fetch(video_id, languages=["hi", "en"])

    # Convert transcript objects into a single text string
    text = " ".join(item.text for item in transcript)
    
    # Print full transcript
    print(text)

except TranscriptsDisabled:
    # Raised when captions are disabled by the video owner
    print("Captions are disabled")

except NoTranscriptFound:
    # Raised when no transcript exists for the video
    print("No transcript found")

except Exception as e:
    # Catch any unexpected errors
    print("Error:", e)


I’m excited to announce the release of our latest LangChain Academy
foundations course, Introduction to LangChain in Python. We’ve entered a new era of AI, one
where our apps don’t just respond, they think, plan, and act autonomously. Today, we're building agents –
AI systems that can reason and interact with their environments
to get real work done. Imagine a team of assistants
that can summarize your inbox, schedule meetings, and perform
market research 24/7. In this course, you'll learn to
build deployment-ready agents like these using LangChain. LangChain is the best way to get started
with building agents. Together with the community,
we've learned from production use cases and identified the essential
components an agent needs. The “create agent” abstraction in LangChain
captures these qualities in their simplest form. In this course,
you'll learn how to build agents using this abstraction
and how to customize them with middleware. The goal of this course is to get
you building a

In [29]:
transcript

FetchedTranscript(snippets=[FetchedTranscriptSnippet(text='I’m excited to announce the release', start=0.0, duration=1.71), FetchedTranscriptSnippet(text='of our latest LangChain Academy\nfoundations course,', start=1.71, duration=2.794), FetchedTranscriptSnippet(text='Introduction to LangChain in Python.', start=4.504, duration=2.419), FetchedTranscriptSnippet(text='We’ve entered a new era of AI, one\nwhere our apps don’t just respond,', start=6.923, duration=4.004), FetchedTranscriptSnippet(text='they think, plan, and act autonomously.', start=10.927, duration=3.128), FetchedTranscriptSnippet(text="Today, we're building agents –\nAI systems that can reason", start=14.055, duration=3.629), FetchedTranscriptSnippet(text='and interact with their environments\nto get real work done.', start=17.684, duration=3.42), FetchedTranscriptSnippet(text='Imagine a team of assistants\nthat can summarize your inbox,', start=21.104, duration=3.42), FetchedTranscriptSnippet(text='schedule meetings, an

# Text Splitting

In [30]:
# Convert FetchedTranscript → plain text
transcript_text = " ".join(item.text for item in transcript)

# Chunking
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100
)

chunks = splitter.create_documents([transcript_text])

In [31]:
print(type(transcript))       # FetchedTranscript
print(type(transcript_text))  # str

<class 'youtube_transcript_api._transcripts.FetchedTranscript'>
<class 'str'>


In [32]:
len(chunks)

6

# Embedding Generation and Storing in Vector Store (Faiss)

In [33]:
emb = HuggingFaceEmbeddings(
        model_name="BAAI/bge-base-en-v1.5",
        model_kwargs={"device": "cpu", "trust_remote_code": True},
        encode_kwargs={"normalize_embeddings": True}
)

In [34]:
vector_store = FAISS.from_documents(chunks, emb)

In [35]:
vector_store.index_to_docstore_id

{0: '25e030ee-38ff-4670-b86a-c5b9de70ae7b',
 1: '88420877-d228-4f7f-925f-92888b277665',
 2: 'e91830cd-9431-41fa-a140-c7c727a2210e',
 3: '8b7a5de5-6ed4-4066-9025-df9ae88ebfb3',
 4: '3a42e3a8-e9e5-4734-be9b-d70309aad979',
 5: '55bd8e2a-cf80-413c-b5b9-07d684d198f1'}

In [36]:
vector_store.get_by_ids(['4750e96e-4bbb-4958-a72c-fd748819c9f4'])

[]

# Retrieval

In [37]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7d4cc6182180>, search_kwargs={'k': 4})

In [38]:
retriever.invoke('What is langchain')

[Document(id='25e030ee-38ff-4670-b86a-c5b9de70ae7b', metadata={}, page_content="I’m excited to announce the release of our latest LangChain Academy\nfoundations course, Introduction to LangChain in Python. We’ve entered a new era of AI, one\nwhere our apps don’t just respond, they think, plan, and act autonomously. Today, we're building agents –\nAI systems that can reason and interact with their environments\nto get real work done. Imagine a team of assistants\nthat can summarize your inbox, schedule meetings, and perform\nmarket research 24/7. In this course, you'll learn to"),
 Document(id='88420877-d228-4f7f-925f-92888b277665', metadata={}, page_content="market research 24/7. In this course, you'll learn to\nbuild deployment-ready agents like these using LangChain. LangChain is the best way to get started\nwith building agents. Together with the community,\nwe've learned from production use cases and identified the essential\ncomponents an agent needs. The “create agent” abstractio

# Augmentation

In [39]:
llm = ChatGroq(model='groq/compound', temperature=0.1)

In [40]:
prompt = PromptTemplate(
    template="""
    You are a helpful assistant.
    Answer Only form the provided transcript context.
    If No found. just say You dont know.
    {context}
    Question: {question}
    """,
    input_variables=['context', 'question']
)

question = 'Is the topic about LangChain? discussed in this video? if yes then what was discussed'
retrieved_docs = retriever.invoke(question)

context_text = "\n\n".join(doc.page_content for doc in retrieved_docs) 

In [41]:
retrieved_docs

[Document(id='25e030ee-38ff-4670-b86a-c5b9de70ae7b', metadata={}, page_content="I’m excited to announce the release of our latest LangChain Academy\nfoundations course, Introduction to LangChain in Python. We’ve entered a new era of AI, one\nwhere our apps don’t just respond, they think, plan, and act autonomously. Today, we're building agents –\nAI systems that can reason and interact with their environments\nto get real work done. Imagine a team of assistants\nthat can summarize your inbox, schedule meetings, and perform\nmarket research 24/7. In this course, you'll learn to"),
 Document(id='88420877-d228-4f7f-925f-92888b277665', metadata={}, page_content="market research 24/7. In this course, you'll learn to\nbuild deployment-ready agents like these using LangChain. LangChain is the best way to get started\nwith building agents. Together with the community,\nwe've learned from production use cases and identified the essential\ncomponents an agent needs. The “create agent” abstractio

In [42]:
context_text

"I’m excited to announce the release of our latest LangChain Academy\nfoundations course, Introduction to LangChain in Python. We’ve entered a new era of AI, one\nwhere our apps don’t just respond, they think, plan, and act autonomously. Today, we're building agents –\nAI systems that can reason and interact with their environments\nto get real work done. Imagine a team of assistants\nthat can summarize your inbox, schedule meetings, and perform\nmarket research 24/7. In this course, you'll learn to\n\nmarket research 24/7. In this course, you'll learn to\nbuild deployment-ready agents like these using LangChain. LangChain is the best way to get started\nwith building agents. Together with the community,\nwe've learned from production use cases and identified the essential\ncomponents an agent needs. The “create agent” abstraction in LangChain\ncaptures these qualities in their simplest form. In this course,\nyou'll learn how to build agents using this abstraction\n\nwith creating an e

In [43]:
final_prompt = prompt.invoke({'context': context_text, "question":question})

In [44]:
final_prompt

StringPromptValue(text="\n    You are a helpful assistant.\n    Answer Only form the provided transcript context.\n    If No found. just say You dont know.\n    I’m excited to announce the release of our latest LangChain Academy\nfoundations course, Introduction to LangChain in Python. We’ve entered a new era of AI, one\nwhere our apps don’t just respond, they think, plan, and act autonomously. Today, we're building agents –\nAI systems that can reason and interact with their environments\nto get real work done. Imagine a team of assistants\nthat can summarize your inbox, schedule meetings, and perform\nmarket research 24/7. In this course, you'll learn to\n\nmarket research 24/7. In this course, you'll learn to\nbuild deployment-ready agents like these using LangChain. LangChain is the best way to get started\nwith building agents. Together with the community,\nwe've learned from production use cases and identified the essential\ncomponents an agent needs. The “create agent” abstracti

# Generation

In [45]:
ans = llm.invoke(final_prompt)
print(ans.content)

Yes. The transcript discusses LangChain. It explains that the course teaches how to build deployment‑ready agents using LangChain’s “create agent” abstraction, customize language models with out‑of‑the‑box arguments, system prompts, tools, short‑term memory, and middleware (e.g., an email‑assistant that automates your inbox). It also mentions a plug‑and‑play chat interface for demoing these agents.


# Building a Chain

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [58]:
# concatenate the retrieved_docs, function
def contcatenated_docs(retrieved_docs):
    context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
    return context_text

In [59]:
parallel_chain = RunnableParallel({
    'context': retriever | RunnableLambda(contcatenated_docs),
    'question': RunnablePassthrough()
})

In [60]:
parallel_chain.invoke('what is langchain?')

{'context': "I’m excited to announce the release of our latest LangChain Academy\nfoundations course, Introduction to LangChain in Python. We’ve entered a new era of AI, one\nwhere our apps don’t just respond, they think, plan, and act autonomously. Today, we're building agents –\nAI systems that can reason and interact with their environments\nto get real work done. Imagine a team of assistants\nthat can summarize your inbox, schedule meetings, and perform\nmarket research 24/7. In this course, you'll learn to\n\nmarket research 24/7. In this course, you'll learn to\nbuild deployment-ready agents like these using LangChain. LangChain is the best way to get started\nwith building agents. Together with the community,\nwe've learned from production use cases and identified the essential\ncomponents an agent needs. The “create agent” abstraction in LangChain\ncaptures these qualities in their simplest form. In this course,\nyou'll learn how to build agents using this abstraction\n\nwith c

In [61]:
parser = StrOutputParser()

main_chain = parallel_chain | prompt | llm | parser

In [62]:
main_chain.invoke('Can you summerize the video?')

'The video describes a LangChain course that teaches you to build agents step‑by‑step.  \n\n* **Module\u202f1** – Basics: customize a language model with arguments and system prompts, then add tools and short‑term memory.  \n* **Module\u202f2** – More advanced features: MCP, customized memory, multi‑agent systems, culminating in a team of synchronized wedding planners that can fetch up‑to‑date flight prices and venue details.  \n* **Module\u202f3** – Level‑up with middleware: add dynamic tools, prompts, and models; introduce human‑in‑the‑loop gating for sensitive actions; summarize long conversations to protect the context window. Projects include a personal‑chef assistant that suggests recipes from fridge photos and an email assistant that can automate an entire inbox.  \n\nThe course also provides a plug‑and‑play chat interface for quickly demoing the agents.'