In [1]:
! pip install -q youtube_transcript_api langchain-community faiss-cpu langchain_google_genai


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [None]:
from youtube_transcript_api import YouTubeTranscriptApi,TranscriptsDisabled
from langchain_text_splitters import RecursiveCharacterTextSplitter

  import pynvml  # type: ignore[import]


**1. Document Ingestion**

In [3]:
# https://www.youtube.com/watch?v=eDCxDAYMZnw
def doc_ingest(video_id, languages=("en",)):
  """Fetch a YouTube video's transcript and return it as one string.

  Args:
    video_id: The YouTube video ID (the ``v=`` value of the watch URL).
    languages: Language codes forwarded to ``YouTubeTranscriptApi.fetch``.
      Defaults to English only, which matches the previously hard-coded value.

  Returns:
    The text of every transcript snippet joined with single spaces, or
    ``None`` (after printing a notice) when transcripts are disabled for
    the video.
  """
  try:
    yt = YouTubeTranscriptApi()
    transcript = yt.fetch(video_id, languages=list(languages))
    # Combine all of the transcript snippets into one string.
    return " ".join(chunk.text for chunk in transcript)
  except TranscriptsDisabled:
    print("No transcript is available for this video!")
    # Make the "no transcript" result explicit so callers can test for it.
    return None

**2. Text Splitting**

In [4]:
from langchain_core.documents import Document

def split_documents(docs,chunk_size=1000,chunk_overlap=200):
  """Split a single text into overlapping chunk documents.

  Args:
    docs: The full text to split (despite the plural name, a single string
      that is used as the ``page_content`` of one ``Document``).
    chunk_size: Passed to ``RecursiveCharacterTextSplitter`` as ``chunk_size``.
    chunk_overlap: Passed to ``RecursiveCharacterTextSplitter`` as
      ``chunk_overlap``.

  Returns:
    The result of ``split_documents`` on the wrapped text.
  """
  splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
  # The splitter's split_documents() takes Document objects, so wrap the raw text first.
  wrapped_text = Document(page_content=docs)
  return splitter.split_documents([wrapped_text])

video_id = "eDCxDAYMZnw"
combined_text = doc_ingest(video_id)
# doc_ingest returns None when the transcript is unavailable. Stop here with a
# clear message instead of letting the splitter fail on a None page_content.
if combined_text is None:
  raise RuntimeError(f"Could not fetch a transcript for video {video_id!r}.")
# print(len(combined_text))

# Split with chunk_size=200 and chunk_overlap=50, then preview the second chunk.
split_chunks = split_documents(combined_text,200,50)
print(split_chunks[1])

page_content='When asked about his secret, he said something surprising. He claimed that he never focused on being better than his opponent, but only on perfecting his own technique every single day. This mindset'


**3. Storing the chunks in a vector store**

In [5]:
from langchain_community.embeddings import HuggingFaceEmbeddings

def download_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create the embedding model used to vectorise the transcript chunks.

    Args:
        model_name: Model identifier handed to ``HuggingFaceEmbeddings``.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with ``model_name``.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

# Build the embedding model once with the default model name; the vector store
# below is created from this `embeddings` object.
embeddings = download_embeddings()
print("Embeddings model downloaded successfully.")

  embeddings = HuggingFaceEmbeddings(model_name=model_name)


Embeddings model downloaded successfully.


In [6]:
from langchain_community.vectorstores import FAISS
vector_store = FAISS.from_documents(split_chunks,embeddings)
# Convert the chunks to their vectors and store them in FAISS; the vector IDs are different on every run.

# print(vector_store.index_to_docstore_id)
# Uncomment the line above to inspect the store's index_to_docstore_id mapping.

**RETRIEVER**

In [7]:
retriever = vector_store.as_retriever(search_type="similarity",search_kwargs={"k":3})
# Reuse the same vector store as a retriever: similarity search with k=3, i.e. three chunks per query.
# retriever.invoke("what is blackhole ? ")

**Setting up LLM**

In [8]:
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.prompts import PromptTemplate

from dotenv import load_dotenv
import os
# Load environment variables (including GEMINI_API_KEY) from a local .env file.
load_dotenv()

# Read the Gemini key from the environment and fail early with a clear message
# if it is missing; assigning None to os.environ would raise an unhelpful TypeError.
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set. Add it to your .env file or export it before running this cell."
    )
# The LLM below is constructed without an explicit key, so expose the key under
# GOOGLE_API_KEY for the client to pick up.
os.environ["GOOGLE_API_KEY"] = api_key


llm = GoogleGenerativeAI(model="gemini-2.5-flash")

# print(result)

# Prompt with two placeholders: {context} receives the retrieved transcript text and
# {question} the user's query. The instructions restrict the model to the supplied
# context and tell it to admit when the context is insufficient.
prompt_template = PromptTemplate(
    template="""
    You are a helpful assistant.
    Answer ONLY from the provided transcript context of the video.
    If the context is insufficient, just say that you donot know the answer.
    Context: {context}
    Question: {question}
    """,
    input_variables=["context", "question"]
)
# A grounded prompt for questioning the LLM.

In [9]:
# Manual retrieval step: ask the retriever for the chunks most similar to the question.
retrieved_docs = retriever.invoke("Why were 90s programmers so legendary ?")

def format_docs(retrieved_docs):
    """Merge retrieved documents into one context string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined by a blank line, in input order
        (an empty string when there are no documents).
    """
    passages = [doc.page_content for doc in retrieved_docs]
    return "\n\n".join(passages)

# The question that is paired with the retrieved context when the prompt is filled in.
question = "Why were 90s programmers so legendary ?"

# The LLM's answer can only be as good as the context the retriever supplies,
# so print the merged context to inspect what was actually retrieved.
context = format_docs(retrieved_docs)
print(context)

and stances. They never stopped practicing the fundamentals even after they had learned advanced techniques that seemed more impressive and exciting. This goes against how most people approach

the advanced techniques that look impressive to others. However, the samurai understood something crucial about how mastery actually works. Excellence in anything is not about knowing a thousand

This goes against how most people approach learning today. We rush through the basics because they feel boring and simple. We want to move quickly to the advanced techniques that look impressive to


In [10]:
# Fill the template's {context} and {question} placeholders to get the final prompt.
prompt = prompt_template.invoke({"context":context,"question":question})

**GENERATION**

In [11]:
# Send the filled-in prompt to the LLM and show its reply.
answer = llm.invoke(prompt)
print(answer)

I do not know the answer based on the provided transcript. The transcript discusses the learning approach of samurai and contrasts it with how people approach learning today, but it does not mention 90s programmers.


**Building a chain!**

In [12]:
# Build a chain: a pipeline in which no step has to be invoked manually, because the pipeline handles the orchestration itself.

from langchain_core.runnables import RunnableParallel,RunnableLambda,RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Fan the incoming question out to two branches and collect the results under two keys:
#   "context"  - the retriever receives the question and its results are fed into
#                format_docs (made runnable through RunnableLambda);
#   "question" - RunnablePassthrough forwards the question as-is.
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)
# parallel_chain.invoke("What is blackhole ? ")

In [None]:
parser = StrOutputParser()

# Linear chain: the parallel step feeds the prompt template, whose output goes to the LLM;
# the parser at the end parses the LLM output.
main_chain = parallel_chain | prompt_template | llm | parser
# Run the whole pipeline on a question with a single call.
main_chain.invoke('what does the path to mastery look like ?')

'The path to mastery rests on three powerful foundations. The first foundation is to never stop perfecting the fundamentals, no matter how advanced you become. The second foundation begins with "follow" but is incomplete in the provided text. The third foundation is not detailed.'