In [None]:
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_text_splitters import RecursiveCharacterTextSplitter
from youtube_transcript_api import TranscriptsDisabled, YouTubeTranscriptApi, NoTranscriptFound

from app.rag.youtube_client import youtube_id_extractor

In [70]:
# YouTube video whose transcript is indexed and queried in the rest of the notebook
url = "https://www.youtube.com/watch?v=DNBaUCCST3I"

# Extract the video ID from the URL with the project helper;
# the bare name on the last line displays it as the cell output.
video_id = youtube_id_extractor(url)
video_id

'DNBaUCCST3I'

Indexing

In [71]:
# Data Ingestion
# Fetch the English transcript for `video_id` and flatten it into one string.
#
# Reset first so that a failed fetch can never leave a stale transcript from an
# earlier run in the kernel for the cells below to pick up silently.
original_text = None

try:
    ytt_api = YouTubeTranscriptApi()
    fetched_transcript = ytt_api.fetch(video_id, languages=['en'])

except TranscriptsDisabled:
    print("No subtitles available for this video.")
    raise  # every later cell needs a transcript, so stop the run here

except NoTranscriptFound:
    print("No English subtitle exists for this video.")
    raise  # same reasoning: do not continue without a transcript

# Only reached when the fetch succeeded.
original_text = " ".join(snippet.text for snippet in fetched_transcript)

# An empty transcript is reported with ValueError rather than by raising the
# bare NoTranscriptFound class, whose constructor expects arguments describing
# the failed lookup and therefore cannot be raised without them.
if not original_text.strip():
    raise ValueError(f"Transcript for video {video_id!r} is empty.")

print(len(original_text))

9767


In [72]:
# Text Splitting
# Break the transcript into overlapping chunks (size 1000, overlap 150)
splitter = RecursiveCharacterTextSplitter(chunk_overlap=150, chunk_size=1000)

transcript_texts = [original_text]
chunks = splitter.create_documents(transcript_texts)

# Peek at the second chunk to sanity-check the split
second_chunk = chunks[1]
print(second_chunk.page_content)

is part of ML and so on you will have a very good understanding of a bigger picture. Once that is done you have to learn Python programming. You are going to use Python as a programming language for your agent AI application and therefore you need to have a basic understanding of this programming language. You will find many amazing tutorials on YouTube. Our channel also have this uh tutorial playlist where we are not only teaching concepts but we have done a lot of hands-on coding. There are a lot of exercises. So if you go to video description you will find a link to GitHub where we have given all these exercises. So once you learn through this tutorials, do some exercises, you will have a good understanding of Python. Python is a easiest programming language. Even a high school student can learn this in like few days. Okay. Once you have learned Python, now to move into Genai, your prerequisite step will be NLP foundation. Natural language processing is a science where you process


In [108]:
# Confirm what kind of object the splitter produced for each chunk
first_chunk = chunks[0]
print(type(first_chunk))

<class 'langchain_core.documents.base.Document'>


In [73]:
# Embeddings Generation

# Sentence-transformers model (768-dimensional vectors, per the original note)
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# Embed just the first chunk as a smoke test and print the resulting vectors
first_chunk_text = chunks[0].page_content
sample_vectors = embeddings.embed_documents([first_chunk_text])
print(sample_vectors)

[[-0.00642899377271533, -0.03912746533751488, -0.03814999386668205, -0.0030349043663591146, -0.03293267637491226, 0.04186036065220833, 0.011526877991855145, -0.03843093663454056, 0.06374586373567581, -0.001919934176839888, -0.010754251852631569, 0.03400886058807373, -0.04979201778769493, 0.07648493349552155, 0.0036313028540462255, -0.053465813398361206, 0.03664891794323921, -0.002674052258953452, 0.042753394693136215, -0.02193068154156208, -0.012525089085102081, -0.007744071073830128, -0.0784684345126152, 0.030769838020205498, -0.02772603929042816, -0.045808225870132446, -0.04694909229874611, -0.032276686280965805, 8.424354018643498e-05, -0.003989900462329388, -0.023536978289484978, -0.06517796963453293, 0.047439511865377426, 0.06972373276948929, 2.0207078250678023e-06, -0.06016882508993149, -0.0179424025118351, -0.015187956392765045, -0.015414934605360031, -0.061257630586624146, 0.06421276926994324, 0.01805582456290722, 0.0005521719576790929, 0.015041087754070759, -0.0652824193239212,

In [74]:
# Vector store

# Embed every chunk and index the vectors in an in-memory FAISS store
vector_store = FAISS.from_documents(documents=chunks, embedding=embeddings)

In [75]:
# Map FAISS index position 0 to its docstore ID, then print the stored document
doc_id = vector_store.index_to_docstore_id[0]
first_stored = vector_store.get_by_ids([doc_id])
print(first_stored)

[Document(id='6865277b-de66-4078-a901-0d5e34d45310', metadata={}, page_content="Agentic AI is very hot right now and in this video I'm going to share a practical stepbystep road map to learn Agentic AI along with free learning resources. The first step in this road map is to gain understanding of AI basics. There are so many confusing terms like statistical ML, deep learning, ML versus AI, NLP. You need to know what all these different term means. And for this we have an excellent onehour tutorial on YouTube where I have clarified all these concepts in a very simple language that even a high school student can understand it easily along with some visualization. For example, to explain neural networks, I have used this specific visualization. So once you watch this one hour video your fundamentals on the AI family tree like where everything stands you know like ML is part of AI and then statistical ML and deep learning is part of ML and so on you will have a very good understanding of a

Retrieval

In [76]:
# Retriever over the FAISS store; every query returns k=5 chunks
top_k = 5
retriever = vector_store.as_retriever(search_kwargs={'k': top_k})

In [77]:
# Trial query to check that retrieval returns relevant chunks
sample_question = 'What should i learn in python?'
docs = retriever.invoke(sample_question)

In [78]:
# Character length of the fourth retrieved chunk
fourth_hit = docs[3]
print(len(fourth_hit.page_content))

999


Augmentation

In [79]:
# Load variables from a local .env file into the environment before the
# Hugging Face endpoint client is created.
# NOTE(review): presumably this supplies the Hugging Face API token - confirm.
# ChatHuggingFace / HuggingFaceEndpoint are imported in the top import cell with
# the other dependencies; the trailing semicolon keeps the boolean result of
# load_dotenv() out of the cell output.
load_dotenv();

In [80]:
# LLM used for the augmentation and final-answer steps

# Generation settings gathered in one place, then unpacked into the endpoint.
# NOTE(review): temperature=0.2 and do_sample=False are both set; confirm which
# one the serving backend honours before relying on deterministic output.
endpoint_options = dict(
    repo_id="meta-llama/Meta-Llama-3.1-8B-Instruct",
    task="text-generation",
    max_new_tokens=512,
    temperature=0.2,
    do_sample=False,
    repetition_penalty=1.03,
)
llm = HuggingFaceEndpoint(**endpoint_options)

# Chat-style wrapper around the text-generation endpoint
model = ChatHuggingFace(llm=llm)

In [81]:
# Prompt template: instructs the model to answer only from the supplied
# transcript context and to admit when that context is insufficient.
qa_template = """
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """

prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=qa_template,
)

In [89]:
# User query for the end-to-end RAG run
# NOTE(review): the retriever returns only k=5 chunks, so a "whole video"
# question is answered from a subset of the transcript, not all of it.

question = "Can you summarized the whole video?"

# Fetch the chunks the retriever ranks as most relevant to the question
retrieved_docs = retriever.invoke(question)

In [90]:
# Stitch the retrieved chunks into one context string, separated by blank lines;
# the bare name on the last line displays the result.
retrieved_passages = [doc.page_content for doc in retrieved_docs]
context_text = "\n\n".join(retrieved_passages)
context_text

"together you know like company will always motivate you and help you with the inspiration. I hope you like this road map. Check video description for the PDF link. If you have any questions please post in the comment box below. I will try my best to answer as many questions as possible. Thank you very much for watching. If you like this video, give it a thumbs up and share it with your friends. [Music]\n\ntutorial that even a beginner can start and learn it. Okay. Then comes lang graph. So lang graph is used to build reliable agents using stateful graphs. Okay. So this is one and halfhour video which covers graphs, simple graphs, graphs with conditions, chatbot with tools, you know, chatbot with memory, human in the loop. We also cover langsmith which is used for monitoring tracing etc. Okay. So all of that is covered in this 1 and 1/2 hour video. So at this stage uh you would also like to build your own MCP server. Model context protocol is used to build a H&TK applications and for t

In [91]:
# Fill the template with the retrieved context and the user's question

prompt_inputs = dict(context=context_text, question=question)
final_prompt = prompt.invoke(prompt_inputs)

In [92]:
# Display the fully rendered prompt to verify what will be sent to the model
final_prompt

StringPromptValue(text="\n      You are a helpful assistant.\n      Answer ONLY from the provided transcript context.\n      If the context is insufficient, just say you don't know.\n\n      together you know like company will always motivate you and help you with the inspiration. I hope you like this road map. Check video description for the PDF link. If you have any questions please post in the comment box below. I will try my best to answer as many questions as possible. Thank you very much for watching. If you like this video, give it a thumbs up and share it with your friends. [Music]\n\ntutorial that even a beginner can start and learn it. Okay. Then comes lang graph. So lang graph is used to build reliable agents using stateful graphs. Okay. So this is one and halfhour video which covers graphs, simple graphs, graphs with conditions, chatbot with tools, you know, chatbot with memory, human in the loop. We also cover langsmith which is used for monitoring tracing etc. Okay. So al

Generation

In [106]:
# Send the rendered prompt to the chat model and print only the reply text
answer = model.invoke(final_prompt)
reply_text = answer.content
print(reply_text)

The video covers a roadmap for learning AGI (Artificial General Intelligence) and building real-world AI applications. It starts by introducing a tutorial that covers graphs, simple graphs, chatbots, and langsmith, which is used for monitoring and tracing. 

The speaker suggests building a MCP (Model Context Protocol) server for H&TK (Human-in-the-Loop) applications and recommends a free learning resource. However, for an ad-free experience with more practical content, the speaker suggests the Gennai Boot Camp.

The video then explains the next steps in learning AGI, which include understanding NLP (Natural Language Processing) and then generative AI (GAI). The speaker explains the key concepts needed for GAI, such as LLM (Large Language Model) and vector databases like ChromaDB and Pinecone. 

The speaker recommends a three-hour course on GAI that covers concepts like LLM, embeddings, and retrieval-augmented generation. The course is practical and includes coding and project-based lea