### Get Transcript

In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter


In [2]:
from youtube_transcript_api import YouTubeTranscriptApi , TranscriptsDisabled

# fetch() expects the bare 11-character video ID. URL query parameters such as
# "&t=54s" belong to the watch link, not to the ID, so they are dropped here.
video_length = 0
video_id = "Usr8ODIDU-g"

# Reset up front so a failed fetch cannot leave a transcript from an earlier
# run in the kernel for the cells below to pick up silently.
transcript = ""

try:
    ytt_api = YouTubeTranscriptApi()
    res = ytt_api.fetch(video_id)
    # Join the text of every fetched snippet into one space-separated string.
    transcript = " ".join(snippet.text for snippet in res)
except TranscriptsDisabled:
    print("Transcripts for this video are disabled by the creator")

In [3]:
# Show the (placeholder) video length and the joined transcript, one per line.
print(video_length, transcript, sep="\n")

0
is there a case to be made that brothers karamazov is the greatest book ever written yeah there's a case to be made for that i don't know is it better than crime and punishment yes yeah you think so why do you i'm not arguing with it why do you think that uh well this is every every book is a person some of my best friends are inside that book yeah it's an amazing book and there's no doubt about it uh i think it's some books are defined by your personal relationship with them and that one was definitive and i almost graduated to that one because for the longest time the idiot was my favorite book uh of all because i identified with the ideas represented by prince michigan i also identified oh that's interesting to prince michigan as a as a human being holy fool the fool because the world kind of my whole life still kind of sees me saw me in my perception my narrow perception is kind of the fool and i different from the interpretation that a lot of people take of this book i see him a

### Create Chunks

In [4]:
# Count words on any run of whitespace. split(" ") would count an empty string
# for every repeated space and would not split on newlines.
num_words = len(transcript.split())

# Transcripts of 5000+ words get larger chunks with more overlap; shorter ones
# get smaller chunks with little overlap.
if num_words >= 5000:
    chunk_size, chunk_overlap = 1000, 200
else:
    chunk_size, chunk_overlap = 300, 20

splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)


In [5]:
# Wrap the single transcript string in a list and split it into Document chunks.
chunks = splitter.create_documents(texts=[transcript])

In [6]:
# Number of chunks the splitter produced for this transcript.
len(chunks)

25

In [7]:
# Dumps every chunk for inspection; the output is long because all Documents are printed.
print(chunks)

[Document(metadata={}, page_content="is there a case to be made that brothers karamazov is the greatest book ever written yeah there's a case to be made for that i don't know is it better than crime and punishment yes yeah you think so why do you i'm not arguing with it why do you think that uh well this is every every book is a person"), Document(metadata={}, page_content="book is a person some of my best friends are inside that book yeah it's an amazing book and there's no doubt about it uh i think it's some books are defined by your personal relationship with them and that one was definitive and i almost graduated to that one because for the longest time the idiot"), Document(metadata={}, page_content="time the idiot was my favorite book uh of all because i identified with the ideas represented by prince michigan i also identified oh that's interesting to prince michigan as a as a human being holy fool the fool because the world kind of my whole life still kind of sees me saw me in 

### Storing in Vector Store

In [16]:
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_ollama import  OllamaLLM , ChatOllama
from langchain_core.prompts import PromptTemplate

In [9]:
# Embedding model used to vectorise the transcript chunks for the FAISS store.
# NOTE(review): "llama3.2:latest" is the same model tag used for the chat LLM later
# in this notebook; a dedicated embedding model may retrieve better — TODO confirm.
embedd_model = OllamaEmbeddings(model="llama3.2:latest")

In [10]:
# Embed every chunk with the Ollama embedding model and index the vectors in FAISS.
vector_store = FAISS.from_documents(chunks, embedd_model)

### Retrieval

In [None]:
# Similarity-search retriever that returns the 8 closest chunks for a query.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 8},
)

In [15]:
# Smoke-test the retriever with a broad question and display the returned Documents.
retriever.invoke("what they are talking about?")

[Document(id='ecf5e00f-3688-4e9f-b2a8-683f5b5005ac', metadata={}, page_content="and that was a lot i learned a lot from freud i learned a lot from rogers and i learned a lot from well from dostoevsky and nietzsche i'm going to do a course on dostoevsky and nietzsche for this peterson academy this is coming up in january oh that'll be damn i'm really looking forward to it"),
 Document(id='2ac85f2e-8933-4796-8c1a-5c6200888b32', metadata={}, page_content='sense not to encounter a great dead friend and fail to learn no and and i mean i tried to separate the wheat from the chaff when i read you know and i read all the great clinicians all of them perhaps not those who are foremost in the pantheon and i tried to pull out what i could and that was a lot'),
 Document(id='29d46cf4-8ec6-458c-8a20-01251e82554e', metadata={}, page_content='interesting and then taken as a collective we create the world together in that way you'),
 Document(id='b7df306c-693b-4c83-af62-5d505de64904', metadata={}, pag

### Augmentation

In [17]:
# Chat model that generates the final answer from the augmented prompt.
llm = ChatOllama(model="llama3.2:latest")

In [18]:
# Prompt that restricts the model to the retrieved transcript context and asks it
# to admit when the context is insufficient. It is filled with the two variables
# `context` and `question`.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
            You are a helpful assitant.
            Answer from provided transcript context ONLY.
            if the context is insufficient then reply that you don't know.
            Context is \n
            {context}
            \n
            Question : {question}
""",
)

In [24]:
# Ask a question and fetch the chunks most similar to it.
# NOTE(review): the transcript spells the title "karamazov"; the spelling "koromozov"
# used here may hurt retrieval (the recorded answer reports no mention) — TODO confirm.
question = "Did they mentioned about brothers koromozov?"
retrived_docs = retriever.invoke(question)

In [25]:
# Stitch the retrieved chunks into one context string, separated by blank lines.
doc_context = "\n\n".join([doc.page_content for doc in retrived_docs])

In [26]:
# Fill the template with the retrieved context and the user's question.
final_prompt = prompt.invoke(dict(context=doc_context, question=question))

In [27]:
# Send the filled prompt to the chat model and print only the text of its reply.
answer = llm.invoke(final_prompt)
print(answer.content)

No, there is no mention of Brothers Karamazov in the provided transcript.


### Building Chains

In [30]:
from langchain_core.runnables import RunnableLambda , RunnablePassthrough , RunnableParallel
from langchain_core.output_parsers import StrOutputParser

In [33]:
def format_docs(retrived_docs):
    """Merge retrieved documents into a single context string.

    Args:
        retrived_docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined with a blank line between them
        (an empty string when there are no documents).
    """
    texts = [document.page_content for document in retrived_docs]
    return "\n\n".join(texts)

In [34]:
# Fan the incoming question out into the two prompt inputs:
#   context  -> retrieve chunks for the question and format them as one string
#   question -> pass the question through unchanged
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

In [35]:
# Parser applied as the last step of the chain, after the chat model.
parser = StrOutputParser()

# End-to-end pipeline: build {context, question} -> fill prompt -> call LLM -> parse output.
main_chain = parallel_chain | prompt | llm | parser

In [37]:
# Run the whole chain on a question and print the parsed answer.
# NOTE(review): `res` is also the name used for the fetched transcript in the first
# section; here it is rebound to the chain's output.
res = main_chain.invoke("Who is Fyodor Dostoevsky")
print(res)

Fyodor Dostoevsky is a Russian novelist, psychologist, and philosopher.


In [1]:
from helpers import create_chunks, document_retriever, final_output, get_transcript , create_vector_store

# Same pipeline as above, driven through the project's `helpers` module.
# NOTE(review): the helper implementations are not visible in this notebook; the
# recorded output shows a bare video ID being pasted at the "link" prompt — confirm
# which input forms get_transcript accepts.
transcript = get_transcript(link=input("Paste video link: "))
# print(transcript)  # uncomment to inspect the raw transcript
chunks = create_chunks(transcript=transcript)
vector_store = create_vector_store(chunks)

q8VePUwjB9Y
Working...
Transcript Loaded !!!
155 chunks are created...
Creating Vector Store...


In [6]:
# Ask the user for a question about the loaded video.
query = input("Enter your query: ")

# Build a retriever over the vector store created in the previous cell.
retriver = document_retriever(vector_store=vector_store)

# Run the query through the helper that produces the final answer.
# NOTE(review): final_output's internals are not visible here — presumably
# retrieval plus an LLM call; verify in helpers.
output = final_output(query = query ,retriever=retriver )

print(output)

Retrieving Documents...
The speaker has mentioned Friedrich Nietzsche in relation to his writing style (aphoristic) and style of writers like Dostoevsky. They also mentioned "Beyond Good and Evil" as a book, specifically the first half of it. Additionally, they made a passing comment about Protestantism being compared to the "woke mob", but no specific books or authors were discussed in this context.


In [1]:
import re

def get_video_id(link):
    """Extract the 11-character YouTube video ID from a link or a bare ID.

    Accepted inputs include watch URLs (``...watch?v=<id>&t=10s``), path-style
    URLs (``youtu.be/<id>``, ``/embed/<id>``), a bare ID, and a bare ID followed
    by query parameters (``<id>&t=54s``).

    Args:
        link: The URL or ID string to parse.

    Returns:
        The 11-character video ID, or ``None`` when ``link`` is not a string or
        contains no ID.
    """
    if not isinstance(link, str):
        return None

    # `^` lets a bare ID match at the start of the input. The trailing negative
    # lookahead rejects longer tokens (e.g. channel IDs) whose first 11
    # characters would otherwise be returned as a bogus video ID.
    match = re.search(
        r"(?:v=|\/|^)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])",
        link.strip(),
    )
    return match.group(1) if match else None

# Quick check: a watch URL with a trailing timestamp parameter should yield only the ID.
get_video_id("https://www.youtube.com/watch?v=erUfLIi9OFM&t=1300s")

'erUfLIi9OFM'