## Importing Libraries

In [24]:
from youtube_transcript_api import YouTubeTranscriptApi,TranscriptsDisabled,NoTranscriptFound
from langchain_openai import ChatOpenAI,OpenAIEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda
from langchain_core.output_parsers import StrOutputParser

## i) Indexing

#### Step 1: Document Ingestion

In [3]:

video_id = "lTCzIDITaac"

# Reset both results before fetching so a failed fetch cannot leave values
# from an earlier run of this cell lingering in the kernel.
transcript_list = None
transcript = None

try:
    # Create an instance of the API
    api = YouTubeTranscriptApi()

    # Fetch transcript using the instance method
    transcript_list = api.fetch(video_id)

    # Use attribute access (.text) instead of dictionary access (["text"])
    transcript = " ".join(chunk.text for chunk in transcript_list)
    print(transcript)

except TranscriptsDisabled:
    print("No captions available for this video.")
except NoTranscriptFound:
    print("No transcript found for this video.")
except Exception as e:
    print(f"Unexpected error: {e}")
else:
    # Inspect the fetched object only when the fetch succeeded; running these
    # prints unconditionally would fail (or show stale data) after an error.
    print(transcript_list)
    print(len(transcript_list))
    print(type(transcript_list))

Let's [Music] go. [Music] For me, what does capitalism mean and what is the difference between capitalism and other forms? For me, capitalism is about giving people as much economic freedom as possible. Will our visas get canceled if we say why Americans are unhappy? I hope not. I'm not sure we've gone that bad as yet in America. Is there a line? I think Bitcoin is here to stay. Crypto is here to stay. This has lasted long enough. It's getting mainstream. You think so? Yeah. Whether it's banks or asset management firms who told you 5 years ago we'll never touch this thing. Have you got an allocation? [Music] Hi Ruch, thank you for coming. Uh, you've been on this table before. You've had lunch here before. So, I hope you feel slightly more comfortable than you might in a new place altogether. Yeah, I think that's uh fair. I mean, in terms of although the approach to the apartment is uh a bit of a project, but otherwise I feel has something changed uh in what way? Approach to the apartme

#### Step 2: Text Splitting

In [10]:
# Split the transcript into overlapping chunks: 1000 characters per chunk,
# with 200 characters shared between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = splitter.create_documents([transcript])

# Quick inspection: first chunk, its type, the container type, and the chunk count.
first_chunk = chunks[0]
for item in (first_chunk, type(first_chunk), type(chunks), len(chunks)):
    print(item)

page_content='Let's [Music] go. [Music] For me, what does capitalism mean and what is the difference between capitalism and other forms? For me, capitalism is about giving people as much economic freedom as possible. Will our visas get canceled if we say why Americans are unhappy? I hope not. I'm not sure we've gone that bad as yet in America. Is there a line? I think Bitcoin is here to stay. Crypto is here to stay. This has lasted long enough. It's getting mainstream. You think so? Yeah. Whether it's banks or asset management firms who told you 5 years ago we'll never touch this thing. Have you got an allocation? [Music] Hi Ruch, thank you for coming. Uh, you've been on this table before. You've had lunch here before. So, I hope you feel slightly more comfortable than you might in a new place altogether. Yeah, I think that's uh fair. I mean, in terms of although the approach to the apartment is uh a bit of a project, but otherwise I feel has something changed uh in what way? Approach 

#### Step 3 & 4: Embedding Generation and Storing in Vector Store

In [11]:
# Embed every chunk and index the resulting vectors in a FAISS store.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(documents=chunks, embedding=embeddings)

In [13]:
# Inspect the store's mapping from integer index position to docstore document id.
vector_store.index_to_docstore_id

{0: '866cc4ad-c2e0-4b08-8a3c-47767e3b9eb5',
 1: '2cf707fc-ce29-4741-9545-c614f55571e1',
 2: '7be12496-101f-4993-a116-51c3f1bd5558',
 3: '759dc305-8438-4dba-bc8a-7d16b7e829fe',
 4: '1b725efa-0424-4de1-bf13-14dc5f82c054',
 5: '903712be-72ae-4482-a9cc-b28f07ae2a4e',
 6: '2bb81e2e-3cd1-4d97-b7e6-c6837c018fac',
 7: '625ef0b3-6f51-4633-8ce0-ddf3b282df8a',
 8: 'e7a6fec1-a24a-46c9-b553-ff85b7cc8c23',
 9: 'cd52e244-3014-4351-a5eb-96d84453592b',
 10: '34a330f4-efcf-4501-9888-5dc9648135e9',
 11: '384825c1-15f2-4015-a035-2b16ebadb003',
 12: 'c0c89abf-538e-41b3-92b4-4fdd9b635295',
 13: '934befe2-dbc7-4563-890a-f4198c62dea0',
 14: '007386b9-30d9-4bf4-8e9d-b7cda9c006f6',
 15: 'bc284c44-849f-4af7-a805-a58469eee827',
 16: 'a2958809-ee6a-4bfb-81ef-095ca17868a0',
 17: '21668030-74d8-4cb1-9dda-ef38020509c0',
 18: 'e18f4e4f-369a-4684-a1d6-6e8763a7571a',
 19: '8bc8f4e1-85b2-4fc4-b9eb-69cf0df79681',
 20: '2c6e56ff-2bf4-41c2-a84f-ad79c473662a',
 21: '5b5c4965-2bd4-4039-9a22-603c1072bcf8',
 22: '991808e8-3753-

## ii) Retrieval

In [15]:
# Similarity-search retriever; k=4 asks for the 4 most similar chunks per query.
retriver = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [19]:
# Smoke-test the retriever with a sample query; the cell displays the returned Document list.
retriver.invoke("china vs india")

[Document(id='2bc19dbe-5c3b-467b-97d6-f1d427579394', metadata={}, page_content="remember that in 2019 in even in places like Hjao you room service was actually being done by robots. This is huge advances in China. So technology is its edge and even on the AI front the only other country in the world today which can do AI of any meaningful scale is China. No European country or any emerging market can. So I'd say that China in terms of so the two big negatives in China too much debt and demographics are all negative now in China is India going the same way demographic wise yeah but we are far far sort of you know our population is not shrinking in our lifetimes not up until 2100 they say yeah so as I said like in our lifetimes and you know beyond a point that's is too far off so for the next few years yes our demographic our population growth also has slowed down but it's no way close to sort of you know being a drag on growth or something. In China's case it is and they have too much d

## iii) Augmentation

In [20]:
# Prompt that restricts the model to the supplied transcript context and
# tells it to admit when the context does not contain the answer.
# Placeholders: {context} (retrieved transcript text) and {question}.
prompt=PromptTemplate(
    template="""
    You are a helpful assistant.
    Answer ONLY from the provided transcript content.
    If the context is insufficient,just say you dont know.

    Context:{context}

    Question:{question}
    """,
    input_variables=["context","question"],
)

In [21]:
# Sample user question, and the transcript chunks the retriever returns for it.
question = "What are we talking here?"
retrieved_docs = retriver.invoke(question)

## iv) Generation

In [22]:
# Stitch the retrieved chunks into one context string, separated by blank lines.
page_texts = [doc.page_content for doc in retrieved_docs]
context_text = "\n\n".join(page_texts)

# Fill the prompt template with that context and the user's question.
final_promt = prompt.format(question=question, context=context_text)

In [23]:
# Chat model constructed with no arguments, i.e. the library's default settings.
llm = ChatOpenAI()

# Send the fully formatted prompt and print only the text of the reply.
answer = llm.invoke(final_promt)
print(answer.content)

Sorry, I do not have enough information to provide an accurate answer to your question.


## Building Chains

In [25]:
def format_docs(retrieved_docs):
    """Join the ``page_content`` of each retrieved document into one string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The page contents concatenated in order, separated by a blank line
        (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    page_texts = [doc.page_content for doc in retrieved_docs]
    return "\n\n".join(page_texts)

In [26]:
# Build both prompt inputs from the single question string given to the chain.
parallel_chain = RunnableParallel(
    {
        # Retrieve chunks for the question, then flatten them to one string.
        # RunnableLambda wraps format_docs so the plain function can be piped as a runnable.
        'context': retriver | RunnableLambda(format_docs),
        # Forward the incoming input unchanged as the question.
        'question': RunnablePassthrough(),
    }
)

In [27]:
# Output parser placed after the LLM as the last stage of the chain.
parser = StrOutputParser()

In [28]:
# End-to-end pipeline: build {context, question} -> fill the prompt -> call the LLM -> parse the output.
main_chain = parallel_chain | prompt | llm | parser

In [29]:
# Run the full chain on a single question string; the cell displays the returned answer.
# NOTE(review): the saved answer may name people who are only mentioned in the
# retrieved chunks rather than the actual speakers — verify against the video.
main_chain.invoke("Who are the speakers, their names?")

'The speakers mentioned in the provided transcript are Putin and Christine Lagard.'