### Install and Import Required Libraries

In [1]:
## Install Necessary Libraries
!pip install -q youtube-transcript-api langchain-community langchain-google-genai faiss-cpu

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━[0m [32m1.9/2.2 MB[0m [31m58.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m39.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m85.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m58.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
## Import Libraries
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from google.colab import userdata
import os

In [3]:
# Read the Google API key from Colab Secrets so it is never hard-coded in the notebook.
api_token = userdata.get("GOOGLE_API_KEY")

# Expose the key through the environment; the Google GenAI clients created
# later in this notebook are constructed without an explicit key argument.
# Do not print the token: cell outputs are saved with the notebook.
os.environ["GOOGLE_API_KEY"] = api_token

### Step 1: Document Ingestion
##### First we will fetch the transcript of the video using YouTubeTranscriptApi.

In [25]:
## provide the video ID
youtube_video_id = 'Gfr50f6ZBvo'

## Fetch the English transcript: a list of {'text', 'start', 'duration'} dicts.
## NOTE(review): newer youtube-transcript-api releases appear to replace the static
## get_transcript() with an instance-based fetch() - confirm against the installed version.
try:
    ## `languages` lists the transcript language(s) to request
    transcript_list = YouTubeTranscriptApi.get_transcript(youtube_video_id, languages=['en'])
except Exception as e:
    print(f"Error Occured in Transcription: {e}")
    ## Re-raise instead of swallowing: every later cell needs `transcript_list`, and
    ## continuing would either fail with a NameError or silently reuse a stale value
    ## left in the kernel by an earlier run.
    raise
else:
    ## show the first two segments as a sanity check
    print(transcript_list[0:2])

[{'text': 'the following is a conversation with', 'start': 0.08, 'duration': 3.44}, {'text': 'demus hasabis', 'start': 1.76, 'duration': 4.96}]


In [27]:
## Concatenate the 'text' field of every transcript segment into one space-separated string
transcript = " ".join(segment['text'] for segment in transcript_list)

In [28]:
## Break the transcript into overlapping chunks so retrieval can work on small passages:
## at most 1000 characters per chunk, with 200 characters shared between neighbours
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
text_chunks = text_splitter.create_documents([transcript])

## number of chunks produced
print(len(text_chunks))

168


In [29]:
## peek at the first chunk to sanity-check the split
text_chunks[0]

Document(metadata={}, page_content="the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to inter

### Embedding and Store
##### Generate the embeddings of the text chunks and store them in a vector store for future retrieval

In [30]:
## Embedding model used to turn each transcript chunk into a vector
chunks_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
print(chunks_embeddings)

# Embed every chunk and index the vectors in a FAISS vector store
faiss_vector_store = FAISS.from_documents(
    documents=text_chunks,
    embedding=chunks_embeddings,
)

# Sanity check: number of vectors held by the index
print(faiss_vector_store.index.ntotal)

client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x798c85785090> model='models/embedding-001' task_type=None google_api_key=SecretStr('**********') credentials=None client_options=None transport=None request_options=None
168


In [31]:
## docstore id (unique id of the text chunk) mapped to FAISS index position 0
faiss_vector_store.index_to_docstore_id.get(0)

'85f4b41c-e35b-4cfe-b3ee-acc03fefb0a0'

In [32]:
## fetch the Document stored under the docstore id of index position 0
faiss_vector_store.get_by_ids([faiss_vector_store.index_to_docstore_id.get(0)])

[Document(id='85f4b41c-e35b-4cfe-b3ee-acc03fefb0a0', metadata={}, page_content="the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal qu

In [33]:
## By default the chunk embeddings live only in RAM; persist the index to local disk.
## A single variable holds the folder so the save and load paths cannot drift apart
## (previously the store was saved to "faiss_vector_store" but loaded from "faiss_index").
faiss_index_dir = "faiss_vector_store"
faiss_vector_store.save_local(faiss_index_dir)

## Load the data back from the same folder.
## load_local unpickles the stored docstore, so LangChain requires an explicit opt-in.
## That is acceptable here only because this notebook wrote the files itself -
## never enable it for index files from an untrusted source.
faiss_vector_store = FAISS.load_local(
    faiss_index_dir,
    chunks_embeddings,
    allow_dangerous_deserialization=True,
)

In [34]:
## rebuild the stored vector at index position 0 and report its dimensionality
first_vector = faiss_vector_store.index.reconstruct(0)
print(f"Embedding length: {len(first_vector)}")

Embedding length: 768


### Retrieval
##### To retrieve the relevant text chunks, we create a retriever object; the top-k chunks are picked on the basis of similarity and returned.

In [35]:
## Retriever over the FAISS store: similarity search returning the top 3 chunks per query
retriever = faiss_vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
retriever

VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x798c85888790>, search_kwargs={'k': 3})

In [36]:
## quick retriever check with a sample query
retriever.invoke('What is deepmind.')

[Document(id='ae767766-dce6-41bc-9a2d-19c0f750c412', metadata={}, page_content="that are amazingly smart at certain things like maybe playing go and chess and other things but they don't feel at all in any shape or form conscious in the way that you know you do to me or i do to you and um and i think actually building ai is uh these intelligent constructs uh is one of the best ways to explore the mystery of consciousness to break it down because um we're going to have devices that are pretty smart at certain things or capable of certain things but potentially won't have any semblance of self-awareness or other things and in fact i would advocate if there's a choice building systems in the first place ai systems that are not conscious to begin with uh are just tools um until we understand them better and the capabilities better so on that topic just not as the ceo of deep mind just as a human being let me ask you about this one particular anecdotal evidence of the google engineer who ma

### Augmentation
##### Prepare the prompt with the help of context and query

In [37]:
## Prompt template: restricts the model to the supplied transcript context and
## exposes two placeholders, {context} and {question}
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
      You are a helpful assistant, who will provide me the answer of the given question on the basis of provided context..
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say "I don't have answer to the specific question.".

      {context}
      Question: {question}
    """,
)

In [38]:
## sample question used to walk through the retrieve -> augment -> generate steps manually
question = "Is the topic of nuclear fusion discussed in this video? if yes then what was discussed?"
## fetch the chunks the retriever returns for this question
retrieved_docs = retriever.invoke(question)

In [39]:
## Merge the text of the retrieved documents into one string, separated by blank lines
context_text = "\n\n".join([chunk.page_content for chunk in retrieved_docs])
context_text

"in this case in fusion we we collaborated with epfl in switzerland the swiss technical institute who are amazing they have a test reactor that they were willing to let us use which you know i double checked with the team we were going to use carefully and safely i was impressed they managed to persuade them to let us use it and um and it's a it's an amazing test reactor they have there and they try all sorts of pretty crazy experiments on it and um the the the what we tend to look at is if we go into a new domain like fusion what are all the bottleneck problems uh like thinking from first principles you know what are all the bottleneck problems that are still stopping fusion working today and then we look at we you know we get a fusion expert to tell us and then we look at those bottlenecks and we look at the ones which ones are amenable to our ai methods today yes right and and and then and would be interesting from a research perspective from our point of view from an ai point of\n\

In [40]:
## prepare the prompt by filling the template with the context and the question
final_prompt = prompt.invoke({"context": context_text, "question": question})
final_prompt

StringPromptValue(text='\n      You are a helpful assistant, who will provide me the answer of the given question on the basis of provided context..\n      Answer ONLY from the provided transcript context.\n      If the context is insufficient, just say "I don\'t have answer to the specific question.".\n\n      in this case in fusion we we collaborated with epfl in switzerland the swiss technical institute who are amazing they have a test reactor that they were willing to let us use which you know i double checked with the team we were going to use carefully and safely i was impressed they managed to persuade them to let us use it and um and it\'s a it\'s an amazing test reactor they have there and they try all sorts of pretty crazy experiments on it and um the the the what we tend to look at is if we go into a new domain like fusion what are all the bottleneck problems uh like thinking from first principles you know what are all the bottleneck problems that are still stopping fusion w

### Generation
##### Create LLM Object and generate the answer

In [41]:
## Gemini chat model used for answer generation, with a low sampling temperature
model = ChatGoogleGenerativeAI(temperature=0.2, model="gemini-2.0-flash")
model

ChatGoogleGenerativeAI(model='models/gemini-2.0-flash', google_api_key=SecretStr('**********'), temperature=0.2, client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x798cb988fb90>, default_metadata=(), model_kwargs={})

In [42]:
## call the LLM with the filled prompt and print the generated text
answer = model.invoke(final_prompt)
print(answer.content)

Yes, nuclear fusion is discussed. The discussion includes:

*   Collaboration with EPFL in Switzerland, who allowed use of their test reactor.
*   Identifying bottleneck problems in fusion and using AI methods to address them.
*   Using AI to control and hold plasma in specific shapes for record amounts of time, which was published in a Nature paper.
*   Exploring different plasma shapes for better energy production.
*   Talking to fusion startups to identify the next problem to tackle in the fusion area.
*   Using reinforcement learning to predict and control the plasma by moving the magnetic field, replacing traditional handcrafted controllers.


### By using Chains

In [43]:
def format_docs(retrieved_docs):
    """
    Merge retrieved documents into a single context string.

    Parameters:
    ----------
    retrieved_docs : iterable
        Document-like objects; only the `page_content` attribute of each one is read.

    Returns:
    -------
    str
        The `page_content` of every document, in the order given, joined by two newline characters.
        An empty input yields an empty string.
    """
    separator = "\n\n"
    passages = [document.page_content for document in retrieved_docs]
    return separator.join(passages)

In [44]:
## chain1: for one input question, produce the two prompt inputs side by side -
##   context  : retrieve the matching chunks and merge them into a single string
##   question : the input question, passed through unchanged
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

In [45]:
## check the dict the parallel chain produces for a sample question
parallel_chain.invoke('who is Demis')

{'context': "the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to interview people until i get

In [46]:
## create parser object (turns the model's reply into a plain string)
parser = StrOutputParser()
## full pipeline: build {context, question} -> fill the prompt -> call the LLM -> parse to str
final_chain = parallel_chain | prompt | model | parser

## NOTE(review): the context is built from the chunks the retriever returns for the query,
## so a "summarize the video" request is presumably answered from a few chunks rather than
## the whole transcript - confirm whether that is the intended behaviour
final_chain.invoke('Can you summarize the video')

"Here is a summary of the video, based on the context you provided:\n\nThe speaker discusses finding your unique skills and passions to make a difference in the world. They then transition to questions about a day in the life, including waking habits, coffee consumption, and computer usage. The speaker also touches on fundamental explanations of physics, more careful specifications, and glimpses of things missed in today's physics. They mention a deeper, simpler explanation of things than the standard model. The discussion also involves simulating chemistry, describing electron clouds, and learning a simulation to describe more chemistry types. The goal is to simulate large materials by building functionals that approximate Schrodinger's equation, allowing the description of what electrons are doing and how they interact."