In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
import os
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [4]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

In [5]:
video_id = "5MWT_doo68k"
try:
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id=video_id, languages=["en"])
    transcript = " ".join(chunk["text"] for chunk in transcript_list)
    print(transcript)
except TranscriptsDisabled:
    print("Your video has no transcripts :(")

Chris Anderson: Sam, welcome to TED. Thank you so much for coming. Sam Altman: Thank you. It's an honor. CA: Your company has been releasing
crazy insane new models   pretty much every other week
it feels like. I've been playing with a couple of them. I'd like to show you
what I've been playing. So, Sora, this is the image
and video generator. I asked Sora this: What will it look like when you share
some shocking revelations here at TED? You want to see
how it imagined it, you know? (Laughter) I mean, not bad, right? How would you grade that? Five fingers on all hands. SA: Very close to what I'm wearing,
you know, it's good. CA: I've never seen you
quite that animated. SA: No, I'm not that animated of a person. CA: So maybe a B-plus. But this one genuinely astounded me. When I asked it to come up with a diagram that shows the difference
between intelligence and consciousness. Like  how would you do that? This is what it did. I mean, this is so simple,
but it's incredible. What is the k

In [8]:
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap = 100)
chunks = splitter.create_documents([transcript])

In [9]:
len(chunks)

64

In [10]:
chunks[22]

Document(metadata={}, page_content="on a conscious model or something that's capable of self-improvement\nor anything like that. You know, I ... people have very different views of what\nthe big AI risks are going to be. And I myself have like\nevolved on thinking about where we're going to see those. I continue to believe there will come\nvery powerful models that people can misuse in big ways. People talk a lot about the potential\nfor new kinds of bioterror, models that can present\nlike a real cybersecurity challenge, models that are capable\nof self-improvement in a way that leads\nto some sort of loss of control. So I think there are big risks there. And then there's a lot of other stuff, which honestly is kind of\nwhat I think, many people mean, where people talk about disinformation or models saying things")

In [11]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vector_store = FAISS.from_documents(chunks, embeddings)

In [12]:
vector_store.index_to_docstore_id

{0: 'aae5627d-68f5-49eb-963e-2a136b8d8e0e',
 1: '49843663-a3f0-4f9c-8bc2-a30722517654',
 2: 'db15e8a9-92e7-49be-bfb1-cf79eab85486',
 3: '5c67410f-f1f2-4579-ba24-86ba96e15dbc',
 4: '7fa37d91-a6db-43de-8e43-ce7fb4e4a8cd',
 5: '99735b2a-f21a-467a-b019-0151de481baa',
 6: '2febda66-0a6e-42b7-b3f5-5c99defcd13a',
 7: '5da78733-8051-4148-b03a-b620e5b9b967',
 8: '201362f5-dba9-46d0-95c4-82f44e81f342',
 9: '7665b605-f638-4786-9a2b-c901699820ce',
 10: '094eebfa-0381-4685-92d4-2382bcceddc2',
 11: '0e7599d6-513b-41cb-bbee-36c186be25b7',
 12: '897e725c-a608-4cc0-a043-b114be7ed075',
 13: 'edbcc9a1-59ea-420b-ab83-257701249a63',
 14: '57a8b0e3-db8c-4ec8-a349-1ad3e5226996',
 15: 'bd40e1af-72c3-4b4e-887a-9674092988f2',
 16: 'f53da324-5d39-4e00-838c-fc62cb4c1520',
 17: '87a49f05-bb69-44d4-9576-676de250e587',
 18: '4c91f304-ac52-4a63-abc8-4d590b258531',
 19: '6b421c86-cbd0-4f18-b3d0-4859a3561220',
 20: '00425d8d-961e-431e-9ab8-bf5c1b724caf',
 21: 'f6d1451d-6bb2-4eed-acc7-73167187efe0',
 22: 'beeb2489-8c61-

In [13]:
vector_store.get_by_ids(['0eaccc7b-87f3-40eb-a309-0ba352e7bbdd'])

[Document(id='0eaccc7b-87f3-40eb-a309-0ba352e7bbdd', metadata={}, page_content="at the end of this conversation, you're in control, you pick. But have you considered it\nfrom this person's perspective or the impact it will have on this? I think AI can help us be wiser and make better collective governance\ndecisions than we could before. CA: We're out of time. Sam, I'll give you the last word. What kind of world do you believe, all things considered,\nyour son will grow up into? SA: I remember -- it's so long ago now, I don't know when the first iPad came out. Is it like 15 years, something like that? I remember watching\na YouTube video at the time, of like a little toddler sitting in a doctor's office\nwaiting room or something, and there was a magazine,")]

In [14]:
retriever = vector_store.as_retriever(search_type = "similarity", search_kwargs = {"k":4})

In [15]:
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x11a3c63e0>, search_kwargs={'k': 4})

In [16]:
retriever.invoke("What did Sam Atlan say about OpenAI?")

[Document(id='220a5de4-0428-4bc4-90e0-729f9b70a753', metadata={}, page_content="to one dimension here. You know, probably some\nof the good things are true and probably some\nof the criticism is true. In terms of OpenAI, our goal is to make AGI and distribute it, make it safe, for the broad\nbenefit of humanity. I think by all accounts,\nwe have done a lot in that direction. Clearly our tactics\nhave shifted over time. I think we didn't really know\nwhat we were going to be when we grew up. We didn't think we would have\nto build a company around this. We learned a lot about how it goes and the realities of what these systems\nwere going to take from capital. But I think we've been, in terms of putting incredibly capable AI with a high degree of safety\nin the hands of a lot of people, and giving them tools to sort of do whatever amazing"),
 Document(id='6b744f61-6e1d-4731-bba3-9d290fcd15f8', metadata={}, page_content="that was much more powerful than anything being done. I mean, it is

In [17]:
llm = ChatOpenAI(model = "gpt-4o-mini", temperature = 0.2)

In [19]:
prompt = PromptTemplate(
    template = '''
You are a helpful assistant. Answer ONLY from the provided transcript context. 
If the context is insufficient, just say you don't know.
{context}
Question: {question}
''',
input_variables=['context', 'question']
)

In [20]:
question = "Was there any discussion about Deepseek? If yes, what was being discussed about it?"
retrieved_docs = retriever.invoke(question)

In [21]:
retrieved_docs

[Document(id='897e725c-a608-4cc0-a043-b114be7ed075', metadata={}, page_content="than DeepSeek allegedly spent, although I know there's\ncontroversy around that. Are you confident that the actual\nbetter model is going to be recognized? Or are you actually like, isn't this in some ways life-threatening\nto the notion that, yeah, by going to massive scale,\ntens of billions of dollars of investment, we can maintain an incredible lead. SA: All day long, I call people\nand beg them to give us their GPUs. We are so incredibly constrained. Our growth is going like this. DeepSeek launched,\nand it didn’t seem to impact it. There's other stuff that's happening. CA: Tell us about the growth, actually. You gave me a shocking\nnumber backstage there. SA: I have never seen"),
 Document(id='0e7599d6-513b-41cb-bbee-36c186be25b7', metadata={}, page_content="hosted our first community session to kind of decide the parameters\nof our open-source model and how we want to shape it. We're going to do a ve

In [22]:
context = "\n\n".join(doc.page_content for doc in retrieved_docs)
context

"than DeepSeek allegedly spent, although I know there's\ncontroversy around that. Are you confident that the actual\nbetter model is going to be recognized? Or are you actually like, isn't this in some ways life-threatening\nto the notion that, yeah, by going to massive scale,\ntens of billions of dollars of investment, we can maintain an incredible lead. SA: All day long, I call people\nand beg them to give us their GPUs. We are so incredibly constrained. Our growth is going like this. DeepSeek launched,\nand it didn’t seem to impact it. There's other stuff that's happening. CA: Tell us about the growth, actually. You gave me a shocking\nnumber backstage there. SA: I have never seen\n\nhosted our first community session to kind of decide the parameters\nof our open-source model and how we want to shape it. We're going to do a very\npowerful open-source model. I think this is important. We're going to do something\nnear the frontier, I think, better than any current\nopen-source model 

In [23]:
final_prompt = prompt.invoke({'context': context, 'question':question})

In [24]:
final_prompt

StringPromptValue(text="\nYou are a helpful assistant. Answer ONLY from the provided transcript context. \nIf the context is insufficient, just say you don't know.\nthan DeepSeek allegedly spent, although I know there's\ncontroversy around that. Are you confident that the actual\nbetter model is going to be recognized? Or are you actually like, isn't this in some ways life-threatening\nto the notion that, yeah, by going to massive scale,\ntens of billions of dollars of investment, we can maintain an incredible lead. SA: All day long, I call people\nand beg them to give us their GPUs. We are so incredibly constrained. Our growth is going like this. DeepSeek launched,\nand it didn’t seem to impact it. There's other stuff that's happening. CA: Tell us about the growth, actually. You gave me a shocking\nnumber backstage there. SA: I have never seen\n\nhosted our first community session to kind of decide the parameters\nof our open-source model and how we want to shape it. We're going to do

In [25]:
response = llm.invoke(final_prompt)
print(response.content)

Yes, there was discussion about DeepSeek. The conversation touched on the impact of DeepSeek's launch, with SA mentioning that it didn't seem to affect their growth. There was also a comparison of spending, with CA noting that they seem to be spending significantly more than DeepSeek. Additionally, SA expressed a belief that open-source models have an important place in the ecosystem, indicating that they were late to act on that but plan to do it well now.


Chaining

In [26]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [27]:
def format_docs(retrieved_docs):
    context = '\n\n'.join(doc.page_content for doc in retrieved_docs)
    return context

In [28]:
parallel_chain = RunnableParallel(
    {
        'context': retriever|RunnableLambda(format_docs),
        'question': RunnablePassthrough()
    }
)

In [29]:
parallel_chain.invoke('What did Sam Altan say about their new developments?')

{'context': 'Chris Anderson: Sam, welcome to TED. Thank you so much for coming. Sam Altman: Thank you. It\'s an honor. CA: Your company has been releasing\ncrazy insane new models   pretty much every other week\nit feels like. I\'ve been playing with a couple of them. I\'d like to show you\nwhat I\'ve been playing. So, Sora, this is the image\nand video generator. I asked Sora this: What will it look like when you share\nsome shocking revelations here at TED? You want to see\nhow it imagined it, you know? (Laughter) I mean, not bad, right? How would you grade that? Five fingers on all hands. SA: Very close to what I\'m wearing,\nyou know, it\'s good. CA: I\'ve never seen you\n\nare getting more capable, we have a system where we all\nget to understand what\'s being released in the world. I think this is really important. And I think we’re not far away\nfrom models that are going to be of great public interest in that sense. CA: So Sam, I asked\nyour o1-pro reasoning model, which is inc

In [30]:
 parser = StrOutputParser()

In [31]:
chain = parallel_chain | prompt | llm | parser

In [32]:
chain.invoke("What's the key takeaway from this video?")

'The key takeaway from the video is the discussion around the development of advanced AI and the responsibilities that come with it. There is a recognition of the potential risks and the importance of creating a framework to mitigate those risks while also emphasizing the need for collective input from the global population rather than decisions being made solely by a small elite. The conversation highlights the balance between innovation and safety in AI development, as well as the importance of transparency and trust in the process.'