In [None]:
!pip install langchain langchain_community langchain_google_genai youtube-transcript-api faiss-cpu keybert

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.document_loaders import YoutubeLoader
from langchain.chains import LLMChain
from keybert import KeyBERT

In [3]:
video_url = "https://www.youtube.com/watch?v=KKNCiRWd_j0"
loader = YoutubeLoader.from_youtube_url(video_url)
transcript = loader.load()

In [4]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=100,
    separators=[" "],
)

docs = text_splitter.split_documents(transcript)

texts = [doc.page_content for doc in docs]

In [5]:
google_api_key = 'AIzaSyARn_PcqweM5MXHxYaIWGQcf-BDJMP1bDw'
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=google_api_key)
vector_store = FAISS.from_texts(texts, embedding=embeddings)

vector_store.save_local("faiss_index")

template = """
        You are a helpful assistant that can answer questions about YouTube videos
        based on the video's transcript: {docs}

        If you feel like you don't have enough information to answer the question, just say "I don't know".
        \n\n

        Context:\n {docs}\n
        Question: \n{question}\n

        Answer:
"""

In [7]:
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.4, google_api_key=google_api_key)
prompt = PromptTemplate(template=template, input_variables=["docs", "question"])

chain = load_qa_chain(model, chain_type="stuff", prompt=prompt, document_variable_name="docs")

question = "Who is the speaker and what is his belief?"

result = chain.run(input_documents=docs, question=question)
formatted_result = "\n".join(result.split(". "))

print(formatted_result)

The speaker is Mustafa Suleyman, a researcher who has been working on AI for almost 15 years
He believes that AI should be understood as a new digital species and that it has the potential to be a powerful tool for good, but also that it is important to be aware of the risks and to take steps to mitigate them.


In [8]:
summary_template = """
        You are a helpful assistant that can summarize YouTube videos
        based on the video's transcript: {docs}

    Summary:
"""

summary_prompt = PromptTemplate(template=summary_template, input_variables=["docs"])

summary_chain = LLMChain(llm=model, prompt=summary_prompt)

  summary_chain = LLMChain(llm=model, prompt=summary_prompt)


In [9]:
def summarize_transcript(docs):
    combined_text = " ".join([doc.page_content for doc in docs])
    summary = summary_chain.run(docs=combined_text)
    formatted_summary = "\n".join(summary.split(". "))
    return formatted_summary

summary_text = summarize_transcript(docs)
print("SUMMARY OF THE VIDEO:\n")
print(summary_text)

SUMMARY OF THE VIDEO:

Mustafa Suleyman, an AI researcher, believes that AI is best understood as a new digital species
He argues that AI is not merely a tool, but rather a dynamic and emergent entity with the potential to profoundly impact humanity
Suleyman emphasizes the importance of safety and ethical considerations in the development and deployment of AI, and he suggests that we should think of AI as a reflection of humanity itself, with the potential to embody our best qualities
He cautions against the "pessimism aversion trap" and highlights the need to confront potential risks, such as autonomy and recursive self-improvement, while also recognizing that we are still some way from these more advanced capabilities
Suleyman believes that by injecting the best parts of ourselves into AI, we can avoid its potential downsides and harness its immense potential for good.


In [11]:
kw_model = KeyBERT()
combined_text = " ".join([doc.page_content for doc in docs])
keywords = kw_model.extract_keywords(combined_text, top_n=10)

print("KEYWORDS EXTRACTED FROM THE VIDEO:\n")
for keyword in keywords:
    print(f"{keyword[0].upper()}")

KEYWORDS EXTRACTED FROM THE VIDEO:

AI
AIS
FRINGE
CIVILIZATION
AGI
IQ
TECHNOLOGICAL
INTELLIGENCE
TECHNOLOGISTS
IMAGINATION
