In [None]:
%pip install langchain -q
%pip install youtube_transcript_api faiss-gpu -q 
%pip install langchain-google-genai -qU

In [None]:
import os

# Keep a key that is already exported in the environment; only fall back to the
# placeholder (replace "YOUR_KEY" with your own key) when nothing is configured.
# A plain assignment here would overwrite a real key with the placeholder.
os.environ.setdefault("GOOGLE_API_KEY", "YOUR_KEY")

In [None]:
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import GooglePalmEmbeddings
from langchain.vectorstores import FAISS
from langchain import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI

In [None]:
# Embedding model instance reused by create_db for every index it builds.
EMBEDDINGS = GooglePalmEmbeddings()

def create_db(video_url, chunk_size=1000, chunk_overlap=100):
  """Build a FAISS vector index from a YouTube video's transcript.

  Args:
    video_url: URL of the YouTube video whose transcript is loaded.
    chunk_size: `chunk_size` passed to RecursiveCharacterTextSplitter.
    chunk_overlap: `chunk_overlap` passed to RecursiveCharacterTextSplitter.

  Returns:
    The FAISS vector store built from the transcript chunks and EMBEDDINGS.

  Raises:
    ValueError: If loading and splitting the transcript yields no chunks.
  """
  transcript = YoutubeLoader.from_youtube_url(video_url).load()

  splitter = RecursiveCharacterTextSplitter(
      chunk_size=chunk_size, chunk_overlap=chunk_overlap
  )
  docs = splitter.split_documents(transcript)

  # Fail early with a readable message instead of letting the vector store
  # choke on an empty document list.
  if not docs:
    raise ValueError(f"No transcript text could be loaded for {video_url!r}")

  return FAISS.from_documents(docs, EMBEDDINGS)

In [None]:
def query_db(db, query, k=4, model="gemini-pro"):
  """Answer a question about a video using its indexed transcript.

  The `k` transcript chunks most similar to `query` are retrieved from `db`,
  joined into one context string, and sent to the chat model together with
  the question.

  Args:
    db: Vector store exposing `similarity_search(query, k=...)`.
    query: The question to ask about the video.
    k: Number of transcript chunks to retrieve as context.
    model: Name of the Google Generative AI chat model to use.

  Returns:
    A `(response, docs)` tuple: the model's answer as a single-line string,
    and the retrieved documents that were used as context.
  """
  docs = db.similarity_search(query, k=k)

  page_content = " ".join(d.page_content for d in docs)

  llm = ChatGoogleGenerativeAI(model=model, convert_system_message_to_human=True)

  prompt = PromptTemplate(
      input_variables=["question", "docs"],
      template="""
        You are a helpful assistant that can answer questions about YouTube videos
        based on the video's transcript. Given a question and a video transcript,
        answer the question in a comprehensive and informative way,
        using only the factual information from the transcript.

        If you don't have enough information to answer the question, say "I don't know."

        Question: {question}

        Transcript: {docs}
        """,
    )

  chain = prompt | llm | StrOutputParser()

  response = chain.invoke({"question": query, "docs": page_content})

  # Flatten the answer to one line. Line breaks become single spaces rather
  # than being deleted, so the words on either side of a break stay separate.
  response = " ".join(response.split())

  return response, docs

In [None]:
# YouTube video whose transcript is indexed here and queried in later cells.
video_url = 'https://www.youtube.com/watch?v=bSHp7WVpPgc'
# Load, split and embed the transcript into a FAISS index via create_db.
db = create_db(video_url)

In [None]:
import textwrap
# Ask for a summary; query_db returns the answer text and the retrieved chunks.
query = "Can you summarize the video?"
response, docs = query_db(db, query)
# Re-wrap the single-line answer to 85 columns for readable output.
print(textwrap.fill(response, width=85))

In [None]:
# Follow-up question on a specific topic. The query text is used both for the
# similarity search and as the question sent to the model, so spelling matters.
query = "What are they saying about open source?"
response, docs = query_db(db, query)
# Re-wrap the single-line answer to 85 columns for readable output.
print(textwrap.fill(response, width=85))