In [9]:
!pip install langchain_community langchain_groq youtube_transcript_api faiss-cpu



In [10]:
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate
)
import textwrap

In [11]:
# Shared embedding model, constructed once with its default arguments. It is
# read as a global by create_db_from_youtube_video_url when building the index.
embeddings = HuggingFaceEmbeddings()
     

  embeddings = HuggingFaceEmbeddings()


In [12]:
def create_db_from_youtube_video_url(video_url, chunk_size = 2000, chunk_overlap = 100):
  """Build a FAISS vector store from the transcript of a YouTube video.

  The transcript is loaded with ``YoutubeLoader``, split into overlapping
  chunks, and embedded with the module-level ``embeddings`` object.

  Args:
    video_url: URL of the YouTube video whose transcript should be indexed.
    chunk_size: Maximum size of each transcript chunk passed to the splitter.
      Defaults to 2000, the value this notebook originally hard-coded.
    chunk_overlap: Overlap between consecutive chunks. Defaults to 100, the
      value this notebook originally hard-coded.

  Returns:
    The FAISS vector store created from the transcript chunks.

  Raises:
    ValueError: If loading and splitting produced no chunks (for example, a
      video with no transcript available), because an index cannot be built
      from an empty document list.
  """
  loader = YoutubeLoader.from_youtube_url(video_url)
  transcript = loader.load()

  text_splitter = RecursiveCharacterTextSplitter(
      chunk_size = chunk_size, chunk_overlap = chunk_overlap
  )
  docs = text_splitter.split_documents(transcript)

  # Fail early with a readable message instead of letting the vector store
  # constructor fail on an empty list of documents.
  if not docs:
    raise ValueError(
        f"No transcript text could be loaded for {video_url!r}; cannot build a vector store."
    )

  db = FAISS.from_documents(docs, embeddings)
  return db
     

In [None]:
def get_response_from_query(db, query, k = 4, api_key = None):
  """Answer a question about a video from its most relevant transcript chunks.

  Args:
    db: Vector store exposing ``similarity_search(query, k=...)``; in this
      notebook it is the store returned by ``create_db_from_youtube_video_url``.
    query: The question to ask about the video.
    k: Number of transcript chunks to retrieve and hand to the model.
    api_key: Groq API key. When omitted, the ``GROQ_API_KEY`` environment
      variable is used so that no credential has to live in the notebook.

  Returns:
    A ``(response, docs)`` tuple: the model's answer as a single line of text
    (all runs of whitespace collapsed to one space) and the list of retrieved
    documents the answer was based on.

  Raises:
    ValueError: If no API key was passed and ``GROQ_API_KEY`` is not set.
  """
  import os  # local import: keeps this cell runnable without editing the import cell

  # Resolve the credential first so a missing key fails fast, before any
  # retrieval or network work is done.
  groq_api_key = api_key or os.environ.get("GROQ_API_KEY")
  if not groq_api_key:
    raise ValueError(
        "No Groq API key available: pass api_key=... or set the GROQ_API_KEY environment variable."
    )

  docs = db.similarity_search(query, k = k)
  docs_page_content = " ".join(d.page_content for d in docs)

  chat = ChatGroq(api_key = groq_api_key, model = "llama-3.3-70b-versatile", temperature=0)

  template = """You are a helpful assistant that that can answer questions about youtube videos
        based on the video's transcript: {docs}

        Only use the factual information from the transcript to answer the question.

        If you feel like you don't have enough information to answer the question, say "I don't know"."""

  system_message_prompt = SystemMessagePromptTemplate.from_template(template)
  human_template = "Answer the following question: {question}"

  human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

  chat_prompt = ChatPromptTemplate.from_messages(
      [system_message_prompt, human_message_prompt]
  )

  chain = LLMChain(llm = chat, prompt = chat_prompt)

  response = chain.run(question = query, docs = docs_page_content)

  # Flatten to one line by collapsing whitespace to single spaces. Deleting the
  # newlines outright would glue the last word of one line to the first word
  # of the next.
  response = " ".join(response.split())

  return response, docs

In [14]:
# End-to-end demo: index one video's transcript, ask a question about it, and
# print the answer wrapped to 70 columns.
query = "what is this video about?"
video_url = "https://youtu.be/ZY2JbLtbDWo?si=TIvzY-X94FRuFZ44"
db = create_db_from_youtube_video_url(video_url)
response, docs = get_response_from_query(db = db, query = query)
print("\n".join(textwrap.wrap(response, width = 70)))

This video appears to be about a cricket match, specifically a 2014
IPL (Indian Premier League) elimination match, with commentary on the
game as it unfolds, including descriptions of shots, wickets, and
player performances.


In [15]:
# Print the raw Document objects that were returned alongside the answer, to
# inspect which transcript chunks were retrieved for the query.
print(docs)

[Document(metadata={'source': 'ZY2JbLtbDWo'}, page_content="[Music] that's what they're playing for they hope that they can get into the final and can contest that and be champions for 2014 finally EG back Third Man back it's time who will stay who will go good swing first up it's humid here in Mumbai particularly muggy tonight shot that's a one leg side now offside for four short four [Applause] runs fraction short it doesn't need much for Michael hussy to find that fence oh he waited for it who that's a terrific yes back maximum slower Ball but it still got this away and has gone so fast and it beats the man out there at Deep Cover [Music] Point that's big it is very big and I think we probably would have done the same thing on this round Bal him jja gets his man hussy swinging across the line and the sound of Timber so the chenai Super Kings finally get the Breakthrough Anderson goes back another [Applause] six he goes piig again Anderson not this time held out but he's done some da