In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process
# environment before checking for the key. Without this call the import above
# is unused and a key that lives only in .env is never seen.
load_dotenv()
assert os.getenv("OPENAI_API_KEY") is not None, (
    "OPENAI_API_KEY is not set; export it or add it to a .env file."
)

from typing import Any
import gradio as gr

from langchain_openai import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI

from langchain_community.vectorstores.faiss import FAISS

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import get_buffer_string
from langchain_core.runnables import RunnableLambda
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.document_loaders import YoutubeLoader
from langchain.schema import format_document
from langchain.prompts.prompt import PromptTemplate

from langchain.memory import ConversationBufferMemory

from operator import itemgetter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class VideoQueryLLM():
    """Conversational question answering over the transcript of a YouTube video.

    On construction the English transcript is loaded, split into chunks,
    embedded with OpenAI embeddings and indexed in a FAISS vector store.
    Two runnables are then exposed as attributes:

    * ``standalone_question`` -- loads the chat history from ``memory`` and
      rewrites a follow-up question as a standalone question.
    * ``final_chain`` -- ``standalone_question`` followed by retrieval and
      answer generation; its output is a dict with the keys ``"answer"``
      (the chat model's message) and ``"docs"`` (the retrieved documents).

    The chains only read from ``memory``; they never write to it.
    """

    # Transcript chunking, in characters. The transcript is split before
    # indexing so that retrieval returns only the relevant passages instead of
    # the whole transcript as one document.
    TRANSCRIPT_CHUNK_SIZE = 1000
    TRANSCRIPT_CHUNK_OVERLAP = 100

    def __init__(self, youtube_url:str) -> None:
        """Build the retriever, prompts, memory and chains for ``youtube_url``.

        Args:
            youtube_url: URL of the YouTube video whose transcript is queried.

        Raises:
            ValueError: If no transcript text could be loaded for the video.
        """
        self.youtube_url = youtube_url
        self.retriever = self.generate_retriever_from_videourl()
        self._init_prompts_templates()
        # input_key/output_key match the dicts passed to memory.save_context.
        self.memory = ConversationBufferMemory(
            return_messages=True, output_key="answer", input_key="question"
        )
        self.standalone_question, self.final_chain = self.create_chain()

    def generate_retriever_from_videourl(self) -> "VectorStoreRetriever":
        """Load the video's English transcript and index it in FAISS.

        Returns:
            A retriever over the chunked, embedded transcript.

        Raises:
            ValueError: If the loader returns no documents, or the transcript
                contains no text to index.
        """
        transcript_loader = YoutubeLoader.from_youtube_url(self.youtube_url, language="en")
        transcript = transcript_loader.load()
        no_transcript_msg = (
            f"No English transcript could be loaded for {self.youtube_url!r}."
        )
        if not transcript:
            # Fail with a clear message instead of an opaque error from FAISS.
            raise ValueError(no_transcript_msg)

        # Imported locally so this method carries its own dependency.
        from langchain.text_splitter import RecursiveCharacterTextSplitter

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.TRANSCRIPT_CHUNK_SIZE,
            chunk_overlap=self.TRANSCRIPT_CHUNK_OVERLAP,
        )
        transcript_chunks = splitter.split_documents(transcript)
        if not transcript_chunks:
            raise ValueError(no_transcript_msg)

        vector_store = FAISS.from_documents(transcript_chunks, OpenAIEmbeddings())
        return vector_store.as_retriever()

    def _init_prompts_templates(self)-> None:
        """Create the question-condensing, answering and document prompts."""
        # Rewrites a follow-up question, given the chat history, so that it can
        # be understood on its own.
        _template = """Given the following conversation and a follow up question, 
        rephrase the follow up question to be a standalone question.

        Chat History:
        {chat_history}
        Follow Up Input: {question}
        Standalone question:"""
        self.CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

        # Answers the (standalone) question from the retrieved context only.
        template = """Answer the question based only on the following context:
        {context}

        Question: {question}
        """
        self.ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

        # Renders a retrieved document as its raw page content.
        self.DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")

    def _combine_documents(
        self, docs, document_prompt, document_separator="\n\n"
        ):
        """Format each document with ``document_prompt`` and join the results.

        Args:
            docs: Iterable of documents to format.
            document_prompt: Prompt passed to ``format_document`` for each one.
            document_separator: String placed between formatted documents.

        Returns:
            The formatted documents joined into a single string.
        """
        return document_separator.join(
            format_document(doc, document_prompt) for doc in docs
        )

    def create_chain(self):
        """Assemble the standalone-question chain and the full QA chain.

        Returns:
            A ``(standalone_question, final_chain)`` tuple of runnables. Both
            take a dict with a ``"question"`` key as input.
        """
        # Step 1: load the conversation so far. This adds a "chat_history" key
        # (read from the memory's "history" variable) to the input dict.
        loaded_memory = RunnablePassthrough.assign(
            chat_history=RunnableLambda(self.memory.load_memory_variables) | itemgetter("history"),
        )

        # Step 2: condense history + follow-up into a standalone question.
        condense_step = {
            "standalone_question": {
                "question": lambda x: x["question"],
                "chat_history": lambda x: get_buffer_string(x["chat_history"]),
            }
            | self.CONDENSE_QUESTION_PROMPT
            | ChatOpenAI(temperature=0)
            | StrOutputParser(),
        }

        # Step 3: retrieve transcript passages for the standalone question.
        retrieved_documents = {
            "docs": itemgetter("standalone_question") | self.retriever,
            "question": lambda x: x["standalone_question"],
        }

        # Step 4: build the inputs of the answer prompt.
        final_inputs = {
            "context": lambda x: self._combine_documents(
                docs=x["docs"], document_prompt=self.DEFAULT_DOCUMENT_PROMPT
            ),
            "question": itemgetter("question"),
        }

        # Step 5: generate the answer and pass the supporting documents along.
        answer = {
            "answer": final_inputs | self.ANSWER_PROMPT | ChatOpenAI(),
            "docs": itemgetter("docs"),
        }

        standalone_question = loaded_memory | condense_step
        final_chain = standalone_question | retrieved_documents | answer
        return standalone_question, final_chain


In [3]:
def initialize_video_query_llm(youtube_url:str) -> tuple:
    """Build the global ``VideoQueryLLM_obj`` for a video and return its embed HTML.

    The URL is validated before any work is done, so an empty input produces
    the intended ``gr.Error`` and leaves a previously initialised
    ``VideoQueryLLM_obj`` untouched.

    Args:
        youtube_url: YouTube URL as typed by the user.

    Returns:
        A ``(embed_html, status_message)`` tuple: the ``<iframe>`` markup for
        the video player and a status string.

    Raises:
        gr.Error: If ``youtube_url`` is empty or only whitespace.
    """
    import html  # stdlib; imported locally so the function carries its own dependency

    global VideoQueryLLM_obj

    # Validate first: constructing VideoQueryLLM is the expensive step.
    if not youtube_url or not youtube_url.strip():
        raise gr.Error('Paste a Youtube link')
    youtube_url = youtube_url.strip()

    VideoQueryLLM_obj = VideoQueryLLM(youtube_url)

    # Turn the watch URL into its embeddable form. The value is user input, so
    # escape it and quote the attribute to keep it from injecting markup.
    embed_url = html.escape(youtube_url.replace('watch?v=', 'embed/'), quote=True)

    embed_html = (
        f"<iframe width='560' height='315' src='{embed_url}' "
        "title='YouTube video player' frameborder='0' "
        "allow='accelerometer; autoplay; clipboard-write; encrypted-media; "
        "gyroscope; picture-in-picture; web-share' allowfullscreen></iframe>"
    )

    return embed_html, "Video transcripted and LLM chain initialized."


In [15]:
def my_chat_function(message: str, history):
    """Answer ``message`` about the processed video and extend the chat history.

    Args:
        message: The user's question.
        history: List of ``(user_message, bot_message)`` tuples shown in the
            chatbot; ``None`` is treated as an empty history.

    Returns:
        A ``("", history)`` tuple: an empty string to clear the message box
        and the history with the new exchange appended.
    """
    if history is None:
        history = []

    if 'VideoQueryLLM_obj' not in globals():
        history.append((message, "Please provide a YouTube link first and Process Video"))
        return "", history

    chain_input = {"question": message}
    # final_chain already runs the standalone-question step, so invoke it once
    # only; calling standalone_question separately repeats that model call.
    response = VideoQueryLLM_obj.final_chain.invoke(chain_input)
    answer_text = response["answer"].content

    # The chains only read memory, so record the turn here for later questions.
    VideoQueryLLM_obj.memory.save_context(chain_input, {"answer": answer_text})
    history.append((message, answer_text))
    return "", history

In [22]:
# Re-running this cell should start from a clean slate: drop any chain left
# over from a previously processed video so the chat asks for a link again.
if 'VideoQueryLLM_obj' in globals():
    print("VideoQueryLLM_obj already exists... so deleting it.")
    del VideoQueryLLM_obj


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Video GPT-3 Chatbot")
    gr.Markdown("Ask a question about the video and the chatbot will answer it.")
    gr.Markdown("Paste a YouTube link and let the conversation begin!")

    with gr.Row():
        # Left column: video input, embedded player and processing status.
        with gr.Column():
            inp = gr.Textbox(label = "Enter YouTube URL here.")
            btn = gr.Button(value="Process Video")
            # `label` is a text label, so pass a string rather than a bool.
            video = gr.HTML(label="Video")
            disp = gr.Textbox(label="Status")

        # Right column: the chat about the processed video.
        with gr.Column():
            chatbot = gr.Chatbot(label="Ask me anything about the video!")
            msg = gr.Textbox(label="Type your message here.")
            # Resets both the message box and the chat display.
            clear = gr.ClearButton([msg, chatbot])

    # Submitting a message answers it and clears the message box.
    msg.submit(my_chat_function, [msg, chatbot], [msg, chatbot])
    # Processing a video builds the chain and shows the player and status.
    btn.click(initialize_video_query_llm, inputs=inp, outputs=[video, disp] )

demo.launch()

Running on local URL:  http://127.0.0.1:7872

To create a public link, set `share=True` in `launch()`.


