In [1]:
import os
from dotenv import load_dotenv
# Read variables from a local .env file (if one exists) before validating the
# key. Without this call the imported `load_dotenv` is never used, so the
# check below only passes when the key is already exported in the shell.
load_dotenv()
# Fail fast, with an actionable message, when the key is missing or empty.
assert os.getenv("OPENAI_API_KEY"), (
    "OPENAI_API_KEY is not set; export it or add it to a .env file"
)

import gradio as gr

from langchain_openai import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI

from langchain_community.vectorstores.faiss import FAISS

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import get_buffer_string
from langchain_core.runnables import RunnableLambda
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.document_loaders import YoutubeLoader
from langchain.schema import format_document
from langchain.prompts.prompt import PromptTemplate

from langchain.memory import ConversationBufferMemory

from operator import itemgetter

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def generate_retriever_from_videourl(youtube_url:str) -> VectorStoreRetriever:
    """Build a FAISS-backed retriever from the documents loaded for a YouTube URL.

    Args:
        youtube_url: URL of the video handed to ``YoutubeLoader``; the loader
            is asked for the English (``language="en"``) version.

    Returns:
        The retriever obtained from ``as_retriever()`` on a FAISS vector store
        built from the loaded documents with ``OpenAIEmbeddings``.
    """
    documents = YoutubeLoader.from_youtube_url(youtube_url, language="en").load()
    index = FAISS.from_documents(documents, OpenAIEmbeddings())
    return index.as_retriever()

In [5]:
def prompts_templates()-> tuple[PromptTemplate, ChatPromptTemplate, PromptTemplate]:
    """Create the three prompt templates used by the conversational chain.

    Returns:
        A ``(condense_question_prompt, answer_prompt, document_prompt)`` tuple:

        * condense_question_prompt -- ``PromptTemplate`` with the variables
          ``chat_history`` and ``question``; asks for the follow-up question
          to be rephrased as a standalone question.
        * answer_prompt -- ``ChatPromptTemplate`` (not a ``PromptTemplate``)
          with the variables ``context`` and ``question``.
        * document_prompt -- ``PromptTemplate`` that renders a document as its
          ``page_content`` only.
    """
    # NOTE: the continuation lines of both templates keep the indentation of
    # this function body, so that whitespace is part of the prompt text.
    condense_text = """Given the following conversation and a follow up question, 
    rephrase the follow up question to be a standalone question.

    Chat History:
    {chat_history}
    Follow Up Input: {question}
    Standalone question:"""
    condense_question_prompt = PromptTemplate.from_template(condense_text)

    answer_text = """Answer the question based only on the following context:
    {context}

    Question: {question}
    """
    answer_prompt = ChatPromptTemplate.from_template(answer_text)

    document_prompt = PromptTemplate.from_template(template="{page_content}")
    return condense_question_prompt, answer_prompt, document_prompt

In [6]:
def _combine_documents(
    docs, document_prompt, document_separator="\n\n"
):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)

In [8]:
def create_chain(
    memory: ConversationBufferMemory, 
    faiss_retriever: VectorStoreRetriever, 
    CONDENSE_QUESTION_PROMPT: PromptTemplate, 
    ANSWER_PROMPT: ChatPromptTemplate, 
    DEFAULT_DOCUMENT_PROMPT: PromptTemplate
):
    """Assemble the conversational retrieval pipeline.

    Args:
        memory: Conversation memory; its ``load_memory_variables`` result must
            expose the past messages under the ``"history"`` key.
        faiss_retriever: Retriever queried with the standalone question.
        CONDENSE_QUESTION_PROMPT: Prompt taking ``question`` and
            ``chat_history`` and asking for a standalone question.
        ANSWER_PROMPT: Chat prompt taking ``context`` and ``question``.
        DEFAULT_DOCUMENT_PROMPT: Prompt used to render each retrieved document
            before the documents are joined into ``context``.

    Returns:
        A ``(standalone_question_chain, final_chain)`` tuple. Both expect an
        input mapping with a ``"question"`` key. The first produces
        ``{"standalone_question": str}``. The second produces
        ``{"answer": <chat model output>, "docs": <retrieved documents>}``.
    """
    # Step 1: attach the stored conversation to the input under "chat_history".
    with_chat_history = RunnablePassthrough.assign(
        chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
    )

    # Step 2: condense the follow-up question and the history into a single
    # standalone question (temperature 0, plain-string output).
    condense_step = {
        "standalone_question": {
            "question": lambda x: x["question"],
            "chat_history": lambda x: get_buffer_string(x["chat_history"]),
        }
        | CONDENSE_QUESTION_PROMPT
        | ChatOpenAI(temperature=0)
        | StrOutputParser(),
    }

    # Step 3: retrieve documents for the standalone question, which also
    # becomes the "question" seen by the answering prompt.
    retrieved_documents = {
        "docs": itemgetter("standalone_question") | faiss_retriever,
        "question": lambda x: x["standalone_question"],
    }

    # Step 4: build the inputs of the answering prompt.
    final_inputs = {
        "context": lambda x: _combine_documents(
            docs=x["docs"], document_prompt=DEFAULT_DOCUMENT_PROMPT
        ),
        "question": itemgetter("question"),
    }

    # Step 5: answer the question and pass the retrieved documents through.
    answer = {
        "answer": final_inputs | ANSWER_PROMPT | ChatOpenAI(),
        "docs": itemgetter("docs"),
    }

    # Compose the steps. The standalone-question chain is returned separately
    # so callers can inspect the rewritten question on its own.
    standalone_question_chain = with_chat_history | condense_step
    final_chain = standalone_question_chain | retrieved_documents | answer
    return standalone_question_chain, final_chain

In [11]:
# Retriever over the documents loaded for the target video.
faiss_retriever = generate_retriever_from_videourl("https://www.youtube.com/watch?v=fJ9rUzIMcZQ")

# Conversation memory: stores the user's "question" and the model's "answer",
# and hands the history back as message objects.
memory = ConversationBufferMemory(
    input_key="question",
    output_key="answer",
    return_messages=True,
)

# Prompt templates consumed by the chain.
(
    CONDENSE_QUESTION_PROMPT,
    ANSWER_PROMPT,
    DEFAULT_DOCUMENT_PROMPT,
) = prompts_templates()

# Chains used by the chat callback: one that only rewrites the question, and
# the full question -> retrieval -> answer pipeline.
standalone_question, final_chain = create_chain(
    memory=memory,
    faiss_retriever=faiss_retriever,
    CONDENSE_QUESTION_PROMPT=CONDENSE_QUESTION_PROMPT,
    ANSWER_PROMPT=ANSWER_PROMPT,
    DEFAULT_DOCUMENT_PROMPT=DEFAULT_DOCUMENT_PROMPT,
)
def my_chat_function(message: str, history):
    """Answer one chat message and record the exchange in ``memory``.

    Args:
        message: The user's latest message; sent to the chains as "question".
        history: Chat history supplied by the chat UI. It is not used here;
            the conversation is tracked in the module-level ``memory``.

    Returns:
        The text content of the model's answer.
    """
    chain_input = {"question": message}

    # Debug aid: show the standalone question derived from the message.
    # NOTE: final_chain computes the standalone question again internally, so
    # this print costs one additional model call per message.
    print(standalone_question.invoke(chain_input))

    response = final_chain.invoke(chain_input)
    answer_text = response["answer"].content

    # Persist the turn so the next message is condensed against it.
    memory.save_context(chain_input, {"answer": answer_text})
    return answer_text
# Chat UI wired to the callback; launch() starts the local web server.
demo = gr.ChatInterface(
    fn=my_chat_function,
    chatbot=gr.Chatbot(height=300),
    theme="soft",
)
demo.launch()

Running on local URL:  http://127.0.0.1:7865

To create a public link, set `share=True` in `launch()`.




{'standalone_question': 'What is the video about?'}
{'standalone_question': 'Who wrote the lyrics and music of the song "Bohemian Rhapsody" by Queen?'}
