diff --git a/example-apps/chatbot-rag-app/api/app.py b/example-apps/chatbot-rag-app/api/app.py
index 40b20d0a..661343ef 100644
--- a/example-apps/chatbot-rag-app/api/app.py
+++ b/example-apps/chatbot-rag-app/api/app.py
@@ -1,9 +1,7 @@
 from flask import Flask, jsonify, request, Response
 from flask_cors import CORS
-from queue import Queue
 from uuid import uuid4
-from chat import ask_question, parse_stream_message
-import threading
+from chat import ask_question
 import os
 import sys
 
@@ -23,18 +21,8 @@ def api_chat():
     if question is None:
         return jsonify({"msg": "Missing question from request JSON"}), 400
 
-    stream_queue = Queue()
     session_id = request.args.get("session_id", str(uuid4()))
-
-    print("Chat session ID: ", session_id)
-
-    threading.Thread(
-        target=ask_question, args=(question, stream_queue, session_id)
-    ).start()
-
-    return Response(
-        parse_stream_message(session_id, stream_queue), mimetype="text/event-stream"
-    )
+    return Response(ask_question(question, session_id), mimetype="text/event-stream")
 
 
 @app.cli.command()
diff --git a/example-apps/chatbot-rag-app/api/chat.py b/example-apps/chatbot-rag-app/api/chat.py
index 8abe58bc..4f2e84d7 100644
--- a/example-apps/chatbot-rag-app/api/chat.py
+++ b/example-apps/chatbot-rag-app/api/chat.py
@@ -1,18 +1,10 @@
-from langchain.callbacks.base import BaseCallbackHandler
-from langchain.chains import ConversationalRetrievalChain
-from langchain.prompts.chat import (
-    HumanMessagePromptTemplate,
-    SystemMessagePromptTemplate,
-    ChatPromptTemplate,
-)
-from langchain.prompts.prompt import PromptTemplate
 from langchain.vectorstores import ElasticsearchStore
-from queue import Queue
 from llm_integrations import get_llm
 from elasticsearch_client import (
     elasticsearch_client,
     get_elasticsearch_chat_message_history,
 )
+from flask import render_template, stream_with_context, current_app
 import json
 import os
 
@@ -21,135 +13,50 @@
     "ES_INDEX_CHAT_HISTORY", "workplace-app-docs-chat-history"
 )
 ELSER_MODEL = os.getenv("ELSER_MODEL", ".elser_model_2")
-POISON_MESSAGE = "~~~END~~~"
 SESSION_ID_TAG = "[SESSION_ID]"
 SOURCE_TAG = "[SOURCE]"
 DONE_TAG = "[DONE]"
 
-
-class QueueCallbackHandler(BaseCallbackHandler):
-    def __init__(
-        self,
-        queue: Queue,
-    ):
-        self.queue = queue
-        self.in_human_prompt = True
-
-    def on_retriever_end(self, documents, *, run_id, parent_run_id=None, **kwargs):
-        if len(documents) > 0:
-            for doc in documents:
-                source = {
-                    "name": doc.metadata["name"],
-                    "page_content": doc.page_content,
-                    "url": doc.metadata["url"],
-                    "icon": doc.metadata["category"],
-                    "updated_at": doc.metadata.get("updated_at", None),
-                }
-                self.queue.put(f"{SOURCE_TAG} {json.dumps(source)}")
-
-    def on_llm_new_token(self, token, **kwargs):
-        if not self.in_human_prompt:
-            self.queue.put(token)
-
-    def on_llm_start(
-        self,
-        serialized,
-        prompts,
-        *,
-        run_id,
-        parent_run_id=None,
-        tags=None,
-        metadata=None,
-        **kwargs,
-    ):
-        self.in_human_prompt = prompts[0].startswith("Human:")
-
-    def on_llm_end(self, response, *, run_id, parent_run_id=None, **kwargs):
-        if not self.in_human_prompt:
-            self.queue.put(POISON_MESSAGE)
-
-
 store = ElasticsearchStore(
     es_connection=elasticsearch_client,
     index_name=INDEX,
     strategy=ElasticsearchStore.SparseVectorRetrievalStrategy(model_id=ELSER_MODEL),
 )
 
-general_system_template = """
-Human: Use the following passages to answer the user's question.
-Each passage has a SOURCE which is the title of the document. When answering, give the source name of the passages you are answering from, put them in a comma seperated list, prefixed at the start with SOURCES: $sources then print an empty line.
-
-Example:
-
-Question: What is the meaning of life?
-Response:
-The meaning of life is 42. \n
-SOURCES: Hitchhiker's Guide to the Galaxy \n
-
-If you don't know the answer, just say that you don't know, don't try to make up an answer.
-
----
-{context}
----
+@stream_with_context
+def ask_question(question, session_id):
+    yield f"data: {SESSION_ID_TAG} {session_id}\n\n"
+    current_app.logger.debug("Chat session ID: %s", session_id)
 
-"""
-general_user_template = "Question: {question}"
-qa_prompt = ChatPromptTemplate.from_messages(
-    [
-        SystemMessagePromptTemplate.from_template(general_system_template),
-        HumanMessagePromptTemplate.from_template(general_user_template),
-    ]
-)
+    chat_history = get_elasticsearch_chat_message_history(
+        INDEX_CHAT_HISTORY, session_id
+    )
 
-document_prompt = PromptTemplate(
-    input_variables=["page_content", "name"],
-    template="""
----
-NAME: "{name}"
-PASSAGE:
-{page_content}
----
-""",
-)
+    if len(chat_history.messages) > 0:
+        # create a condensed question
+        condense_question_prompt = render_template(
+            'condense_question_prompt.txt', question=question,
+            chat_history=chat_history.messages)
+        question = get_llm().invoke(condense_question_prompt).content
 
-retriever = store.as_retriever()
-llm = get_llm()
-chat = ConversationalRetrievalChain.from_llm(
-    llm=llm,
-    retriever=store.as_retriever(),
-    return_source_documents=True,
-    combine_docs_chain_kwargs={"prompt": qa_prompt, "document_prompt": document_prompt},
-    verbose=True,
-)
+    current_app.logger.debug('Question: %s', question)
 
+    docs = store.as_retriever().invoke(question)
+    for doc in docs:
+        doc_source = {**doc.metadata, 'page_content': doc.page_content}
+        current_app.logger.debug('Retrieved document passage from: %s', doc.metadata['name'])
+        yield f'data: {SOURCE_TAG} {json.dumps(doc_source)}\n\n'
 
-def parse_stream_message(session_id, queue: Queue):
-    yield f"data: {SESSION_ID_TAG} {session_id}\n\n"
+    qa_prompt = render_template('rag_prompt.txt', question=question, docs=docs)
 
-    message = None
-    break_out_flag = False
-    while True:
-        message = queue.get()
-        for line in message.splitlines():
-            if line == POISON_MESSAGE:
-                break_out_flag = True
-                break
-            yield f"data: {line}\n\n"
-        if break_out_flag:
-            break
+    answer = ''
+    for chunk in get_llm().stream(qa_prompt):
+        yield f'data: {chunk.content}\n\n'
+        answer += chunk.content
 
     yield f"data: {DONE_TAG}\n\n"
+    current_app.logger.debug('Answer: %s', answer)
 
-
-def ask_question(question, queue, session_id):
-    chat_history = get_elasticsearch_chat_message_history(
-        INDEX_CHAT_HISTORY, session_id
-    )
-    result = chat(
-        {"question": question, "chat_history": chat_history.messages},
-        callbacks=[QueueCallbackHandler(queue)],
-    )
-
-    chat_history.add_user_message(result["question"])
-    chat_history.add_ai_message(result["answer"])
+    chat_history.add_user_message(question)
+    chat_history.add_ai_message(answer)
diff --git a/example-apps/chatbot-rag-app/api/llm_integrations.py b/example-apps/chatbot-rag-app/api/llm_integrations.py
index 5d928d18..38a5a46d 100644
--- a/example-apps/chatbot-rag-app/api/llm_integrations.py
+++ b/example-apps/chatbot-rag-app/api/llm_integrations.py
@@ -5,15 +5,15 @@
 
 LLM_TYPE = os.getenv("LLM_TYPE", "openai")
 
-def init_openai_chat():
+def init_openai_chat(temperature):
     OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-    return ChatOpenAI(openai_api_key=OPENAI_API_KEY, streaming=True, temperature=0.2)
-def init_vertex_chat():
+    return ChatOpenAI(openai_api_key=OPENAI_API_KEY, streaming=True, temperature=temperature)
+def init_vertex_chat(temperature):
     VERTEX_PROJECT_ID = os.getenv("VERTEX_PROJECT_ID")
     VERTEX_REGION = os.getenv("VERTEX_REGION", "us-central1")
     vertexai.init(project=VERTEX_PROJECT_ID, location=VERTEX_REGION)
-    return ChatVertexAI(streaming=True, temperature=0.2)
-def init_azure_chat():
+    return ChatVertexAI(streaming=True, temperature=temperature)
+def init_azure_chat(temperature):
     OPENAI_VERSION=os.getenv("OPENAI_VERSION", "2023-05-15")
     BASE_URL=os.getenv("OPENAI_BASE_URL")
     OPENAI_API_KEY=os.getenv("OPENAI_API_KEY")
@@ -24,8 +24,8 @@ def init_azure_chat():
         openai_api_version=OPENAI_VERSION,
         openai_api_key=OPENAI_API_KEY,
         streaming=True,
-        temperature=0.2)
-def init_bedrock():
+        temperature=temperature)
+def init_bedrock(temperature):
     AWS_ACCESS_KEY=os.getenv("AWS_ACCESS_KEY")
     AWS_SECRET_KEY=os.getenv("AWS_SECRET_KEY")
     AWS_REGION=os.getenv("AWS_REGION")
@@ -35,7 +35,7 @@ def init_bedrock():
         client=BEDROCK_CLIENT,
         model_id=AWS_MODEL_ID,
         streaming=True,
-        model_kwargs={"temperature":0.2})
+        model_kwargs={"temperature":temperature})
 
 MAP_LLM_TYPE_TO_CHAT_MODEL = {
     "azure": init_azure_chat,
@@ -44,8 +44,8 @@ def init_bedrock():
     "vertex": init_vertex_chat,
 }
 
-def get_llm():
+def get_llm(temperature=0.2):
     if not LLM_TYPE in MAP_LLM_TYPE_TO_CHAT_MODEL:
         raise Exception("LLM type not found. Please set LLM_TYPE to one of: " + ", ".join(MAP_LLM_TYPE_TO_CHAT_MODEL.keys()) + ".")
-    return MAP_LLM_TYPE_TO_CHAT_MODEL[LLM_TYPE]()
\ No newline at end of file
+    return MAP_LLM_TYPE_TO_CHAT_MODEL[LLM_TYPE](temperature=temperature)
diff --git a/example-apps/chatbot-rag-app/api/templates/condense_question_prompt.txt b/example-apps/chatbot-rag-app/api/templates/condense_question_prompt.txt
new file mode 100644
index 00000000..6a79142b
--- /dev/null
+++ b/example-apps/chatbot-rag-app/api/templates/condense_question_prompt.txt
@@ -0,0 +1,8 @@
+Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
+
+Chat history:
+{% for dialogue_turn in chat_history -%}
+{% if dialogue_turn.type == 'human' %}Question: {{ dialogue_turn.content }}{% elif dialogue_turn.type == 'ai' %}Response: {{ dialogue_turn.content }}{% endif %}
+{% endfor -%}
+Follow Up Question: {{ question }}
+Standalone question:
diff --git a/example-apps/chatbot-rag-app/api/templates/rag_prompt.txt b/example-apps/chatbot-rag-app/api/templates/rag_prompt.txt
new file mode 100644
index 00000000..3e191c9f
--- /dev/null
+++ b/example-apps/chatbot-rag-app/api/templates/rag_prompt.txt
@@ -0,0 +1,26 @@
+Use the following passages to answer the user's question.
+Each passage has a NAME which is the title of the document. When answering, give the source name of the passages you are answering from at the end. Put them in a comma separated list, prefixed with SOURCES:.
+
+Example:
+
+Question: What is the meaning of life?
+Response:
+The meaning of life is 42.
+
+SOURCES: Hitchhiker's Guide to the Galaxy
+
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+----
+
+{% for doc in docs -%}
+---
+NAME: {{ doc.metadata.name }}
+PASSAGE:
+{{ doc.page_content }}
+---
+
+{% endfor -%}
+----
+Question: {{ question }}
+Response:
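
A note on the new prompt files: condense_question_prompt.txt and rag_prompt.txt are ordinary Jinja2 templates rendered through Flask's render_template, with question, chat_history, and docs passed in as template variables. A minimal sketch of rendering the condense-question template outside the Flask app, assuming jinja2 is installed, the snippet runs from the api/ directory, and using an invented two-turn history:

from pathlib import Path
from types import SimpleNamespace as Msg

from jinja2 import Template

# Toy chat history with the same .type/.content shape the template iterates over.
history = [
    Msg(type="human", content="Where do I file an expense report?"),
    Msg(type="ai", content="Use the finance portal linked from the onboarding guide."),
]

template = Template(Path("templates/condense_question_prompt.txt").read_text())
print(template.render(chat_history=history, question="What about travel expenses?"))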
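With this patch, ask_question is a plain generator and the chat endpoint streams its output as server-sent events tagged with [SESSION_ID], [SOURCE], and [DONE], so the queue, poison message, and background thread go away. Below is a minimal client-side sketch for consuming that stream. It is not part of the patch; the base URL, port, and the /api/chat POST route are assumptions taken from the example app's defaults rather than from this diff, so adjust them to your deployment.

import json
import requests

# Stream tags mirrored from chat.py above.
SESSION_ID_TAG = "[SESSION_ID]"
SOURCE_TAG = "[SOURCE]"
DONE_TAG = "[DONE]"

def stream_chat(question, base_url="http://localhost:3001", session_id=None):
    # base_url and the /api/chat route are assumptions, not part of this patch.
    params = {"session_id": session_id} if session_id else {}
    with requests.post(
        f"{base_url}/api/chat",
        json={"question": question},
        params=params,
        stream=True,
    ) as resp:
        resp.raise_for_status()
        for raw in resp.iter_lines(decode_unicode=True):
            if not raw or not raw.startswith("data: "):
                continue  # skip blank separators between SSE events
            payload = raw[len("data: "):]
            if payload.startswith(SESSION_ID_TAG):
                print("session:", payload[len(SESSION_ID_TAG):].strip())
            elif payload.startswith(SOURCE_TAG):
                source = json.loads(payload[len(SOURCE_TAG):].strip())
                print("source:", source.get("name"))
            elif payload == DONE_TAG:
                break  # server signals the end of the answer
            else:
                print(payload, end="")  # one streamed chunk of the answer

if __name__ == "__main__":
    stream_chat("What is the vacation policy?")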