Skip to content

Commit

Permalink
Better UI for API-level errors.
Browse files Browse the repository at this point in the history
+ Reworked vector store loading in /api/completion
  • Loading branch information
matteocargnelutti committed Feb 13, 2024
1 parent 3a61f49 commit 6230bff
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 37 deletions.
13 changes: 12 additions & 1 deletion warc_gpt/static/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,18 @@ chatInput.addEventListener("submit", async (e) => {
messageInput.value = "";

} catch(err) {
messageInput.value = sanitizeString(message, false); // Put unprocessed message back in textarea
// Put unprocessed message back in textarea
messageInput.value = sanitizeString(message, false);

// Show error message
chatUI.insertAdjacentHTML("beforeend",
/*html*/`
<article class="message ai error">
<p class="model">Error</p>
<p class="response">An error occurred while processing the request.</p>
</article>`
);

throw(err);
} finally {
messageInput.removeAttribute("disabled");
Expand Down
75 changes: 39 additions & 36 deletions warc_gpt/views/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@

litellm.telemetry = False

# Lazily-populated module-level cache holding the vector store connection
# objects; each slot starts as None and is filled on first use.
vector_store_cache = dict.fromkeys(
    ("chroma_client", "embedding_model", "chroma_collection")
)
""" Module-level "caching" for vector store connection. """


@current_app.route("/api/models")
def get_models():
Expand Down Expand Up @@ -153,59 +160,55 @@ def post_completion():
)

#
# Retrieve context - unless in no_rag mode
# Load vector store (from "cache" if available)
#

# Load vector store client and collection
# Shared at app-level through:
# - g.chroma_client
# - g.chroma_collection
try:
assert not no_rag

if "chroma_client" in g:
chroma_client = g.chroma_client
if vector_store_cache.get("embedding_model", None) is None:
embedding_model = SentenceTransformer(
model_name_or_path=environ["VECTOR_SEARCH_SENTENCE_TRANSFORMER_MODEL"],
device=environ["VECTOR_SEARCH_SENTENCE_TRANSFORMER_DEVICE"],
)
vector_store_cache["embedding_model"] = embedding_model
else:
embedding_model = vector_store_cache["embedding_model"]

assert embedding_model
except Exception:
current_app.logger.error(traceback.format_exc())
return jsonify({"error": "Could not load embedding model."}), 500

try:
if vector_store_cache.get("chroma_client", None) is None:
chroma_client = chromadb.PersistentClient(
path=environ["VECTOR_SEARCH_PATH"],
settings=chromadb.Settings(anonymized_telemetry=False),
)

g.chroma_client = chroma_client

if "chroma_collection" in g:
chroma_collection = g.chroma_collection
vector_store_cache["chroma_client"] = chroma_client
else:
chroma_collection = chroma_client.get_collection(
name=environ["VECTOR_SEARCH_COLLECTION_NAME"]
)
chroma_client = vector_store_cache["chroma_client"]

g.chroma_collection = chroma_collection

except AssertionError:
pass # no_rag mode
assert chroma_client
except Exception:
current_app.logger.error(traceback.format_exc())
return jsonify({"error": "Could not load vector store."}), 500
return jsonify({"error": "Could not load ChromaDB client."}), 500

# Load embedding model (Shared at app-level via g.embedding_model)
try:
assert not no_rag

if "embedding_model" in g:
embedding_model = g.embedding_model
else:
embedding_model = SentenceTransformer(
environ["VECTOR_SEARCH_SENTENCE_TRANSFORMER_MODEL"],
device=environ["VECTOR_SEARCH_SENTENCE_TRANSFORMER_DEVICE"],
if vector_store_cache.get("chroma_collection", None) is None:
chroma_collection = chroma_client.get_collection(
name=os.environ["VECTOR_SEARCH_COLLECTION_NAME"],
)
vector_store_cache["chroma_collection"] = chroma_collection
else:
chroma_collection = vector_store_cache["chroma_collection"]

g.embedding_model = embedding_model
except AssertionError:
pass # no_rag mode
assert chroma_collection
except Exception:
current_app.logger.error(traceback.format_exc())
return jsonify({"error": "Could not load embedding model."}), 500
return jsonify({"error": "Could not load ChromaDB collection."}), 500

#
# Retrieve context - unless in no_rag mode
#

# Retrieve context chunks
try:
Expand Down

0 comments on commit 6230bff

Please sign in to comment.