Commit 287f75d: Merge branch 'main' into pr/115
dartpain committed Feb 24, 2023 · 2 parents 18b7402 + 3e5a686
Showing 19 changed files with 911 additions and 283 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -108,7 +108,7 @@ venv/
 ENV/
 env.bak/
 venv.bak/
-
+.flaskenv
 # Spyder project settings
 .spyderproject
 .spyproject
13 changes: 11 additions & 2 deletions application/Dockerfile
@@ -1,10 +1,19 @@
-FROM python:3.9
+FROM python:3.11-slim-bullseye as builder
+
+# Tiktoken requires Rust toolchain, so build it in a separate stage
+RUN apt-get update && apt-get install -y gcc curl
+RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
+ENV PATH="/root/.cargo/bin:${PATH}"
+RUN pip install --upgrade pip && pip install tiktoken==0.1.2
+
+FROM python:3.11-slim-bullseye
+# Copy pre-built packages from builder stage
+COPY --from=builder /usr/local/lib/python3.11/site-packages/ /usr/local/lib/python3.11/site-packages/
 WORKDIR /app
 COPY . /app
+RUN pip install --no-cache-dir -r requirements.txt
 ENV FLASK_APP=app.py
 ENV FLASK_ENV=development
-RUN pip install -r requirements.txt

 EXPOSE 5000
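The builder stage exists only to compile tiktoken's Rust extension; the final image copies the built site-packages over instead of carrying the toolchain. A minimal smoke test, as a sketch: it assumes tiktoken's get_encoding() API (present in later releases; the pinned 0.1.2 may differ) and would be run inside the built image.

import tiktoken

# Round-trip a string through an encoding; if the compiled extension was
# copied correctly from the builder stage, this runs with no Rust toolchain.
enc = tiktoken.get_encoding("gpt2")
tokens = enc.encode("Hello, DocsGPT!")
assert enc.decode(tokens) == "Hello, DocsGPT!"
print(f"tiktoken OK: {len(tokens)} tokens")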
117 changes: 62 additions & 55 deletions application/app.py
@@ -9,7 +9,7 @@
 from langchain.chains.question_answering import load_qa_chain
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, HuggingFaceInstructEmbeddings
 from langchain.prompts import PromptTemplate
-
+from error import bad_request
 # os.environ["LANGCHAIN_HANDLER"] = "langchain"

 if os.getenv("LLM_NAME") is not None:
@@ -74,6 +74,7 @@ def api_answer():
     data = request.get_json()
     question = data["question"]
     history = data["history"]
+    print('-'*5)
     if not api_key_set:
         api_key = data["api_key"]
     else:
@@ -83,62 +84,68 @@ def api_answer():
     else:
         embeddings_key = os.getenv("EMBEDDINGS_KEY")

-    # check if the vectorstore is set
-    if "active_docs" in data:
-        vectorstore = "vectors/" + data["active_docs"]
-        if data['active_docs'] == "default":
-            vectorstore = ""
-    else:
-        vectorstore = ""
-
-    # loading the index and the store and the prompt template
-    # Note if you have used other embeddings than OpenAI, you need to change the embeddings
-    if embeddings_choice == "openai_text-embedding-ada-002":
-        docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
-    elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
-        docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
-    elif embeddings_choice == "huggingface_hkunlp/instructor-large":
-        docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
-    elif embeddings_choice == "cohere_medium":
-        docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
-
-    # create a prompt template
-    if history:
-        history = json.loads(history)
-        template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}", history[1])
-        c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp, template_format="jinja2")
-    else:
-        c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template, template_format="jinja2")
-
-    if llm_choice == "openai":
-        llm = OpenAI(openai_api_key=api_key, temperature=0)
-    elif llm_choice == "manifest":
-        llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
-    elif llm_choice == "huggingface":
-        llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
-    elif llm_choice == "cohere":
-        llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
-
-    qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
-                             combine_prompt=c_prompt)
-
-    chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=4)
-
-    # fetch the answer
-    result = chain({"query": question})
-    print(result)
-
-    # some formatting for the frontend
-    result['answer'] = result['result']
-    result['answer'] = result['answer'].replace("\\n", "<br>")
-    result['answer'] = result['answer'].replace("SOURCES:", "")
-    # mock result
-    # result = {
-    #     "answer": "The answer is 42",
-    #     "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"]
-    # }
-    return result
+    # use try and except to check for exception
+    try:
+        # check if the vectorstore is set
+        if "active_docs" in data:
+            vectorstore = "vectors/" + data["active_docs"]
+            if data['active_docs'] == "default":
+                vectorstore = ""
+        else:
+            vectorstore = ""
+
+        # loading the index and the store and the prompt template
+        # Note if you have used other embeddings than OpenAI, you need to change the embeddings
+        if embeddings_choice == "openai_text-embedding-ada-002":
+            docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
+        elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
+            docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
+        elif embeddings_choice == "huggingface_hkunlp/instructor-large":
+            docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
+        elif embeddings_choice == "cohere_medium":
+            docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
+
+        # create a prompt template
+        if history:
+            history = json.loads(history)
+            template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}", history[1])
+            c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp, template_format="jinja2")
+        else:
+            c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template, template_format="jinja2")
+
+        if llm_choice == "openai":
+            llm = OpenAI(openai_api_key=api_key, temperature=0)
+        elif llm_choice == "manifest":
+            llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
+        elif llm_choice == "huggingface":
+            llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
+        elif llm_choice == "cohere":
+            llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
+
+        qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
+                                 combine_prompt=c_prompt)
+
+        chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=4)
+
+        # fetch the answer
+        result = chain({"query": question})
+        print(result)
+
+        # some formatting for the frontend
+        result['answer'] = result['result']
+        result['answer'] = result['answer'].replace("\\n", "<br>")
+        result['answer'] = result['answer'].replace("SOURCES:", "")
+        # mock result
+        # result = {
+        #     "answer": "The answer is 42",
+        #     "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"]
+        # }
+        return result
+    except Exception as e:
+        print(str(e))
+        return bad_request(500,str(e))


 @app.route("/api/docs_check", methods=["POST"])
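With the route body wrapped in try/except, a failing request now returns a JSON error body instead of an unhandled traceback. A minimal sketch of exercising that path from Python, assuming the app is served on port 5000 (per the Dockerfile's EXPOSE); the payload fields mirror what chat.js sends, and the invalid key is deliberate so something inside the try block raises.

import requests

resp = requests.post(
    "http://localhost:5000/api/answer",
    json={
        "question": "What does DocsGPT do?",
        "api_key": "sk-invalid",        # bad key: forces the except branch
        "embeddings_key": "sk-invalid",
        "history": "",                  # falsy, so the non-history template is used
        "active_docs": "default",
    },
)
print(resp.status_code)  # 500 from bad_request() rather than an HTML error page
print(resp.json())       # {"error": "...", "message": "..."}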
13 changes: 13 additions & 0 deletions application/error.py
@@ -0,0 +1,13 @@
+from flask import jsonify
+from werkzeug.http import HTTP_STATUS_CODES
+
+def response_error(code_status,message=None):
+    payload = {'error':HTTP_STATUS_CODES.get(code_status,"something went wrong")}
+    if message:
+        payload['message'] = message
+    response = jsonify(payload)
+    response.status_code = code_status
+    return response
+
+def bad_request(status_code=400,message=''):
+    return response_error(code_status=status_code,message=message)
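Despite its name, bad_request forwards any status code to response_error; app.py calls it with 500. A minimal usage sketch, where the /boom route is hypothetical and not part of this commit:

from flask import Flask
from error import bad_request

app = Flask(__name__)

@app.route("/boom")
def boom():
    try:
        raise RuntimeError("boom")
    except Exception as e:
        # Yields {"error": "Internal Server Error", "message": "boom"} with status 500
        return bad_request(500, str(e))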
1 change: 1 addition & 0 deletions application/requirements.txt
@@ -64,6 +64,7 @@ transformers==4.26.0
 typer==0.7.0
 typing-inspect==0.8.0
 typing_extensions==4.4.0
+unstructured==0.4.8
 urllib3==1.26.14
 Werkzeug==2.2.3
 XlsxWriter==3.0.8
126 changes: 72 additions & 54 deletions application/static/src/chat.js
@@ -1,55 +1,73 @@
-var el = document.getElementById('message-form');
-if (el) {
-    el.addEventListener("submit", function (event) {
-        console.log("submitting")
-        event.preventDefault()
-        var message = document.getElementById("message-input").value;
-        msg_html = '<div class="bg-blue-500 text-white p-2 rounded-lg mb-2 self-end"><p class="text-sm">'
-        msg_html += message
-        msg_html += '</p></div>'
-        document.getElementById("messages").innerHTML += msg_html;
-        let chatWindow = document.getElementById("messages-container");
-        chatWindow.scrollTop = chatWindow.scrollHeight;
-        document.getElementById("message-input").value = "";
-        document.getElementById("button-submit").innerHTML = '<i class="fa fa-circle-o-notch fa-spin"></i> Thinking...';
-        document.getElementById("button-submit").disabled = true;
-        if (localStorage.getItem('activeDocs') == null) {
-            localStorage.setItem('activeDocs', 'default')
-        }
-
-        fetch('/api/answer', {
-            method: 'POST',
-            headers: {
-                'Content-Type': 'application/json',
-            },
-
-            body: JSON.stringify({question: message,
-                api_key: localStorage.getItem('apiKey'),
-                embeddings_key: localStorage.getItem('apiKey'),
-                history: localStorage.getItem('chatHistory'),
-                active_docs: localStorage.getItem('activeDocs')}),
+var form = document.getElementById('message-form');
+var errorModal = document.getElementById('error-alert')
+document.getElementById('close').addEventListener('click',()=>{
+    errorModal.classList.toggle('hidden')
+})
+
+
+function submitForm(event){
+    event.preventDefault()
+    var message = document.getElementById("message-input").value;
+    console.log(message.length)
+    if(message.length === 0){
+        return
+    }
+    msg_html = '<div class="bg-blue-500 text-white p-2 rounded-lg mb-2 self-end"><p class="text-sm">'
+    msg_html += message
+    msg_html += '</p></div>'
+    document.getElementById("messages").innerHTML += msg_html;
+    let chatWindow = document.getElementById("messages-container");
+    chatWindow.scrollTop = chatWindow.scrollHeight;
+    document.getElementById("message-input").value = "";
+    document.getElementById("button-submit").innerHTML = '<i class="fa fa-circle-o-notch fa-spin"></i> Thinking...';
+    document.getElementById("button-submit").disabled = true;
+    if (localStorage.getItem('activeDocs') == null) {
+        localStorage.setItem('activeDocs', 'default')
+    }
+
+
+    fetch('/api/answer', {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json',
+        },
+
+        body: JSON.stringify({question: message,
+            api_key: localStorage.getItem('apiKey'),
+            embeddings_key: localStorage.getItem('apiKey'),
+            history: localStorage.getItem('chatHistory'),
+            active_docs: localStorage.getItem('activeDocs')}),
+    }).then((response)=> response.json())
+    .then(data => {
+        console.log('Success:', data);
+        if(data.error){
+            document.getElementById('text-error').textContent = `Error : ${JSON.stringify(data.message)}`
+            errorModal.classList.toggle('hidden')
+        }
+        if(data.answer){
+            msg_html = '<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start"><code class="text-sm">'
+            msg_html += data.answer
+            msg_html += '</code></div>'
+            document.getElementById("messages").innerHTML += msg_html;
+            let chatWindow = document.getElementById("messages-container");
+            chatWindow.scrollTop = chatWindow.scrollHeight;
+        }
+        document.getElementById("button-submit").innerHTML = 'Send';
+        document.getElementById("button-submit").disabled = false;
+        let chatHistory = [message, data.answer || ''];
+        localStorage.setItem('chatHistory', JSON.stringify(chatHistory));
+    })
-    .then(response => response.json())
-    .then(data => {
-        console.log('Success:', data);
-        msg_html = '<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start"><code class="text-sm">'
-        msg_html += data.answer
-        msg_html += '</code></div>'
-        document.getElementById("messages").innerHTML += msg_html;
-        let chatWindow = document.getElementById("messages-container");
-        chatWindow.scrollTop = chatWindow.scrollHeight;
-        document.getElementById("button-submit").innerHTML = 'Send';
-        document.getElementById("button-submit").disabled = false;
-        let chatHistory = [message, data.answer];
-        localStorage.setItem('chatHistory', JSON.stringify(chatHistory));
-    })
-    .catch((error) => {
-        console.error('Error:', error);
-        console.log(error);
-        document.getElementById("button-submit").innerHTML = 'Send';
-        document.getElementById("button-submit").disabled = false;
-    });
-
-
-    });
-}
+    .catch((error) => {
+        console.error('Error:', error);
+        // console.log(error);
+        // document.getElementById("button-submit").innerHTML = 'Send';
+        // document.getElementById("button-submit").disabled = false;
+
+    });
+}

+window.addEventListener('submit',submitForm)
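The chatHistory hand-off between the two sides is a two-element JSON array: chat.js persists [question, answer || ''] in localStorage, and api_answer() json.loads() it and splices the pair into template_hist. A sketch of that contract in Python; the template text below is a stand-in, since template_hist is defined outside this diff.

import json

# Assumed shape for template_hist, with jinja2 placeholders as in the
# PromptTemplate calls in app.py; the real template is not shown in this commit.
template_hist = (
    "Previous question: {historyquestion}\n"
    "Previous answer: {historyanswer}\n"
    "{{ summaries }}\n\nQuestion: {{ question }}"
)

raw = json.dumps(["What is DocsGPT?", "A documentation assistant."])  # what chat.js stores
history = json.loads(raw)                                             # what api_answer() does
prompt_text = (template_hist
               .replace("{historyquestion}", history[0])
               .replace("{historyanswer}", history[1]))
print(prompt_text)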
27 changes: 24 additions & 3 deletions application/templates/index.html
@@ -16,7 +16,7 @@


 <body>
-
+


 <header class="bg-white p-2 flex justify-between items-center">
@@ -28,6 +28,17 @@ <h1 class="text-lg font-medium">DocsGPT 🦖 Preview</h1>
     {% endif %}
     </div>
 </header>
+
+
+<!-- Alert Info -->
+<div class="border flex justify-between
+    w-auto px-4 py-3 rounded relative
+    hidden" style="background-color: rgb(197, 51, 51);color: white;" id="error-alert" role="alert">
+    <span class="block sm:inline" id="text-error"></span>
+    <strong class="text-xl align-center alert-del" style="cursor: pointer;" id="close">&times;</strong>
+</div>
+
+
 <div class="lg:flex ml-2 mr-2">
     <div class="lg:w-3/4 min-h-screen max-h-screen">
         <div class="w-full flex flex-col h-5/6">
@@ -59,6 +70,8 @@ <h1 class="text-lg font-medium">DocsGPT 🦖 Preview</h1>
     </form>
 </div>
+
+


 </div>
 </div>
@@ -77,11 +90,16 @@ <h1 class="text-lg font-medium">DocsGPT 🦖 Preview</h1>
 </div>

 <div class="flex items-center justify-center h-full">
+
+
+
 </div>

+
+
+
-{% if not api_key_set %}
-<div class="fixed z-10 overflow-y-auto top-0 w-full left-0 hidden" id="modal">
+<div class="fixed z-10 overflow-y-auto top-0 w-full left-0 show" id="modal">
 <div class="flex items-center justify-center min-height-100vh pt-4 px-4 pb-20 text-center sm:block sm:p-0">
     <div class="fixed inset-0 transition-opacity">
         <div class="absolute inset-0 bg-gray-900 opacity-75" />
@@ -105,6 +123,9 @@ <h2>Before you can start using DocsGPT we need you to provide an API key for llm
     </div>
 </div>
 {% endif %}
+
+
+
 <script>
     function docsIndex() {
         // loads latest index from https://raw.githubusercontent.com/arc53/DocsHUB/main/combined.json