Merge pull request #362 from arc53/feature/startup-script-cpu-inference
script + cpu optimisations
dartpain committed Oct 1, 2023
2 parents 891e5fe + cd9b03b commit c73dd77
Showing 8 changed files with 25 additions and 46 deletions.
2 changes: 2 additions & 0 deletions application/api/answer/routes.py
@@ -118,6 +118,8 @@ def complete_stream(question, docsearch, chat_history, api_key, conversation_id)


     docs = docsearch.search(question, k=2)
+    if settings.LLM_NAME == "llama.cpp":
+        docs = [docs[0]]
     # join all page_content together with a newline
     docs_together = "\n".join([doc.page_content for doc in docs])
     p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)
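Note: with the llama.cpp backend only the top-ranked chunk is kept, so the combined prompt stays small enough for the 2048-token context window set in application/llm/llama_cpp.py below, and CPU inference stays fast. A runnable sketch of the trimming, with hypothetical stand-ins for the real search objects:

    # Hypothetical stand-in for the documents returned by docsearch.search(question, k=2)
    class Doc:
        def __init__(self, page_content):
            self.page_content = page_content

    docs = [Doc("best chunk"), Doc("runner-up")]
    llm_name = "llama.cpp"  # stand-in for settings.LLM_NAME
    if llm_name == "llama.cpp":
        docs = [docs[0]]  # a single chunk keeps the prompt short for CPU inference
    docs_together = "\n".join(doc.page_content for doc in docs)
    print(docs_together)  # -> best chunk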
4 changes: 2 additions & 2 deletions application/api/user/routes.py
@@ -6,7 +6,6 @@
 from bson.objectid import ObjectId
 from werkzeug.utils import secure_filename
 import http.client
-from celery.result import AsyncResult

 from application.api.user.tasks import ingest

@@ -142,7 +141,8 @@ def upload_file():
 def task_status():
     """Get celery job status."""
     task_id = request.args.get("task_id")
-    task = AsyncResult(task_id)
+    from application.celery import celery
+    task = celery.AsyncResult(task_id)
     task_meta = task.info
     return {"status": task.status, "result": task_meta}

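Note: the handler now asks the application's own Celery instance for the result (imported inside the function, presumably to avoid a circular import), so task state is read from the configured result backend rather than from a bare, backend-less AsyncResult. A minimal sketch of the same pattern, assuming the module layout in this repository:

    # Hypothetical status helper mirroring the handler above
    from application.celery import celery

    def task_state(task_id: str) -> dict:
        task = celery.AsyncResult(task_id)  # bound to the app's result backend
        return {"status": task.status, "result": task.info}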
2 changes: 1 addition & 1 deletion application/celery.py
@@ -2,7 +2,7 @@
 from application.core.settings import settings

 def make_celery(app_name=__name__):
-    celery = Celery(app_name, broker=settings.CELERY_BROKER_URL)
+    celery = Celery(app_name, broker=settings.CELERY_BROKER_URL, backend=settings.CELERY_RESULT_BACKEND)
     celery.conf.update(settings)
     return celery

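Note: without a backend, Celery has nowhere to store task state, and AsyncResult.status reports PENDING even for finished tasks; passing backend=settings.CELERY_RESULT_BACKEND (Redis in this setup) is what makes the task_status endpoint above meaningful. A minimal sketch with hypothetical Redis URLs:

    from celery import Celery

    # The broker carries task messages; the backend stores task state and return values.
    app = Celery("application",
                 broker="redis://localhost:6379/0",
                 backend="redis://localhost:6379/1")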
2 changes: 1 addition & 1 deletion application/core/settings.py
@@ -2,7 +2,7 @@
 import os

 from pydantic import BaseSettings
-current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


 class Settings(BaseSettings):
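Note: each os.path.dirname strips one path segment, so with settings.py at application/core/settings.py the third call moves current_dir from the application package up to the repository root, matching repo-root-relative paths such as models/. A quick illustration with a hypothetical absolute path:

    import os

    p = "/repo/application/core/settings.py"
    print(os.path.dirname(os.path.dirname(p)))                   # /repo/application (before)
    print(os.path.dirname(os.path.dirname(os.path.dirname(p))))  # /repo (after)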
5 changes: 3 additions & 2 deletions application/llm/llama_cpp.py
@@ -1,15 +1,16 @@
 from application.llm.base import BaseLLM
+from application.core.settings import settings

 class LlamaCpp(BaseLLM):

-    def __init__(self, api_key, llm_name='/Users/pavel/Desktop/docsgpt/application/models/orca-test.bin'):
+    def __init__(self, api_key, llm_name=settings.MODEL_PATH, **kwargs):
         global llama
         try:
             from llama_cpp import Llama
         except ImportError:
             raise ImportError("Please install llama_cpp using pip install llama-cpp-python")

-        llama = Llama(model_path=llm_name)
+        llama = Llama(model_path=llm_name, n_ctx=2048)

     def gen(self, model, engine, messages, stream=False, **kwargs):
         context = messages[0]['content']
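Note: the hard-coded developer path becomes the MODEL_PATH setting, and n_ctx=2048 widens llama.cpp's context window beyond llama-cpp-python's small default of the time, enough to hold the single retrieved chunk kept in routes.py above. A minimal usage sketch, assuming MODEL_PATH points at the downloaded GGUF model:

    # Hypothetical instantiation following the diff
    from application.llm.llama_cpp import LlamaCpp

    llm = LlamaCpp(api_key=None)  # model path defaults to settings.MODEL_PATH, n_ctx=2048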
34 changes: 1 addition & 33 deletions docker-compose-local.yaml
@@ -6,42 +6,10 @@ services:
     environment:
       - VITE_API_HOST=http://localhost:7091
       - VITE_API_STREAMING=$VITE_API_STREAMING
+      - VITE_EMBEDDINGS_NAME=$EMBEDDINGS_NAME
     ports:
       - "5173:5173"

-#  backend:
-#    build: ./application
-#    environment:
-#      - LLM_NAME=$LLM_NAME
-#      - EMBEDDINGS_NAME=$EMBEDDINGS_NAME
-#      - CELERY_BROKER_URL=redis://redis:6379/0
-#      - CELERY_RESULT_BACKEND=redis://redis:6379/1
-#      - MONGO_URI=mongodb://mongo:27017/docsgpt
-#    ports:
-#      - "7091:7091"
-#    volumes:
-#      - ./application/indexes:/app/application/indexes
-#      - ./application/inputs:/app/application/inputs
-#      - ./application/vectors:/app/application/vectors
-#      - ./application/models:/app/application/models
-#    depends_on:
-#      - redis
-#      - mongo
-
-  worker:
-    build: ./application
-    command: celery -A application.app.celery worker -l INFO
-    environment:
-      - LLM_NAME=$LLM_NAME
-      - EMBEDDINGS_NAME=$EMBEDDINGS_NAME
-      - CELERY_BROKER_URL=redis://redis:6379/0
-      - CELERY_RESULT_BACKEND=redis://redis:6379/1
-      - MONGO_URI=mongodb://mongo:27017/docsgpt
-      - API_URL=http://backend:7091
-    depends_on:
-      - redis
-      - mongo
-
   redis:
     image: redis:6-alpine
     ports:
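Note: the commented-out backend service and the worker service are dropped from the local compose file because setup.sh (below) now launches the Flask API and the Celery worker directly on the host; compose keeps the frontend and the supporting containers such as redis, and the frontend additionally receives the embeddings model name via VITE_EMBEDDINGS_NAME.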
3 changes: 2 additions & 1 deletion frontend/src/Navigation.tsx
@@ -59,6 +59,7 @@ export default function Navigation({

   const navRef = useRef(null);
   const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
+  const embeddingsName = import.meta.env.VITE_EMBEDDINGS_NAME || 'openai_text-embedding-ada-002';

   useEffect(() => {
     if (!conversations) {

@@ -253,7 +254,7 @@ export default function Navigation({
   <div className="absolute top-12 left-0 right-6 ml-2 mr-4 max-h-52 overflow-y-scroll bg-white shadow-lg">
     {docs ? (
       docs.map((doc, index) => {
-        if (doc.model === 'openai_text-embedding-ada-002') {
+        if (doc.model === embeddingsName) {
           return (
             <div
               key={index}
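Note: the source-document filter no longer hard-codes the OpenAI embeddings model; it compares against VITE_EMBEDDINGS_NAME (falling back to the previous value), so documents ingested locally with huggingface_sentence-transformers/all-mpnet-base-v2 appear in the list when the frontend is configured to match.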
19 changes: 13 additions & 6 deletions setup.sh
@@ -23,24 +23,31 @@ download_locally() {
   # check if docsgpt-7b-f16.gguf does not exist
   if [ ! -f models/docsgpt-7b-f16.gguf ]; then
     echo "Downloading the model..."
-    wget -P models https://docsgpt.s3.eu-west-1.amazonaws.com/models/docsgpt-7b-f16.gguf
+    wget -P models https://d3dg1063dc54p9.cloudfront.net/models/docsgpt-7b-f16.gguf
     echo "Model downloaded to models directory."
   else
     echo "Model already exists."
   fi

   docker-compose -f docker-compose-local.yaml build && docker-compose -f docker-compose-local.yaml up -d
-  python -m venv venv
-  source venv/bin/activate
-  pip install -r application/requirements.txt
-  pip install llama-cpp-python
+  #python -m venv venv
+  #source venv/bin/activate
+  #pip install -r application/requirements.txt
+  #pip install llama-cpp-python
+  #pip install sentence-transformers
   export LLM_NAME=llama.cpp
   export EMBEDDINGS_NAME=huggingface_sentence-transformers/all-mpnet-base-v2
   export FLASK_APP=application/app.py
   export FLASK_DEBUG=true
+  export CELERY_BROKER_URL=redis://localhost:6379/0
+  export CELERY_RESULT_BACKEND=redis://localhost:6379/1
   echo "The application is now running on http://localhost:5173"
   echo "You can stop the application by running the following command:"
   echo "Ctrl + C and then"
+  echo "Then pkill -f 'flask run' and then"
   echo "docker-compose down"
-  flask run --host=0.0.0.0 --port=7091
+  flask run --host=0.0.0.0 --port=7091 &
+  celery -A application.app.celery worker -l INFO
}
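Note: the virtualenv and pip steps are commented out rather than removed, and the model now downloads from a CloudFront URL instead of S3. Flask is started in the background with &, so the Celery worker can run in the same shell; that is also why the shutdown instructions gain the pkill hint, and the CELERY_* exports point both processes at the Redis container started by docker-compose-local.yaml.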

# Function to handle the choice to use the OpenAI API

2 comments on commit c73dd77

@vercel bot commented on c73dd77 Oct 1, 2023

Successfully deployed to the following URLs:

docs-gpt – ./frontend

docs-gpt-git-main-arc53.vercel.app
docs-gpt-brown.vercel.app
docs-gpt-arc53.vercel.app

@vercel bot commented on c73dd77 Oct 1, 2023

Successfully deployed to the following URLs:

nextra-docsgpt – ./docs

nextra-docsgpt-git-main-arc53.vercel.app
docs.docsgpt.co.uk
nextra-docsgpt-arc53.vercel.app
nextra-docsgpt.vercel.app
