Merge pull request #825 from arc53/feat/mongodb
Public LLM
dartpain committed Jan 9, 2024
2 parents 103118d + c0f7b34 commit 667b66b
Showing 13 changed files with 141 additions and 37 deletions.
1 change: 1 addition & 0 deletions .env-template
@@ -1,4 +1,5 @@
API_KEY=<LLM api key (for example, open ai key)>
LLM_NAME=docsgpt
VITE_API_STREAMING=true

#For Azure (you can delete it if you don't use Azure)
25 changes: 17 additions & 8 deletions README.md
@@ -86,17 +86,18 @@ On Mac OS or Linux, write:

`./setup.sh`

It will install all the dependencies and allow you to download the local model or use OpenAI.
It will install all the dependencies and allow you to download the local model, use OpenAI, or use our LLM API.

Otherwise, refer to this Guide:

1. Download and open this repository with `git clone https://github.com/arc53/DocsGPT.git`
2. Create a `.env` file in your root directory and set the env variable `API_KEY` with your [OpenAI API key](https://platform.openai.com/account/api-keys) and `VITE_API_STREAMING` to true or false, depending on whether you want streaming answers or not.
2. Create a `.env` file in your root directory and set the env variables, with `VITE_API_STREAMING` set to true or false depending on whether you want streaming answers.
It should look like this inside:

```
API_KEY=Yourkey
LLM_NAME=[docsgpt or openai or others]
VITE_API_STREAMING=true
API_KEY=[if LLM_NAME is openai]
```

See optional environment variables in the [/.env-template](https://github.com/arc53/DocsGPT/blob/main/.env-template) and [/application/.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) files.
@@ -126,7 +127,7 @@ docker compose -f docker-compose-dev.yaml up -d
> Make sure you have Python 3.10 or 3.11 installed.
1. Export required environment variables or prepare a `.env` file in the `/application` folder:
- Copy [.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) and create `.env` with your OpenAI API token for the `API_KEY` and `EMBEDDINGS_KEY` fields.
- Copy [.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) and create `.env`.

(check out [`application/core/settings.py`](application/core/settings.py) if you want to see more config options.)

@@ -147,14 +148,22 @@ python -m venv venv
venv/Scripts/activate
```

3. Change to the `application/` subdir by the command `cd application/` and install dependencies for the backend:
3. Download the embedding model and save it in the `model/` folder:
You can use the script below, or download it manually from [here](https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip), unzip it, and save it in the `model/` folder.

```commandline
pip install -r application/requirements.txt
wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
unzip mpnet-base-v2.zip -d model
rm mpnet-base-v2.zip
```
4. Change to the `application/` subdir with `cd application/` and install the backend dependencies:
```commandline
pip install -r requirements.txt
```

4. Run the app using `flask --app application/app.py run --host=0.0.0.0 --port=7091`.
5. Start worker with `celery -A application.app.celery worker -l INFO`.
5. Run the app using `flask --app application/app.py run --host=0.0.0.0 --port=7091`.
6. Start worker with `celery -A application.app.celery worker -l INFO`.

### Start Frontend

6 changes: 6 additions & 0 deletions application/Dockerfile
@@ -7,13 +7,19 @@ ENV PATH="/root/.cargo/bin:${PATH}"
RUN pip install --upgrade pip && pip install tiktoken==0.5.2
COPY requirements.txt .
RUN pip install -r requirements.txt
RUN apt-get install -y wget unzip
RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
RUN unzip mpnet-base-v2.zip -d model
RUN rm mpnet-base-v2.zip

FROM python:3.11-slim-bullseye

# Copy pre-built packages and binaries from builder stage
COPY --from=builder /usr/local/ /usr/local/

WORKDIR /app
COPY --from=builder /model /app/model

COPY . /app/application
ENV FLASK_APP=app.py
ENV FLASK_DEBUG=true
4 changes: 2 additions & 2 deletions application/core/settings.py
@@ -7,8 +7,8 @@


class Settings(BaseSettings):
LLM_NAME: str = "openai"
EMBEDDINGS_NAME: str = "openai_text-embedding-ada-002"
LLM_NAME: str = "docsgpt"
EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
CELERY_BROKER_URL: str = "redis://localhost:6379/0"
CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
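Since `Settings` extends Pydantic's `BaseSettings`, every default shown here (for example `LLM_NAME = "docsgpt"`) can be overridden by an environment variable of the same name, which is how `.env` and `setup.sh` select a provider. A stdlib-only sketch of that override behaviour (an illustration, not the actual Pydantic machinery):

```python
import os
from dataclasses import dataclass, field

def _env(name, default):
    # Pydantic's BaseSettings performs this lookup (plus parsing/validation).
    return os.environ.get(name, default)

@dataclass
class Settings:
    # Defaults mirror application/core/settings.py; env vars win when set.
    LLM_NAME: str = field(default_factory=lambda: _env("LLM_NAME", "docsgpt"))
    EMBEDDINGS_NAME: str = field(default_factory=lambda: _env(
        "EMBEDDINGS_NAME",
        "huggingface_sentence-transformers/all-mpnet-base-v2"))

os.environ.pop("LLM_NAME", None)  # ensure a clean environment for the demo
default_settings = Settings()     # LLM_NAME == "docsgpt"
os.environ["LLM_NAME"] = "openai"
overridden = Settings()           # re-reads the environment: "openai"
```

Each `Settings()` construction re-reads the environment, mimicking how the real settings object picks up values from a `.env` file at startup.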
Binary file modified application/index.faiss
Binary file not shown.
Binary file modified application/index.pkl
Binary file not shown.
49 changes: 49 additions & 0 deletions application/llm/docsgpt_provider.py
@@ -0,0 +1,49 @@
from application.llm.base import BaseLLM
import json
import requests

class DocsGPTAPILLM(BaseLLM):

def __init__(self, *args, **kwargs):
self.endpoint = "https://llm.docsgpt.co.uk"


def gen(self, model, engine, messages, stream=False, **kwargs):
context = messages[0]['content']
user_question = messages[-1]['content']
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"

response = requests.post(
f"{self.endpoint}/answer",
json={
"prompt": prompt,
"max_new_tokens": 30
}
)
response_clean = response.json()['a'].split("###")[0]

return response_clean

def gen_stream(self, model, engine, messages, stream=True, **kwargs):
context = messages[0]['content']
user_question = messages[-1]['content']
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"

# send prompt to endpoint /stream
response = requests.post(
f"{self.endpoint}/stream",
json={
"prompt": prompt,
"max_new_tokens": 256
},
stream=True
)

for line in response.iter_lines():
if line:
data_str = line.decode('utf-8')
if data_str.startswith("data: "):
data = json.loads(data_str[6:])
yield data['a']

4 changes: 3 additions & 1 deletion application/llm/llm_creator.py
@@ -3,6 +3,7 @@
from application.llm.huggingface import HuggingFaceLLM
from application.llm.llama_cpp import LlamaCpp
from application.llm.anthropic import AnthropicLLM
from application.llm.docsgpt_provider import DocsGPTAPILLM



@@ -13,7 +14,8 @@ class LLMCreator:
'sagemaker': SagemakerAPILLM,
'huggingface': HuggingFaceLLM,
'llama.cpp': LlamaCpp,
'anthropic': AnthropicLLM
'anthropic': AnthropicLLM,
'docsgpt': DocsGPTAPILLM
}

@classmethod
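`LLMCreator` maps the `LLM_NAME` setting to a provider class through the `llms` registry above. A self-contained sketch of the pattern — the stub classes and the `create_llm` name/signature are illustrative stand-ins for the real providers:

```python
class BaseLLM:
    def gen(self, *args, **kwargs):
        raise NotImplementedError

class DocsGPTAPILLM(BaseLLM):  # stub standing in for the real provider
    def gen(self, *args, **kwargs):
        return "docsgpt answer"

class OpenAILLM(BaseLLM):      # stub
    def gen(self, *args, **kwargs):
        return "openai answer"

class LLMCreator:
    # Registry: setting value -> provider class, as in llm_creator.py
    llms = {
        "openai": OpenAILLM,
        "docsgpt": DocsGPTAPILLM,
    }

    @classmethod
    def create_llm(cls, llm_name, *args, **kwargs):
        llm_class = cls.llms.get(llm_name)
        if llm_class is None:
            raise ValueError(f"Unsupported LLM: {llm_name}")
        return llm_class(*args, **kwargs)

llm = LLMCreator.create_llm("docsgpt")
```

Registering the new `DocsGPTAPILLM` is then a one-line dictionary entry, which is exactly what this diff adds.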
5 changes: 5 additions & 0 deletions application/vectorstore/base.py
@@ -44,6 +44,11 @@ def _get_embeddings(self, embeddings_name, embeddings_key=None):
embedding_instance = embeddings_factory[embeddings_name](
cohere_api_key=embeddings_key
)
elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
embedding_instance = embeddings_factory[embeddings_name](
model_name="./model/all-mpnet-base-v2",
model_kwargs={"device": "cpu"},
)
else:
embedding_instance = embeddings_factory[embeddings_name]()

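The new branch above gives the local MPNet model its own constructor kwargs (`model_name` pointing at the unzipped weights, `model_kwargs` pinning it to CPU), while every other name falls through to a default constructor. A sketch of that dispatch, with a stub factory in place of the real embedding classes:

```python
MPNET = "huggingface_sentence-transformers/all-mpnet-base-v2"

def build_embeddings(embeddings_factory, embeddings_name, embeddings_key=None):
    """Choose constructor kwargs per embeddings name, as _get_embeddings does."""
    if embeddings_name == MPNET:
        # Local model: point at the unzipped weights, run on CPU.
        return embeddings_factory[embeddings_name](
            model_name="./model/all-mpnet-base-v2",
            model_kwargs={"device": "cpu"},
        )
    return embeddings_factory[embeddings_name]()

# Stub factory for illustration; the real map holds embedding classes.
factory = {
    MPNET: lambda **kw: kw,
    "openai_text-embedding-ada-002": lambda **kw: kw,
}
local = build_embeddings(factory, MPNET)
# local["model_kwargs"] == {"device": "cpu"}
```

The `./model/all-mpnet-base-v2` path matches where both the README steps and the Dockerfile unzip the downloaded model.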
4 changes: 2 additions & 2 deletions docs/pages/Deploying/Quickstart.md
@@ -8,15 +8,15 @@ Just run the following command:
./setup.sh
```

This command will install all the necessary dependencies and provide you with an option to download the local model or use OpenAI.
This command will install all the necessary dependencies and provide you with an option to use our LLM API, download the local model, or use OpenAI.

If you prefer to follow manual steps, refer to this guide:

1. Open and download this repository with
```bash
git clone https://github.com/arc53/DocsGPT.git
```
2. Create a `.env` file in your root directory and set your `API_KEY` with your [OpenAI API key](https://platform.openai.com/account/api-keys).
2. Create a `.env` file in your root directory and set your `API_KEY` with your [OpenAI API key](https://platform.openai.com/account/api-keys) (optional; only needed if you want to use OpenAI).
3. Run the following commands:
```bash
docker-compose build && docker-compose up
54 changes: 35 additions & 19 deletions frontend/src/Navigation.tsx
@@ -64,7 +64,8 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
const navRef = useRef(null);
const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
const embeddingsName =
import.meta.env.VITE_EMBEDDINGS_NAME || 'openai_text-embedding-ada-002';
import.meta.env.VITE_EMBEDDINGS_NAME ||
'huggingface_sentence-transformers/all-mpnet-base-v2';

const navigate = useNavigate();

@@ -181,15 +182,17 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<img
src={Expand}
alt="menu toggle"
className={`${!navOpen ? 'rotate-180' : 'rotate-0'
} m-auto transition-all duration-200`}
className={`${
!navOpen ? 'rotate-180' : 'rotate-0'
} m-auto transition-all duration-200`}
/>
</button>
)}
<div
ref={navRef}
className={`${!navOpen && '-ml-96 md:-ml-[18rem]'
} duration-20 fixed top-0 z-20 flex h-full w-72 flex-col border-r-2 bg-white transition-all`}
className={`${
!navOpen && '-ml-96 md:-ml-[18rem]'
} duration-20 fixed top-0 z-20 flex h-full w-72 flex-col border-r-2 bg-white transition-all`}
>
<div
className={'visible mt-2 flex h-[6vh] w-full justify-between md:h-12'}
@@ -207,8 +210,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<img
src={Expand}
alt="menu toggle"
className={`${!navOpen ? 'rotate-180' : 'rotate-0'
} m-auto transition-all duration-200`}
className={`${
!navOpen ? 'rotate-180' : 'rotate-0'
} m-auto transition-all duration-200`}
/>
</button>
</div>
@@ -223,8 +227,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
);
}}
className={({ isActive }) =>
`${isActive ? 'bg-gray-3000' : ''
} group mx-4 mt-4 sticky flex cursor-pointer gap-2.5 rounded-3xl border border-silver p-3 hover:border-rainy-gray hover:bg-gray-3000`
`${
isActive ? 'bg-gray-3000' : ''
} group sticky mx-4 mt-4 flex cursor-pointer gap-2.5 rounded-3xl border border-silver p-3 hover:border-rainy-gray hover:bg-gray-3000`
}
>
<img
@@ -237,12 +242,10 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
</p>
</NavLink>
<div className="mb-auto h-[56vh] overflow-x-hidden overflow-y-scroll">

{conversations && (
<div>
<p className="ml-6 mt-3 text-sm font-semibold">Chats</p>
<div className="conversations-container">

{conversations?.map((conversation) => (
<ConversationTile
key={conversation.id}
@@ -259,7 +262,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
)}
</div>

<div className='h-auto flex-col flex justify-end'>
<div className="flex h-auto flex-col justify-end">
<div className="flex flex-col-reverse border-b-2">
<div className="relative my-4 flex gap-2 px-2">
<div
@@ -274,8 +277,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<img
src={Arrow2}
alt="arrow"
className={`${!isDocsListOpen ? 'rotate-0' : 'rotate-180'
} ml-auto mr-3 w-3 transition-all`}
className={`${
!isDocsListOpen ? 'rotate-0' : 'rotate-180'
} ml-auto mr-3 w-3 transition-all`}
/>
</div>
<img
@@ -330,11 +334,16 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<NavLink
to="/settings"
className={({ isActive }) =>
`my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100 ${isActive ? 'bg-gray-3000' : ''
`my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100 ${
isActive ? 'bg-gray-3000' : ''
}`
}
>
<img src={SettingGear} alt="settings" className="ml-2 w-5 opacity-60" />
<img
src={SettingGear}
alt="settings"
className="ml-2 w-5 opacity-60"
/>
<p className="my-auto text-sm text-eerie-black">Settings</p>
</NavLink>
</div>
@@ -343,7 +352,8 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<NavLink
to="/about"
className={({ isActive }) =>
`my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100 ${isActive ? 'bg-gray-3000' : ''
`my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100 ${
isActive ? 'bg-gray-3000' : ''
}`
}
>
@@ -357,7 +367,11 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
rel="noreferrer"
className="my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100"
>
<img src={Documentation} alt="documentation" className="ml-2 w-5" />
<img
src={Documentation}
alt="documentation"
className="ml-2 w-5"
/>
<p className="my-auto text-sm text-eerie-black">Documentation</p>
</a>
<a
@@ -379,7 +393,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
className="my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100"
>
<img src={Github} alt="github-link" className="ml-2 w-5" />
<p className="my-auto text-sm text-eerie-black">Visit our Github</p>
<p className="my-auto text-sm text-eerie-black">
Visit our Github
</p>
</a>
</div>
</div>
24 changes: 20 additions & 4 deletions setup.sh
@@ -3,9 +3,10 @@
# Function to prompt the user for their choice
prompt_user() {
echo "Do you want to:"
echo "1. Download the language model locally (12GB)"
echo "2. Use the OpenAI API"
read -p "Enter your choice (1/2): " choice
echo "1. Use DocsGPT public API (simple and free)"
echo "2. Download the language model locally (12GB)"
echo "3. Use the OpenAI API (requires an API key)"
read -p "Enter your choice (1, 2 or 3): " choice
}

# Function to handle the choice to download the model locally
@@ -67,15 +68,30 @@ use_openai() {
echo "docker-compose down"
}

use_docsgpt() {
echo "LLM_NAME=docsgpt" > .env
echo "VITE_API_STREAMING=true" >> .env
echo "The .env file has been created with LLM_NAME set to docsgpt."

docker-compose build && docker-compose up -d

echo "The application will run on http://localhost:5173"
echo "You can stop the application by running the following command:"
echo "docker-compose down"
}

# Prompt the user for their choice
prompt_user

# Handle the user's choice
case $choice in
1)
download_locally
use_docsgpt
;;
2)
download_locally
;;
3)
use_openai
;;
*)
2 changes: 1 addition & 1 deletion tests/test_vector_store.py
@@ -14,6 +14,6 @@ def test_init_local_faiss_store_huggingface():
index.faiss file in the application/ folder results in a
dimension mismatch error.
"""
settings.EMBEDDINGS_NAME = "huggingface_sentence-transformers/all-mpnet-base-v2"
settings.EMBEDDINGS_NAME = "openai_text-embedding-ada-002"
with pytest.raises(ValueError):
FaissStore("application/", "", None)

2 comments on commit 667b66b


@vercel vercel bot commented on 667b66b Jan 9, 2024


Successfully deployed to the following URLs:

nextra-docsgpt – ./docs

nextra-docsgpt.vercel.app
nextra-docsgpt-git-main-arc53.vercel.app
nextra-docsgpt-arc53.vercel.app
docs.docsgpt.co.uk


@vercel vercel bot commented on 667b66b Jan 9, 2024


Successfully deployed to the following URLs:

docs-gpt – ./frontend

docs-gpt-arc53.vercel.app
docs-gpt-brown.vercel.app
docs-gpt-git-main-arc53.vercel.app
