Commit 17a0291 (merge, 2 parents: c7fad85 + 80dd0f7)
pseudotensor committed Sep 15, 2023
Showing 29 changed files with 1,829 additions and 289 deletions.
11 changes: 4 additions & 7 deletions Makefile
@@ -1,8 +1,7 @@
all: clean dist

PACKAGE_VERSION := `cat version.txt | tr -d '\n'`
BUILD_TAG_FILES := docker_build_script_ubuntu.sh requirements.txt Dockerfile `ls reqs_optional/*.txt | sort`
BUILD_TAG := $(shell md5sum $(BUILD_TAG_FILES) 2> /dev/null | sort | md5sum | cut -d' ' -f1)
BUILD_TAG := $(shell git describe --always --dirty)
DOCKER_TEST_IMAGE := harbor.h2o.ai/h2ogpt/test-image:$(BUILD_TAG)
PYTHON_BINARY ?= `which python`
DEFAULT_MARKERS ?= "not need_tokens and not need_gpu"
@@ -51,6 +50,7 @@ docker_build_deps:
@sed -i '/# Install prebuilt dependencies/,$$d' docker_build_script_ubuntu.sh
@docker build -t h2ogpt-deps-builder -f Dockerfile .
@mv docker_build_script_ubuntu.sh.back docker_build_script_ubuntu.sh
@mkdir -p prebuilt_deps
@docker run \
--rm \
-it \
@@ -69,12 +69,9 @@ docker_build_deps:
--rm \
-it \
--entrypoint bash \
--runtime nvidia \
-v `pwd`:/dot \
-v /etc/passwd:/etc/passwd:ro \
-v /etc/group:/etc/group:ro \
-u `id -u`:`id -g` \
h2ogpt-deps-builder -c " \
quay.io/pypa/manylinux2014_x86_64 -c " \
ln -s /usr/local/bin/python3.10 /usr/local/bin/python3 && cd /tmp && \
git clone https://github.com/h2oai/duckdb.git && \
cd duckdb && \
git checkout dcd8c1ffc53dd020623630efb99ba6a3a4cbc5ad && \
1 change: 1 addition & 0 deletions README.md
@@ -43,6 +43,7 @@ wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7
python generate.py --base_model='llama' --prompt_type=llama2
```
then go to your browser by visiting [http://127.0.0.1:7860](http://127.0.0.1:7860) or [http://localhost:7860](http://localhost:7860).
If you encounter issues with llama-cpp-python or other packages that try to compile and fail, try the binary wheels for your platform, as linked in the detailed instructions below.
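
For example, one can ask pip to refuse source builds so it only uses a prebuilt wheel (a minimal sketch; it assumes a binary wheel of `llama-cpp-python` exists for your platform, otherwise use the wheel URLs from the detailed instructions):
```bash
pip uninstall -y llama-cpp-python
pip install --only-binary=:all: llama-cpp-python
```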

---

4 changes: 3 additions & 1 deletion client/h2ogpt_client/_core.py
@@ -125,6 +125,7 @@ def create(
params["pdf_loaders"] = None
params["url_loaders"] = None
params["jq_schema"] = None
params["visible_models"] = None
return TextCompletion(self._client, params)


@@ -248,7 +249,8 @@ def create(
params["pdf_loaders"] = None
params["url_loaders"] = None
params["jq_schema"] = None
params["chatbot"] = [] # chat history
params["visible_models"] = None
params["chatbot"] = [] # chat history (FIXME: Only works if 1 model?)
return ChatCompletion(self._client, params)


1 change: 1 addition & 0 deletions client/h2ogpt_client/_utils.py
@@ -40,6 +40,7 @@
pdf_loaders="pdf_loaders",
url_loaders="url_loaders",
jq_schema="jq_schema",
visible_models="visible_models",
)


4 changes: 2 additions & 2 deletions client/poetry.lock

Some generated files are not rendered by default.

23 changes: 23 additions & 0 deletions docs/FAQ.md
@@ -1,5 +1,28 @@
## Frequently asked questions

### Migration from Chroma < 0.4 to > 0.4

#### Option 1: Use old Chroma for old DBs

No action is needed from the user. By default, h2oGPT does not migrate old databases. This is handled internally by the requirements added in `requirements_optional_langchain.txt`, which pin special wheels for old versions of chromadb and hnswlib that handle migration better than chromadb itself.
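
As an illustration, these are the wheels pinned in `reqs_optional/requirements_optional_langchain.txt`, and they can also be installed directly (URLs copied from that file):
```bash
pip install https://h2o-release.s3.amazonaws.com/h2ogpt/chromamigdb-0.3.25-py3-none-any.whl
pip install https://h2o-release.s3.amazonaws.com/h2ogpt/hnswmiglib-0.7.0.tgz
```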

#### Option 2: Automatically Migrate

By default, h2oGPT does not migrate automatically (`--auto_migrate_db=False` for `generate.py`). One can set this to `True` for auto-migration, which occurs on demand when a database is accessed and may take some time for larger databases (about 0.03s per chunk).
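
For example (a minimal sketch; the model arguments are just the quick-start ones):
```bash
python generate.py --base_model='llama' --prompt_type=llama2 --auto_migrate_db=True
```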

#### Option 3: Manually Migrate

One can keep `--auto_migrate_db=False` and migrate databases manually by doing the following.

* Install and run migration tool
```
pip install chroma-migrate
chroma-migrate
```
* Choose DuckDB
* Choose "Files I can use ..."
* Choose your collection path, e.g. `db_dir_UserData` for collection name `UserData`

### Adding Models

One can choose any Hugging Face model or quantized GGML model file in h2oGPT.
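
For instance (a sketch; the Hugging Face model id is illustrative):
```bash
# Hugging Face model id (illustrative)
python generate.py --base_model=h2oai/h2ogpt-4096-llama2-7b-chat
# quantized GGML model file, as in the quick start
python generate.py --base_model='llama' --prompt_type=llama2
```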
5 changes: 2 additions & 3 deletions docs/README_LINUX.md
@@ -140,12 +140,11 @@ These instructions are for Ubuntu x86_64 (other linux would be similar with diff
* If one sees `/usr/bin/nvcc` mentioned in errors, that file needs to be removed, as it would likely conflict with the version installed for conda.
* Note that once `llama-cpp-python` is compiled to support CUDA, it no longer works for CPU mode, so one would have to reinstall it without the above options to recover CPU mode, or keep a separate h2oGPT env for CPU mode.
* Control Core Count:
* Control Core Count for chroma < 0.4 using chromamigdb package:
* Chroma < 0.4 uses DuckDB 0.8.1, which offers no control over the number of threads per database: `import duckdb` spawns threads for all virtual cores, and each db consumes another number of threads equal to the virtual cores. To prevent this, one can rebuild duckdb using [this modification](https://github.com/h2oai/duckdb/commit/dcd8c1ffc53dd020623630efb99ba6a3a4cbc5ad) or try the prebuilt wheel for x86_64 built on Ubuntu 20 (a verification sketch follows below).
```bash
pip install https://h2o-release.s3.amazonaws.com/h2ogpt/duckdb-0.8.2.dev4025%2Bg9698e9e6a8.d20230907-cp310-cp310-linux_x86_64.whl --no-cache-dir --force-reinstall
pip install https://h2o-release.s3.amazonaws.com/h2ogpt/duckdb-0.8.2.dev4025%2Bg9698e9e6a8.d20230907-cp310-cp310-linux_x86_64.whl --no-cache-dir --force-reinstall --no-deps
```
See [Dockerfile](../Dockerfile) for more details.
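
To confirm the prebuilt wheel is the one in use, check the installed version (a quick sketch; the expected string is read off the wheel filename above):
```bash
python -c "import duckdb; print(duckdb.__version__)"
# expect something like 0.8.2.dev4025+g9698e9e6a8.d20230907
```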
### Compile Install Issues
* `/usr/local/cuda/include/crt/host_config.h:132:2: error: #error -- unsupported GNU version! gcc versions later than 11 are not supported!`
68 changes: 63 additions & 5 deletions gradio_utils/css.py
@@ -12,16 +12,30 @@ def get_css(kwargs) -> str:


def make_css_base() -> str:
css1 = """
#col_container {margin-left: auto; margin-right: auto; text-align: left;}
"""
return css1 + """
return """
#col_container {margin-left: auto; margin-right: auto; text-align: left;}
@import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600&display=swap');
body.dark{#warning {background-color: #555555};}
label > span:first-child {
background-color: var(--primary-200)
#sidebar {
order: 1;
@media (max-width: 463px) {
order: 2;
}
}
#col-tabs {
order: 2;
@media (max-width: 463px) {
order: 1;
}
}
#small_btn {
@@ -88,13 +102,57 @@ def make_css_base() -> str:
display: flex;
justify-content: center;
border: 1px solid var(--primary-500) !important;
@media (max-width: 463px) {
width: 56px;
}
}
#attach-button > img {
margin-right: 0;
}
#prompt-form > label > textarea {
padding-right: 40px;
padding-right: 104px;
@media (max-width: 463px) {
min-height: 94px;
padding-right: 70px;
}
}
#header-links {
float: left;
justify-content: left;
height: 80px;
width: 195px;
margin-top: 0px;
}
#main-logo {
display: flex;
justify-content: center;
margin-bottom: 30px;
margin-right: 330px;
@media (max-width: 463px) {
justify-content: flex-end;
margin-right: 0;
margin-bottom: 0;
}
}
#visible-models > label > div.wrap > div.wrap-inner > div.secondary-wrap > div.remove-all {
display: none !important;
}
#visible-models > label > div.wrap > div.wrap-inner > div.token {
display: none !important;
}
#visible-models > label > div.wrap > div.wrap-inner > div.secondary-wrap::before {
content: "Select";
padding: 0 4px;
margin-right: 2px;
}
"""
40 changes: 26 additions & 14 deletions gradio_utils/prompt_form.py
@@ -5,29 +5,41 @@


def make_chatbots(output_label0, output_label0_model2, **kwargs):
visible_models = kwargs['visible_models']
all_models = kwargs['all_models']

text_outputs = []
chat_kwargs = []
for model_state_lock in kwargs['model_states']:
for model_state_locki, model_state_lock in enumerate(kwargs['model_states']):
if os.environ.get('DEBUG_MODEL_LOCK'):
model_name = model_state_lock["base_model"] + " : " + model_state_lock["inference_server"]
else:
model_name = model_state_lock["base_model"]
output_label = f'h2oGPT [{model_name}]'
min_width = 250 if kwargs['gradio_size'] in ['small', 'large', 'medium'] else 160
chat_kwargs.append(dict(label=output_label, visible=kwargs['model_lock'], elem_classes='chatsmall',
chat_kwargs.append(dict(label=output_label, elem_classes='chatsmall',
height=kwargs['height'] or 400, min_width=min_width,
show_copy_button=kwargs['show_copy_button']))
show_copy_button=kwargs['show_copy_button'],
visible=kwargs['model_lock'] and (visible_models is None or
model_state_locki in visible_models or
all_models[model_state_locki] in visible_models
)))

# base view on initial visible choice
if visible_models:
len_visible = len(visible_models)
else:
len_visible = len(kwargs['model_states'])
if kwargs['model_lock_columns'] == -1:
kwargs['model_lock_columns'] = len(kwargs['model_states'])
kwargs['model_lock_columns'] = len_visible
if kwargs['model_lock_columns'] is None:
kwargs['model_lock_columns'] = 3

ncols = kwargs['model_lock_columns']
if kwargs['model_states'] == 0:
nrows = 0
else:
nrows = math.ceil(len(kwargs['model_states']) / kwargs['model_lock_columns'])
nrows = math.ceil(len_visible / kwargs['model_lock_columns'])

if kwargs['model_lock_columns'] == 0:
# not using model_lock
@@ -43,49 +55,49 @@ def make_chatbots(output_label0, output_label0_model2, **kwargs):
elif nrows == 2:
with gr.Row():
for mii, (chat_kwargs1, model_state_lock) in enumerate(zip(chat_kwargs, kwargs['model_states'])):
if mii >= len(kwargs['model_states']) / 2:
if mii >= len_visible / 2:
continue
text_outputs.append(gr.Chatbot(**chat_kwargs1))
with gr.Row():
for mii, (chat_kwargs1, model_state_lock) in enumerate(zip(chat_kwargs, kwargs['model_states'])):
if mii < len(kwargs['model_states']) / 2:
if mii < len_visible / 2:
continue
text_outputs.append(gr.Chatbot(**chat_kwargs1))
elif nrows == 3:
with gr.Row():
for mii, (chat_kwargs1, model_state_lock) in enumerate(zip(chat_kwargs, kwargs['model_states'])):
if mii >= 1 * len(kwargs['model_states']) / 3:
if mii >= 1 * len_visible / 3:
continue
text_outputs.append(gr.Chatbot(**chat_kwargs1))
with gr.Row():
for mii, (chat_kwargs1, model_state_lock) in enumerate(zip(chat_kwargs, kwargs['model_states'])):
if mii < 1 * len(kwargs['model_states']) / 3 or mii >= 2 * len(kwargs['model_states']) / 3:
if mii < 1 * len_visible / 3 or mii >= 2 * len_visible / 3:
continue
text_outputs.append(gr.Chatbot(**chat_kwargs1))
with gr.Row():
for mii, (chat_kwargs1, model_state_lock) in enumerate(zip(chat_kwargs, kwargs['model_states'])):
if mii < 2 * len(kwargs['model_states']) / 3:
if mii < 2 * len_visible / 3:
continue
text_outputs.append(gr.Chatbot(**chat_kwargs1))
elif nrows >= 4:
with gr.Row():
for mii, (chat_kwargs1, model_state_lock) in enumerate(zip(chat_kwargs, kwargs['model_states'])):
if mii >= 1 * len(kwargs['model_states']) / 4:
if mii >= 1 * len_visible / 4:
continue
text_outputs.append(gr.Chatbot(**chat_kwargs1))
with gr.Row():
for mii, (chat_kwargs1, model_state_lock) in enumerate(zip(chat_kwargs, kwargs['model_states'])):
if mii < 1 * len(kwargs['model_states']) / 4 or mii >= 2 * len(kwargs['model_states']) / 4:
if mii < 1 * len_visible / 4 or mii >= 2 * len_visible / 4:
continue
text_outputs.append(gr.Chatbot(**chat_kwargs1))
with gr.Row():
for mii, (chat_kwargs1, model_state_lock) in enumerate(zip(chat_kwargs, kwargs['model_states'])):
if mii < 2 * len(kwargs['model_states']) / 4 or mii >= 3 * len(kwargs['model_states']) / 4:
if mii < 2 * len_visible / 4 or mii >= 3 * len_visible / 4:
continue
text_outputs.append(gr.Chatbot(**chat_kwargs1))
with gr.Row():
for mii, (chat_kwargs1, model_state_lock) in enumerate(zip(chat_kwargs, kwargs['model_states'])):
if mii < 3 * len(kwargs['model_states']) / 4:
if mii < 3 * len_visible / 4:
continue
text_outputs.append(gr.Chatbot(**chat_kwargs1))

4 changes: 2 additions & 2 deletions helm/h2ogpt-chart/Chart.yaml
@@ -15,10 +15,10 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0-67
version: 0.1.0-85

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: 0.1.0-67
appVersion: 0.1.0-85
9 changes: 8 additions & 1 deletion reqs_optional/requirements_optional_langchain.txt
@@ -13,7 +13,14 @@ openai==0.27.8
replicate==0.10.0

# local vector db
chromadb==0.3.25
chromadb==0.4.10

# chroma migration
chroma-migrate==0.0.7
duckdb==0.7.1
https://h2o-release.s3.amazonaws.com/h2ogpt/chromamigdb-0.3.25-py3-none-any.whl
https://h2o-release.s3.amazonaws.com/h2ogpt/hnswmiglib-0.7.0.tgz

# server vector db
#pymilvus==2.2.8

