Commit: ci fail resolve
c0sogi authored and c0sogi committed Jul 31, 2023
1 parent 5708e3a commit 6a02465
Showing 22 changed files with 525 additions and 443 deletions.
16 changes: 7 additions & 9 deletions .github/workflows/ci.yml
@@ -1,5 +1,3 @@
# .github/workflows/ci.yml

name: Continuous Integration

on:
@@ -17,7 +15,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ['3.9', '3.10', '3.11']
python-version: ['3.8', '3.9', '3.10', '3.11']

steps:
- name: Check out code
@@ -28,15 +26,15 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Execute server & install minimum dependencies
run: python -m main --install-pkgs --skip-tf-install --skip-torch-install

- name: Run tests
run: python -m unittest discover tests
- name: Setup Python, install dependencies, and run tests
run: |
python -m pip install --upgrade pip
python -m llama_api.server.app_settings --install-pkgs
python -m unittest discover tests
build-release:
if: github.event_name == 'push' && success()
needs: build-and-test
if: ${{ success() }}
name: Create Release
runs-on: ubuntu-latest
steps:
2 changes: 1 addition & 1 deletion Dockerfile
@@ -41,7 +41,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& apt-get clean \
&& rm -rf /tmp/* \
&& cd /app \
&& python3 -m llama_api.server.app_settings
&& python3 -m llama_api.server.app_settings --force-cuda --install-pkgs

# Set the working directory and start the server.
WORKDIR /app
3 changes: 1 addition & 2 deletions llama_api/schemas/function_calling.py
@@ -1,7 +1,6 @@
"""Helper classes for wrapping functions in OpenAI's API"""

from dataclasses import dataclass
from types import NoneType
from typing import (
Any,
Dict,
@@ -76,7 +75,7 @@ def _get_json_type(python_type: Type[JsonTypes]) -> str:
return "object"
elif python_type is list:
return "array"
elif python_type is NoneType or python_type is None:
elif python_type is type(None) or python_type is None:
return "null"
else:
raise ValueError(
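
A note on the NoneType removal above: `types.NoneType` is only available from Python 3.10, so importing it breaks on the 3.8/3.9 interpreters this commit adds to CI, whereas `type(None)` works on every version. A minimal standalone sketch of the portable check (hypothetical helper name, not the project's full type mapper):

from typing import Any


def json_type_of_none(python_type: Any) -> str:
    """Map Python's 'no value' markers to the JSON Schema type name."""
    # type(None) is valid on every Python 3 version; types.NoneType
    # would raise ImportError on 3.8/3.9.
    if python_type is type(None) or python_type is None:
        return "null"
    raise ValueError(f"Unsupported type: {python_type!r}")


assert json_type_of_none(None) == "null"
assert json_type_of_none(type(None)) == "null"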
62 changes: 43 additions & 19 deletions llama_api/server/app_settings.py
@@ -1,3 +1,4 @@
import argparse
import platform
from contextlib import asynccontextmanager
from os import environ
@@ -6,13 +7,13 @@

from ..shared.config import Config
from ..utils.dependency import (
get_installed_packages,
get_poetry_executable,
git_clone,
install_all_dependencies,
install_package,
install_tensorflow,
install_pytorch,
is_package_available,
)
from ..utils.llama_cpp import build_shared_lib
from ..utils.logger import ApiLogger
@@ -76,12 +77,10 @@ def initialize_before_launch(
# Install poetry
logger.warning(f"⚠️ Poetry not found: {poetry}")
install_package("poetry", force=True)
if not skip_pytorch_install or not is_package_available("torch"):
if not skip_pytorch_install:
# Install pytorch
install_pytorch(force_cuda=force_cuda)
if not skip_tensorflow_install or not is_package_available(
"tensorflow"
):
if not skip_tensorflow_install:
# Install tensorflow
install_tensorflow()

@@ -91,18 +90,20 @@

# Build the shared library of LLaMA C++ code
build_shared_lib(logger=logger)

# Get current packages installed
logger.info(f"📦 Installed packages: {get_installed_packages()}")
else:
logger.warning(
"🏃‍♂️ Skipping package installation..."
"If any packages are missing, "
"use `--install-packages` option to install them."
"use `--install-pkgs` option to install them."
)

# Set the priority of the process
if platform.system() == "Windows":
try:
# Set the priority of the process
set_priority(priority="high")
else:
set_priority(priority="normal")
except Exception:
pass


@asynccontextmanager
@@ -171,14 +172,37 @@ def run(


if __name__ == "__main__":
# Git clone the repositories & install the dependencies.
# This is done before the server is launched to avoid
# the overhead of doing it when the server is running.
# Forcing CUDA is necessary when building the docker image
parser = argparse.ArgumentParser()
parser.add_argument(
"--install-pkgs",
action="store_true",
help="Install all required packages before running the server",
)
parser.add_argument(
"--force-cuda",
action="store_true",
help=(
"Force CUDA version of pytorch to be used"
"when installing pytorch. e.g. torch==2.0.1+cu118"
),
)
parser.add_argument(
"--skip-torch-install",
action="store_true",
help="Skip installing pytorch, if `install-pkgs` is set",
)
parser.add_argument(
"--skip-tf-install",
action="store_true",
help="Skip installing tensorflow, if `install-pkgs` is set",
)

args = parser.parse_args()

initialize_before_launch(
git_and_disk_paths=Config.git_and_disk_paths,
install_packages=True,
force_cuda=True,
skip_pytorch_install=False,
skip_tensorflow_install=False,
install_packages=args.install_pkgs,
force_cuda=args.force_cuda,
skip_pytorch_install=args.skip_torch_install,
skip_tensorflow_install=args.skip_tf_install,
)
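
Since the module now parses its own flags, here is a standalone sketch (flag names copied from the diff, everything else assumed) of how the two invocations in this commit — `python -m llama_api.server.app_settings --install-pkgs` in CI and `python3 -m llama_api.server.app_settings --force-cuda --install-pkgs` in the Dockerfile — parse into the arguments forwarded to `initialize_before_launch`:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--install-pkgs", action="store_true")
parser.add_argument("--force-cuda", action="store_true")
parser.add_argument("--skip-torch-install", action="store_true")
parser.add_argument("--skip-tf-install", action="store_true")

ci_args = parser.parse_args(["--install-pkgs"])
docker_args = parser.parse_args(["--force-cuda", "--install-pkgs"])

# argparse turns dashes into underscores, so `--install-pkgs` becomes the
# `install_pkgs` attribute that feeds the install_packages parameter.
assert ci_args.install_pkgs and not ci_args.force_cuda
assert docker_args.install_pkgs and docker_args.force_cuda
assert not ci_args.skip_torch_install and not ci_args.skip_tf_install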
6 changes: 3 additions & 3 deletions llama_api/server/pools/llama.py
@@ -4,7 +4,7 @@
from os import getpid
from queue import Queue
from threading import Event
from typing import Dict, Iterator, List, Union
from typing import Deque, Dict, Iterator, List, Union

import model_definitions

@@ -36,8 +36,8 @@
logger.info(f"🔧 {current_process()} is initiated with PID: {getpid()}")

lazy = LazyImports() # lazy-loader of modules
completion_generators: deque["BaseCompletionGenerator"] = deque(maxlen=1)
embedding_generators: deque["BaseEmbeddingGenerator"] = deque(maxlen=1)
completion_generators: Deque["BaseCompletionGenerator"] = deque(maxlen=1)
embedding_generators: Deque["BaseEmbeddingGenerator"] = deque(maxlen=1)


def init() -> None:
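
The `Deque` switch above is another Python 3.8 fix: PEP 585 subscripting of the built-in `collections.deque` in annotations is only valid at runtime from Python 3.9, so `deque[...]` raises `TypeError` on 3.8, while `typing.Deque` works on all supported versions. A small generic sketch of the pattern:

from collections import deque
from typing import Deque

# On Python 3.8, a module-level annotation like `items: deque[str]` is
# evaluated at import time and raises TypeError; typing.Deque does not.
items: Deque[str] = deque(maxlen=1)
items.append("first")
items.append("second")  # maxlen=1 evicts "first"
assert list(items) == ["second"]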
2 changes: 1 addition & 1 deletion llama_api/shared/config.py
@@ -12,6 +12,6 @@ class Config:
torch_source: str = "https://download.pytorch.org/whl/torch_stable.html"
tensorflow_version: str = "==2.13.0"
git_and_disk_paths: Dict[str, Union[Path, str]] = {
"https://github.com/abetlen/llama-cpp-python": "repositories/llama_cpp",
"https://github.com/abetlen/llama-cpp-python": "repositories/llama_cpp", # noqa: E501
"https://github.com/turboderp/exllama": "repositories/exllama",
}
16 changes: 9 additions & 7 deletions llama_api/utils/concurrency.py
@@ -3,17 +3,22 @@
from contextlib import contextmanager
from multiprocessing.managers import SyncManager
from os import environ
import platform
from queue import Queue
from sys import version_info
from threading import Event
from typing import Callable, Dict, Optional, ParamSpec, Tuple, TypeVar
from typing import Callable, Dict, Optional, Tuple, TypeVar

from fastapi.concurrency import run_in_threadpool

from ..server.app_settings import set_priority
from ..utils.logger import ApiLogger
from ..utils.process_pool import ProcessPool

if version_info >= (3, 10):
from typing import ParamSpec
else:
from typing_extensions import ParamSpec

T = TypeVar("T")
P = ParamSpec("P")

@@ -27,10 +32,7 @@ def init_process_pool(env_vars: Dict[str, str]) -> None:
and set the environment variables for the child processes"""
try:
# Set the priority of the process
if platform.system() == "Windows":
set_priority(priority="high")
else:
set_priority(priority="normal")
set_priority(priority="high")
except Exception:
pass

@@ -82,7 +84,7 @@ def run_in_executor(
func: Callable[P, T],
*args: P.args,
**kwargs: P.kwargs,
) -> Future[T]:
) -> "Future[T]":
"""Run a function in an executor, and return a future"""

if loop.is_closed:
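
Two related compatibility points in this hunk: `typing.ParamSpec` only landed in the standard library in Python 3.10 (hence the `typing_extensions` fallback), and `concurrent.futures.Future` is not subscriptable at runtime before 3.9, which is why the return annotation is now quoted. A self-contained sketch of both patterns, assuming `typing_extensions` is installed on the older interpreters:

from concurrent.futures import Future, ThreadPoolExecutor
from sys import version_info
from typing import Callable, TypeVar

if version_info >= (3, 10):
    from typing import ParamSpec
else:
    from typing_extensions import ParamSpec  # backport for 3.8/3.9

T = TypeVar("T")
P = ParamSpec("P")
executor = ThreadPoolExecutor(max_workers=1)


def submit(func: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> "Future[T]":
    # Quoting defers evaluation of Future[T], which would fail on 3.8
    # where Future is not subscriptable at runtime.
    return executor.submit(func, *args, **kwargs)


assert submit(lambda a, b: a + b, 2, 3).result() == 5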
66 changes: 55 additions & 11 deletions llama_api/utils/dependency.py
@@ -1,8 +1,9 @@
from importlib.util import find_spec
import os
import sys
from contextlib import contextmanager
from importlib.util import find_spec
from pathlib import Path
from platform import mac_ver
from re import compile
from subprocess import PIPE, check_call, run
from tempfile import mkstemp
@@ -71,6 +72,30 @@ def git_clone(git_path: str, disk_path: Union[Path, str]) -> Optional[bool]:
return None


def get_mac_major_version_string():
# platform.mac_ver() returns a tuple ('10.16', ('', '', ''), 'x86_64')
# Split the version string on '.' and take the major component
major = mac_ver()[0].split(".")[0]

# Prepend 'macosx_' to form a platform tag like 'macosx_10'
return "macosx_" + major


def get_installed_packages() -> List[str]:
"""Return a list of installed packages"""
return [
package.split("==")[0]
for package in run(
[sys.executable, "-m", "pip", "freeze"],
text=True,
stdout=PIPE,
stderr=PIPE,
)
.stdout.strip()
.split("\n")
]


def get_poetry_executable() -> Path:
"""Construct the path to the poetry executable
within the virtual environment.
@@ -89,30 +114,40 @@ def get_proper_torch_cuda_version(
"""Helper function that returns the proper CUDA version of torch."""
if cuda_version == fallback_cuda_version:
return fallback_cuda_version
elif check_if_torch_cuda_version_available(
cuda_version=cuda_version, source=source
elif check_if_torch_version_available(
version=f'cu{cuda_version.replace(".", "")}', source=source
):
return cuda_version
else:
return fallback_cuda_version


def check_if_torch_cuda_version_available(
cuda_version: str = Config.torch_version,
def check_if_torch_version_available(
version: str = Config.torch_version,
source: str = Config.torch_source,
) -> bool:
"""Helper function that checks if the CUDA version of torch is available"""
"""Helper function that checks if the version of torch is available"""
try:
# Determine the version of python, CUDA, and platform
cuda_ver = (f'cu{cuda_version.replace(".", "")}').encode()
canonical_version = compile(r"([0-9\.]+)").search(version)
if not canonical_version:
return False
package_ver = canonical_version.group().encode()
python_ver = (
f"cp{sys.version_info.major}{sys.version_info.minor}"
).encode()
platform = ("win" if sys.platform == "win32" else "linux").encode()
if "win32" in sys.platform:
platform = "win_amd64".encode()
elif "linux" in sys.platform:
platform = "linux_x86_64".encode()
elif "darwin" in sys.platform:
platform = get_mac_major_version_string().encode()
else:
return False

# Check if the CUDA version of torch is available
for line in urlopen(source).read().splitlines():
if cuda_ver in line and python_ver in line and platform in line:
if package_ver in line and python_ver in line and platform in line:
return True
return False
except Exception:
@@ -245,8 +280,17 @@ def install_pytorch(
pip_install += ["-f", source]
elif source:
# If a source is specified, but CUDA is not available,
# install the CPU version of torch
pip_install.append(f"torch{torch_version}+cpu")

if check_if_torch_version_available(
version=f"{torch_version}+cpu",
source=source,
):
# install the CPU version of torch if available
pip_install.append(f"torch{torch_version}+cpu")
else:
# else, install the canonical version of torch
pip_install.append(f"torch{torch_version}")

# If a source is specified, add it to the pip install command
pip_install += ["-f", source]
else:
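
The reworked availability check above reduces to building three byte tokens — the package version extracted by regex (e.g. `118` from `cu118`, or `2.0.1` from `2.0.1+cpu`), the CPython tag (`cp38`, `cp311`, ...), and a platform tag (`win_amd64`, `linux_x86_64`, or `macosx_<major>`) — and scanning the wheel index for a line containing all three. A rough standalone sketch of that matching, run against two made-up index lines instead of the live torch_stable.html page:

import sys
from platform import mac_ver
from re import compile as re_compile
from typing import Optional, Tuple


def build_tokens(version: str) -> Optional[Tuple[bytes, bytes, bytes]]:
    """Derive the (package, python, platform) tokens used for matching."""
    match = re_compile(r"[0-9\.]+").search(version)
    if not match:
        return None
    package = match.group().encode()  # e.g. b"118" or b"2.0.1"
    python = f"cp{sys.version_info.major}{sys.version_info.minor}".encode()
    if "win32" in sys.platform:
        platform_tag = b"win_amd64"
    elif "linux" in sys.platform:
        platform_tag = b"linux_x86_64"
    elif "darwin" in sys.platform:
        platform_tag = ("macosx_" + mac_ver()[0].split(".")[0]).encode()
    else:
        return None
    return package, python, platform_tag


# Fabricated wheel-index lines standing in for torch_stable.html content.
index_lines = [
    b"torch-2.0.1+cu118-cp311-cp311-linux_x86_64.whl",
    b"torch-2.0.1+cpu-cp38-cp38-win_amd64.whl",
]
tokens = build_tokens("cu118")
available = bool(tokens) and any(
    all(token in line for token in tokens) for line in index_lines
)
print("cu118 wheel available for this interpreter/platform:", available)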
2 changes: 1 addition & 1 deletion llama_api/utils/errors.py
@@ -29,7 +29,7 @@ class ErrorResponseCallbacks:
@staticmethod
def token_exceed_callback(
request: Union[CreateCompletionRequest, CreateChatCompletionRequest],
match: Match[str],
match: "Match[str]",
) -> Tuple[int, ErrorResponse]:
context_window = int(match.group(2))
prompt_tokens = int(match.group(1))
4 changes: 2 additions & 2 deletions llama_api/utils/lazy_imports.py
@@ -4,13 +4,13 @@


from functools import wraps
from typing import Callable, TypeVar, Union
from typing import Callable, Set, TypeVar, Union

from .logger import ApiLogger

T = TypeVar("T")
logger = ApiLogger(__name__)
logged_modules: set[str] = set()
logged_modules: Set[str] = set()


def try_import(module_name: str):