Commit: ci fail resolve
c0sogi authored and c0sogi committed Jul 31, 2023
1 parent 5708e3a commit 6a02465
Showing 22 changed files with 525 additions and 443 deletions.
16 changes: 7 additions & 9 deletions .github/workflows/ci.yml
@@ -1,5 +1,3 @@
# .github/workflows/ci.yml

name: Continuous Integration

on:
@@ -17,7 +15,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ['3.9', '3.10', '3.11']
python-version: ['3.8', '3.9', '3.10', '3.11']

steps:
- name: Check out code
@@ -28,15 +26,15 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Execute server & install minimum dependencies
run: python -m main --install-pkgs --skip-tf-install --skip-torch-install

- name: Run tests
run: python -m unittest discover tests
- name: Setup Python, install dependencies, and run tests
run: |
python -m pip install --upgrade pip
python -m llama_api.server.app_settings --install-pkgs
python -m unittest discover tests
build-release:
if: github.event_name == 'push' && success()
needs: build-and-test
if: ${{ success() }}
name: Create Release
runs-on: ubuntu-latest
steps:
2 changes: 1 addition & 1 deletion Dockerfile
@@ -41,7 +41,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& apt-get clean \
&& rm -rf /tmp/* \
&& cd /app \
&& python3 -m llama_api.server.app_settings
&& python3 -m llama_api.server.app_settings --force-cuda --install-pkgs

# Set the working directory and start the server.
WORKDIR /app
3 changes: 1 addition & 2 deletions llama_api/schemas/function_calling.py
@@ -1,7 +1,6 @@
"""Helper classes for wrapping functions in OpenAI's API"""

from dataclasses import dataclass
from types import NoneType
from typing import (
Any,
Dict,
@@ -76,7 +75,7 @@ def _get_json_type(python_type: Type[JsonTypes]) -> str:
return "object"
elif python_type is list:
return "array"
elif python_type is NoneType or python_type is None:
elif python_type is type(None) or python_type is None:
return "null"
else:
raise ValueError(
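
A note on the NoneType removal above: `types.NoneType` is only available from Python 3.10, so importing it breaks on the 3.8/3.9 interpreters this commit adds to CI, whereas `type(None)` works on every version. A minimal standalone sketch of the portable check (hypothetical helper name, not the project's full type mapper):

from typing import Any


def json_type_of_none(python_type: Any) -> str:
    """Map Python's 'no value' markers to the JSON Schema type name."""
    # type(None) is valid on every Python 3 version; types.NoneType
    # would raise ImportError on 3.8/3.9.
    if python_type is type(None) or python_type is None:
        return "null"
    raise ValueError(f"Unsupported type: {python_type!r}")


assert json_type_of_none(None) == "null"
assert json_type_of_none(type(None)) == "null"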
62 changes: 43 additions & 19 deletions llama_api/server/app_settings.py
@@ -1,3 +1,4 @@
import argparse
import platform
from contextlib import asynccontextmanager
from os import environ
@@ -6,13 +7,13 @@

from ..shared.config import Config
from ..utils.dependency import (
get_installed_packages,
get_poetry_executable,
git_clone,
install_all_dependencies,
install_package,
install_tensorflow,
install_pytorch,
is_package_available,
)
from ..utils.llama_cpp import build_shared_lib
from ..utils.logger import ApiLogger
@@ -76,12 +77,10 @@ def initialize_before_launch(
# Install poetry
logger.warning(f"⚠️ Poetry not found: {poetry}")
install_package("poetry", force=True)
if not skip_pytorch_install or not is_package_available("torch"):
if not skip_pytorch_install:
# Install pytorch
install_pytorch(force_cuda=force_cuda)
if not skip_tensorflow_install or not is_package_available(
"tensorflow"
):
if not skip_tensorflow_install:
# Install tensorflow
install_tensorflow()

@@ -91,18 +90,20 @@

# Build the shared library of LLaMA C++ code
build_shared_lib(logger=logger)

# Get current packages installed
logger.info(f"📦 Installed packages: {get_installed_packages()}")
else:
logger.warning(
"🏃‍♂️ Skipping package installation..."
"If any packages are missing, "
"use `--install-packages` option to install them."
"use `--install-pkgs` option to install them."
)

# Set the priority of the process
if platform.system() == "Windows":
try:
# Set the priority of the process
set_priority(priority="high")
else:
set_priority(priority="normal")
except Exception:
pass


@asynccontextmanager
@@ -171,14 +172,37 @@ def run(


if __name__ == "__main__":
# Git clone the repositories & install the dependencies.
# This is done before the server is launched to avoid
# the overhead of doing it when the server is running.
# Forcing CUDA is necessary when building the docker image
parser = argparse.ArgumentParser()
parser.add_argument(
"--install-pkgs",
action="store_true",
help="Install all required packages before running the server",
)
parser.add_argument(
"--force-cuda",
action="store_true",
help=(
"Force CUDA version of pytorch to be used"
"when installing pytorch. e.g. torch==2.0.1+cu118"
),
)
parser.add_argument(
"--skip-torch-install",
action="store_true",
help="Skip installing pytorch, if `install-pkgs` is set",
)
parser.add_argument(
"--skip-tf-install",
action="store_true",
help="Skip installing tensorflow, if `install-pkgs` is set",
)

args = parser.parse_args()

initialize_before_launch(
git_and_disk_paths=Config.git_and_disk_paths,
install_packages=True,
force_cuda=True,
skip_pytorch_install=False,
skip_tensorflow_install=False,
install_packages=args.install_pkgs,
force_cuda=args.force_cuda,
skip_pytorch_install=args.skip_torch_install,
skip_tensorflow_install=args.skip_tf_install,
)
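
Since the module now parses its own flags, here is a standalone sketch (flag names copied from the diff, everything else assumed) of how the two invocations in this commit — `python -m llama_api.server.app_settings --install-pkgs` in CI and `python3 -m llama_api.server.app_settings --force-cuda --install-pkgs` in the Dockerfile — parse into the arguments forwarded to `initialize_before_launch`:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--install-pkgs", action="store_true")
parser.add_argument("--force-cuda", action="store_true")
parser.add_argument("--skip-torch-install", action="store_true")
parser.add_argument("--skip-tf-install", action="store_true")

ci_args = parser.parse_args(["--install-pkgs"])
docker_args = parser.parse_args(["--force-cuda", "--install-pkgs"])

# argparse turns dashes into underscores, so `--install-pkgs` becomes the
# `install_pkgs` attribute that feeds the install_packages parameter.
assert ci_args.install_pkgs and not ci_args.force_cuda
assert docker_args.install_pkgs and docker_args.force_cuda
assert not ci_args.skip_torch_install and not ci_args.skip_tf_install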
6 changes: 3 additions & 3 deletions llama_api/server/pools/llama.py
@@ -4,7 +4,7 @@
from os import getpid
from queue import Queue
from threading import Event
from typing import Dict, Iterator, List, Union
from typing import Deque, Dict, Iterator, List, Union

import model_definitions

@@ -36,8 +36,8 @@
logger.info(f"🔧 {current_process()} is initiated with PID: {getpid()}")

lazy = LazyImports() # lazy-loader of modules
completion_generators: deque["BaseCompletionGenerator"] = deque(maxlen=1)
embedding_generators: deque["BaseEmbeddingGenerator"] = deque(maxlen=1)
completion_generators: Deque["BaseCompletionGenerator"] = deque(maxlen=1)
embedding_generators: Deque["BaseEmbeddingGenerator"] = deque(maxlen=1)


def init() -> None:
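
The `Deque` switch above is another Python 3.8 fix: PEP 585 subscripting of the built-in `collections.deque` in annotations is only valid at runtime from Python 3.9, so `deque[...]` raises `TypeError` on 3.8, while `typing.Deque` works on all supported versions. A small generic sketch of the pattern:

from collections import deque
from typing import Deque

# On Python 3.8, a module-level annotation like `items: deque[str]` is
# evaluated at import time and raises TypeError; typing.Deque does not.
items: Deque[str] = deque(maxlen=1)
items.append("first")
items.append("second")  # maxlen=1 evicts "first"
assert list(items) == ["second"]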
2 changes: 1 addition & 1 deletion llama_api/shared/config.py
@@ -12,6 +12,6 @@ class Config:
torch_source: str = "https://download.pytorch.org/whl/torch_stable.html"
tensorflow_version: str = "==2.13.0"
git_and_disk_paths: Dict[str, Union[Path, str]] = {
"https://github.com/abetlen/llama-cpp-python": "repositories/llama_cpp",
"https://github.com/abetlen/llama-cpp-python": "repositories/llama_cpp", # noqa: E501
"https://github.com/turboderp/exllama": "repositories/exllama",
}
16 changes: 9 additions & 7 deletions llama_api/utils/concurrency.py
@@ -3,17 +3,22 @@
from contextlib import contextmanager
from multiprocessing.managers import SyncManager
from os import environ
import platform
from queue import Queue
from sys import version_info
from threading import Event
from typing import Callable, Dict, Optional, ParamSpec, Tuple, TypeVar
from typing import Callable, Dict, Optional, Tuple, TypeVar

from fastapi.concurrency import run_in_threadpool

from ..server.app_settings import set_priority
from ..utils.logger import ApiLogger
from ..utils.process_pool import ProcessPool

if version_info >= (3, 10):
from typing import ParamSpec
else:
from typing_extensions import ParamSpec

T = TypeVar("T")
P = ParamSpec("P")

@@ -27,10 +32,7 @@ def init_process_pool(env_vars: Dict[str, str]) -> None:
and set the environment variables for the child processes"""
try:
# Set the priority of the process
if platform.system() == "Windows":
set_priority(priority="high")
else:
set_priority(priority="normal")
set_priority(priority="high")
except Exception:
pass

@@ -82,7 +84,7 @@ def run_in_executor(
func: Callable[P, T],
*args: P.args,
**kwargs: P.kwargs,
) -> Future[T]:
) -> "Future[T]":
"""Run a function in an executor, and return a future"""

if loop.is_closed:
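
Two related compatibility points in this hunk: `typing.ParamSpec` only landed in the standard library in Python 3.10 (hence the `typing_extensions` fallback), and `concurrent.futures.Future` is not subscriptable at runtime before 3.9, which is why the return annotation is now quoted. A self-contained sketch of both patterns, assuming `typing_extensions` is installed on the older interpreters:

from concurrent.futures import Future, ThreadPoolExecutor
from sys import version_info
from typing import Callable, TypeVar

if version_info >= (3, 10):
    from typing import ParamSpec
else:
    from typing_extensions import ParamSpec  # backport for 3.8/3.9

T = TypeVar("T")
P = ParamSpec("P")
executor = ThreadPoolExecutor(max_workers=1)


def submit(func: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> "Future[T]":
    # Quoting defers evaluation of Future[T], which would fail on 3.8
    # where Future is not subscriptable at runtime.
    return executor.submit(func, *args, **kwargs)


assert submit(lambda a, b: a + b, 2, 3).result() == 5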
66 changes: 55 additions & 11 deletions llama_api/utils/dependency.py
@@ -1,8 +1,9 @@
from importlib.util import find_spec
import os
import sys
from contextlib import contextmanager
from importlib.util import find_spec
from pathlib import Path
from platform import mac_ver
from re import compile
from subprocess import PIPE, check_call, run
from tempfile import mkstemp
@@ -71,6 +72,30 @@ def git_clone(git_path: str, disk_path: Union[Path, str]) -> Optional[bool]:
return None


def get_mac_major_version_string():
# platform.mac_ver() returns a tuple ('10.16', ('', '', ''), 'x86_64')
# Split the version string on '.' and take the major component
major = mac_ver()[0].split(".")[0]

# Prepend 'macosx_' to form a platform tag like 'macosx_10'
return "macosx_" + major


def get_installed_packages() -> List[str]:
"""Return a list of installed packages"""
return [
package.split("==")[0]
for package in run(
[sys.executable, "-m", "pip", "freeze"],
text=True,
stdout=PIPE,
stderr=PIPE,
)
.stdout.strip()
.split("\n")
]


def get_poetry_executable() -> Path:
"""Construct the path to the poetry executable
within the virtual environment.
@@ -89,30 +114,40 @@ def get_proper_torch_cuda_version(
"""Helper function that returns the proper CUDA version of torch."""
if cuda_version == fallback_cuda_version:
return fallback_cuda_version
elif check_if_torch_cuda_version_available(
cuda_version=cuda_version, source=source
elif check_if_torch_version_available(
version=f'cu{cuda_version.replace(".", "")}', source=source
):
return cuda_version
else:
return fallback_cuda_version


def check_if_torch_cuda_version_available(
cuda_version: str = Config.torch_version,
def check_if_torch_version_available(
version: str = Config.torch_version,
source: str = Config.torch_source,
) -> bool:
"""Helper function that checks if the CUDA version of torch is available"""
"""Helper function that checks if the version of torch is available"""
try:
# Determine the version of python, CUDA, and platform
cuda_ver = (f'cu{cuda_version.replace(".", "")}').encode()
canonical_version = compile(r"([0-9\.]+)").search(version)
if not canonical_version:
return False
package_ver = canonical_version.group().encode()
python_ver = (
f"cp{sys.version_info.major}{sys.version_info.minor}"
).encode()
platform = ("win" if sys.platform == "win32" else "linux").encode()
if "win32" in sys.platform:
platform = "win_amd64".encode()
elif "linux" in sys.platform:
platform = "linux_x86_64".encode()
elif "darwin" in sys.platform:
platform = get_mac_major_version_string().encode()
else:
return False

# Check if the CUDA version of torch is available
for line in urlopen(source).read().splitlines():
if cuda_ver in line and python_ver in line and platform in line:
if package_ver in line and python_ver in line and platform in line:
return True
return False
except Exception:
@@ -245,8 +280,17 @@ def install_pytorch(
pip_install += ["-f", source]
elif source:
# If a source is specified, but CUDA is not available,
# install the CPU version of torch
pip_install.append(f"torch{torch_version}+cpu")

if check_if_torch_version_available(
version=f"{torch_version}+cpu",
source=source,
):
# install the CPU version of torch if available
pip_install.append(f"torch{torch_version}+cpu")
else:
# else, install the canonical version of torch
pip_install.append(f"torch{torch_version}")

# If a source is specified, add it to the pip install command
pip_install += ["-f", source]
else:
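
The reworked availability check above reduces to building three byte tokens — the package version extracted by regex (e.g. `118` from `cu118`, or `2.0.1` from `2.0.1+cpu`), the CPython tag (`cp38`, `cp311`, ...), and a platform tag (`win_amd64`, `linux_x86_64`, or `macosx_<major>`) — and scanning the wheel index for a line containing all three. A rough standalone sketch of that matching, run against two made-up index lines instead of the live torch_stable.html page:

import sys
from platform import mac_ver
from re import compile as re_compile
from typing import Optional, Tuple


def build_tokens(version: str) -> Optional[Tuple[bytes, bytes, bytes]]:
    """Derive the (package, python, platform) tokens used for matching."""
    match = re_compile(r"[0-9\.]+").search(version)
    if not match:
        return None
    package = match.group().encode()  # e.g. b"118" or b"2.0.1"
    python = f"cp{sys.version_info.major}{sys.version_info.minor}".encode()
    if "win32" in sys.platform:
        platform_tag = b"win_amd64"
    elif "linux" in sys.platform:
        platform_tag = b"linux_x86_64"
    elif "darwin" in sys.platform:
        platform_tag = ("macosx_" + mac_ver()[0].split(".")[0]).encode()
    else:
        return None
    return package, python, platform_tag


# Fabricated wheel-index lines standing in for torch_stable.html content.
index_lines = [
    b"torch-2.0.1+cu118-cp311-cp311-linux_x86_64.whl",
    b"torch-2.0.1+cpu-cp38-cp38-win_amd64.whl",
]
tokens = build_tokens("cu118")
available = bool(tokens) and any(
    all(token in line for token in tokens) for line in index_lines
)
print("cu118 wheel available for this interpreter/platform:", available)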
2 changes: 1 addition & 1 deletion llama_api/utils/errors.py
@@ -29,7 +29,7 @@ class ErrorResponseCallbacks:
@staticmethod
def token_exceed_callback(
request: Union[CreateCompletionRequest, CreateChatCompletionRequest],
match: Match[str],
match: "Match[str]",
) -> Tuple[int, ErrorResponse]:
context_window = int(match.group(2))
prompt_tokens = int(match.group(1))
4 changes: 2 additions & 2 deletions llama_api/utils/lazy_imports.py
@@ -4,13 +4,13 @@


from functools import wraps
from typing import Callable, TypeVar, Union
from typing import Callable, Set, TypeVar, Union

from .logger import ApiLogger

T = TypeVar("T")
logger = ApiLogger(__name__)
logged_modules: set[str] = set()
logged_modules: Set[str] = set()


def try_import(module_name: str):