diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml index 85050581a..e9fd8664b 100644 --- a/.github/workflows/cortex-cpp-quality-gate.yml +++ b/.github/workflows/cortex-cpp-quality-gate.yml @@ -5,6 +5,8 @@ on: types: [opened, synchronize, reopened, ready_for_review] paths: ["engine/**", ".github/workflows/cortex-cpp-quality-gate.yml"] workflow_dispatch: + schedule: + - cron: '0 22 * * *' env: LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf @@ -102,7 +104,7 @@ jobs: cd engine echo "huggingFaceToken: ${{ secrets.HUGGINGFACE_TOKEN_READ }}" > ~/.cortexrc echo "gitHubToken: ${{ secrets.PAT_SERVICE_ACCOUNT }}" >> ~/.cortexrc - ./build/cortex + # ./build/cortex cat ~/.cortexrc - name: Run unit tests @@ -115,10 +117,10 @@ jobs: - name: Run setup config run: | cd engine - echo "huggingFaceToken: ${{ secrets.HUGGINGFACE_TOKEN_READ }}" > ~/.cortexrc + echo "apiServerPort: 3928" > ~/.cortexrc + echo "huggingFaceToken: ${{ secrets.HUGGINGFACE_TOKEN_READ }}" >> ~/.cortexrc echo "gitHubToken: ${{ secrets.PAT_SERVICE_ACCOUNT }}" >> ~/.cortexrc - echo "apiServerPort: 3928" >> ~/.cortexrc - ./build/cortex + # ./build/cortex cat ~/.cortexrc - name: Run e2e tests @@ -149,6 +151,34 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }} + - name: Run e2e tests + if: github.event_name == 'schedule' && runner.os != 'Windows' && github.event.pull_request.draft == false + run: | + cd engine + cp build/cortex build/cortex-nightly + cp build/cortex build/cortex-beta + python -m pip install --upgrade pip + python -m pip install -r e2e-test/requirements.txt + python e2e-test/cortex-llamacpp-e2e-nightly.py + rm build/cortex-nightly + rm build/cortex-beta + env: + GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }} + + - name: Run e2e tests + if: github.event_name == 'schedule' && runner.os == 'Windows' && github.event.pull_request.draft == false + run: | + cd engine + cp build/cortex.exe build/cortex-nightly.exe + cp build/cortex.exe build/cortex-beta.exe + python -m pip install --upgrade pip + python -m pip install -r e2e-test/requirements.txt + python e2e-test/cortex-llamacpp-e2e-nightly.py + rm build/cortex-nightly.exe + rm build/cortex-beta.exe + env: + GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }} + - name: Pre-package run: | cd engine @@ -188,40 +218,58 @@ jobs: AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" - # build-docker-and-test: - # runs-on: ubuntu-latest - # steps: - # - name: Getting the repo - # uses: actions/checkout@v3 - # with: - # submodules: 'recursive' + build-docker-and-test: + runs-on: ubuntu-24-04-docker + steps: + - name: Getting the repo + uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Run Docker + if: github.event_name != 'schedule' + run: | + docker build \ + --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \ + --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \ + --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \ + --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \ + -t menloltd/cortex:test -f docker/Dockerfile.cache . 
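+          # Smoke-test the image: the API server listens on 39281 inside the container and is published on host port 3928 for the e2e suite.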
+ docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test + sleep 20 - # - name: Set up QEMU - # uses: docker/setup-qemu-action@v3 + - name: Run Docker + if: github.event_name == 'schedule' + run: | + latest_prerelease=$(curl -s https://api.github.com/repos/cortexcpp/cortex.cpp/releases | jq -r '.[] | select(.prerelease == true) | .tag_name' | head -n 1) + echo "cortex.llamacpp latest release: $latest_prerelease" + docker build \ + --build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \ + --build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \ + --build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \ + --build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \ + --build-arg CORTEX_CPP_VERSION="${latest_prerelease}" \ + -t menloltd/cortex:test -f docker/Dockerfile.cache . + docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test + sleep 20 - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v3 - - # - name: Run Docker - # run: | - # docker build -t menloltd/cortex:test -f docker/Dockerfile . - # docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test - - # - name: use python - # uses: actions/setup-python@v5 - # with: - # python-version: "3.10" - - # - name: Run e2e tests - # run: | - # cd engine - # python -m pip install --upgrade pip - # python -m pip install -r e2e-test/requirements.txt - # pytest e2e-test/test_api_docker.py - - # - name: Run Docker - # continue-on-error: true - # if: always() - # run: | - # docker stop cortex - # docker rm cortex + - name: use python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Run e2e tests + run: | + cd engine + python -m pip install --upgrade pip + python -m pip install -r e2e-test/requirements.txt + pytest e2e-test/test_api_docker.py + + - name: Run Docker + continue-on-error: true + if: always() + run: | + docker logs cortex + docker stop cortex + docker rm cortex + echo "y\n" | docker system prune -af diff --git a/docker/Dockerfile b/docker/Dockerfile index 4cbdbef29..744c3899c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,16 +1,12 @@ -FROM ubuntu:22.04 as base - -FROM base as build - -ARG CORTEX_CPP_VERSION=latest - -ARG CMAKE_EXTRA_FLAGS="" +# Stage 1: Base dependencies (common stage) +FROM ubuntu:22.04 as common ENV DEBIAN_FRONTEND=noninteractive -# Install dependencies +# Install common dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ ca-certificates \ + software-properties-common \ curl \ wget \ jq \ @@ -20,11 +16,19 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN apt-get update && apt-get install -y --no-install-recommends \ +# Stage 2: Build dependencies and compilation +FROM common as build + +# Install Dependencies +RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \ + apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + cmake \ + make \ git \ uuid-dev \ lsb-release \ - software-properties-common \ gpg \ zip \ unzip \ @@ -32,59 +36,45 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ g++ \ ninja-build \ pkg-config \ + python3-pip \ openssl && \ + pip3 install awscli && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN wget -O - 
https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \ - apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" && \ - apt-get update && \ - apt-get install -y cmake && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* +ARG CORTEX_CPP_VERSION=latest +ARG CMAKE_EXTRA_FLAGS="" WORKDIR /app +# Copy source code COPY ./engine /app/engine - COPY ./docs/static/openapi/cortex.json /app/docs/static/openapi/cortex.json +# Build project +# Configure vcpkg binary sources RUN cd engine && make configure-vcpkg && make build CMAKE_EXTRA_FLAGS="-DCORTEX_CPP_VERSION=${CORTEX_CPP_VERSION} -DCMAKE_BUILD_TEST=OFF -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake ${CMAKE_EXTRA_FLAGS}" -FROM base as runtime - -ENV DEBIAN_FRONTEND=noninteractive - -# Install dependencies -RUN apt-get update && apt-get install -y --no-install-recommends \ - ca-certificates \ - curl \ - wget \ - jq \ - tar \ - openmpi-bin \ - libopenmpi-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -ARG CORTEX_LLAMACPP_VERSION=latest - +# Stage 3: Runtime +FROM common as runtime +WORKDIR /app COPY --from=build /app/engine/build/cortex /usr/local/bin/cortex COPY --from=build /app/engine/build/cortex-server /usr/local/bin/cortex-server COPY ./docker/download-cortex.llamacpp.sh /tmp/download-cortex.llamacpp.sh +COPY ./docker/entrypoint.sh /usr/local/bin/entrypoint.sh -# Get the latest version of the Cortex Llama +# Get the latest version of Cortex Llama +ARG CORTEX_LLAMACPP_VERSION=latest RUN chmod +x /tmp/download-cortex.llamacpp.sh && /bin/bash /tmp/download-cortex.llamacpp.sh ${CORTEX_LLAMACPP_VERSION} -# Copy the entrypoint script -COPY ./docker/entrypoint.sh /usr/local/bin/entrypoint.sh - +# Configure entrypoint RUN chmod +x /usr/local/bin/entrypoint.sh EXPOSE 39281 +# Healthcheck HEALTHCHECK --interval=300s --timeout=30s --start-period=10s --retries=3 \ CMD curl -f http://127.0.0.1:39281/healthz || exit 1 - + ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] diff --git a/docker/Dockerfile.cache b/docker/Dockerfile.cache new file mode 100644 index 000000000..0a9cbe02d --- /dev/null +++ b/docker/Dockerfile.cache @@ -0,0 +1,121 @@ +# Stage 1: Base dependencies (common stage) +FROM ubuntu:22.04 as common + +ENV DEBIAN_FRONTEND=noninteractive + +# Install common dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + software-properties-common \ + curl \ + wget \ + jq \ + tar \ + openmpi-bin \ + libopenmpi-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Stage 2: Build dependencies and compilation +FROM common as build + +# Install Dependencies +RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \ + apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + cmake \ + make \ + git \ + uuid-dev \ + lsb-release \ + gpg \ + zip \ + unzip \ + gcc \ + g++ \ + ninja-build \ + pkg-config \ + python3-pip \ + openssl && \ + pip3 install awscli && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ARG CORTEX_CPP_VERSION=latest +ARG CMAKE_EXTRA_FLAGS="" + +WORKDIR /app + +# Build arguments for remote cache +ARG REMOTE_CACHE_URL="" +ARG MINIO_ENDPOINT_URL="" +ARG MINIO_ACCESS_KEY="" +ARG MINIO_SECRET_KEY="" + +# Configure cache +ENV 
LOCAL_CACHE_DIR="/vcpkg-cache" +RUN mkdir -p ${LOCAL_CACHE_DIR} + +# Configure MinIO alias (if remote cache is provided) +RUN if [ -n "${REMOTE_CACHE_URL}" ]; then \ + echo "Setting up MinIO for remote cache..." && \ + aws configure set default.s3.signature_version s3v4 && \ + aws configure set aws_access_key_id ${MINIO_ACCESS_KEY} && \ + aws configure set aws_secret_access_key ${MINIO_SECRET_KEY} && \ + aws configure set default.region us-east-1; \ + else \ + echo "No remote cache provided, using local fallback..."; \ + fi + +# Sync cache from MinIO (if remote cache is provided) +RUN if [ -n "${REMOTE_CACHE_URL}" ]; then \ + echo "Downloading cache from MinIO..." && \ + aws --endpoint-url=${MINIO_ENDPOINT_URL} s3 sync s3://vcpkg-cache ${LOCAL_CACHE_DIR}; \ + else \ + echo "No remote cache provided, skipping download."; \ + fi + +# Copy source code +COPY ./engine /app/engine +COPY ./docs/static/openapi/cortex.json /app/docs/static/openapi/cortex.json + +# Build project +# Configure vcpkg binary sources +RUN export VCPKG_BINARY_SOURCES="files,${LOCAL_CACHE_DIR},readwrite;default"; \ + cd engine && make configure-vcpkg && make build CMAKE_EXTRA_FLAGS="-DCORTEX_CPP_VERSION=${CORTEX_CPP_VERSION} -DCMAKE_BUILD_TEST=OFF -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake ${CMAKE_EXTRA_FLAGS}" + + # Upload updated cache to MinIO (if remote cache is provided) +RUN if [ -n "${REMOTE_CACHE_URL}" ]; then \ + echo "Uploading cache to MinIO..." && \ + aws --endpoint-url=${MINIO_ENDPOINT_URL} s3 sync ${LOCAL_CACHE_DIR} s3://vcpkg-cache; \ + else \ + echo "No remote cache provided, skipping upload."; \ + fi + +# Stage 3: Runtime +FROM common as runtime + +WORKDIR /app + +# Copy built binaries from the build stage +COPY --from=build /app/engine/build/cortex /usr/local/bin/cortex +COPY --from=build /app/engine/build/cortex-server /usr/local/bin/cortex-server + +COPY ./docker/download-cortex.llamacpp.sh /tmp/download-cortex.llamacpp.sh +COPY ./docker/entrypoint.sh /usr/local/bin/entrypoint.sh + +# Get the latest version of Cortex Llama +ARG CORTEX_LLAMACPP_VERSION=latest +RUN chmod +x /tmp/download-cortex.llamacpp.sh && /bin/bash /tmp/download-cortex.llamacpp.sh ${CORTEX_LLAMACPP_VERSION} + +# Configure entrypoint +RUN chmod +x /usr/local/bin/entrypoint.sh + +EXPOSE 39281 + +# Healthcheck +HEALTHCHECK --interval=300s --timeout=30s --start-period=10s --retries=3 \ + CMD curl -f http://127.0.0.1:39281/healthz || exit 1 + +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 6461eb15b..6f0493ec2 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -5,10 +5,11 @@ echo "apiServerHost: 0.0.0.0" > /root/.cortexrc echo "enableCors: true" >> /root/.cortexrc +# Install the engine cortex engines install llama-cpp -s /opt/cortex.llamacpp +cortex engines list # Start the cortex server - cortex start # Keep the container running by tailing the log files diff --git a/docs/docs/quickstart.mdx b/docs/docs/quickstart.mdx index 2ebf53c7b..874309ad4 100644 --- a/docs/docs/quickstart.mdx +++ b/docs/docs/quickstart.mdx @@ -80,14 +80,13 @@ All model files are stored in the `~/cortex/models` folder. 
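Before the corrected `curl` invocation below, the same request can be issued from Python; a minimal sketch, assuming the quickstart server is running locally on its default port 39281, the `llama3.1:8b-gguf` model named in this fix has already been pulled, and the third-party `requests` package is available:

```python
import requests  # assumed available; any HTTP client works

payload = {
    "model": "llama3.1:8b-gguf",
    "messages": [{"role": "user", "content": "Hello"}],
    "stream": True,
    "max_tokens": 1,
}
# Stream the response; each non-empty line arrives as a server-sent "data: {...}" chunk.
with requests.post(
    "http://localhost:39281/v1/chat/completions", json=payload, stream=True
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if line:
            print(line.decode())
```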
curl http://localhost:39281/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "", + "model": "llama3.1:8b-gguf", "messages": [ { "role": "user", "content": "Hello" }, ], - "model": "mistral", "stream": true, "max_tokens": 1, "stop": [ diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index 1ac69d78e..da31ab64b 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -1234,9 +1234,9 @@ }, "/v1/engines/{name}": { "get": { - "operationId": "EnginesController_findOne", - "summary": "Get an engine", - "description": "Retrieves an engine instance, providing basic information about the engine.", + "operationId": "EnginesController_listInstalledEngines", + "summary": "List installed engines", + "description": "List installed engines for a particular engine type.", "parameters": [ { "name": "name", @@ -1292,10 +1292,12 @@ } }, "tags": ["Engines"] - }, - "post": { - "summary": "Install an engine", - "description": "Install an engine of a specific type, with optional version and variant", + } + }, + "/v1/engines/{name}/releases": { + "get": { + "summary": "List released engines", + "description": "List released engines of a specific engine type.", "parameters": [ { "name": "name", @@ -1307,32 +1309,156 @@ "default": "llama-cpp" }, "description": "The type of engine" - }, + } + ], + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "draft": { + "type": "boolean", + "example": false + }, + "name": { + "type": "string", + "example": "v0.1.39-20.11.24" + }, + "prerelease": { + "type": "boolean", + "example": true + }, + "published_at": { + "type": "string", + "format": "date-time", + "example": "2024-11-20T17:39:40Z" + }, + "url": { + "type": "string", + "example": "https://api.github.com/repos/janhq/cortex.llamacpp/releases/186479804" + } + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/v1/engines/{name}/releases/latest": { + "get": { + "summary": "Get latest release", + "description": "Return variants for the latest engine release of a specific engine type.", + "parameters": [ { - "name": "version", - "in": "query", - "required": false, + "name": "name", + "in": "path", + "required": true, "schema": { - "type": "string" + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" }, - "description": "The version of the engine to install (optional)" - }, + "description": "The type of engine" + } + ], + "responses": { + "200": { + "description": "Successful installation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "created_at": { + "type": "string", + "format": "date-time", + "example": "2024-11-15T10:39:39Z" + }, + "download_count": { + "type": "integer", + "example": 76 + }, + "name": { + "type": "string", + "example": "0.1.39-linux-amd64-avx-cuda-11-7" + }, + "size": { + "type": "integer", + "example": 151215080 + } + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, + "/v1/engines/{name}/install": { + "post": { + "summary": "Install an engine", + "description": "Install an engine of a specific type, with optional version and variant. 
If none are provided, the latest version and most suitable variant will be installed.", + "parameters": [ { - "name": "variant", - "in": "query", - "required": false, + "name": "name", + "in": "path", + "required": true, "schema": { - "type": "string" + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" }, - "description": "The variant of the engine to install (optional)" + "description": "The type of engine" } ], + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "The version of the engine to install (optional)", + "example": "v0.1.39" + }, + "variant": { + "type": "string", + "description": "The variant of the engine to install (optional)", + "example": "mac-arm64" + } + } + } + } + } + }, "responses": { "200": { "description": "Successful installation", "content": { "application/json": { - "schema": {} + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine starts installing!" + } + } + } } } } @@ -1341,7 +1467,7 @@ }, "delete": { "summary": "Uninstall an engine", - "description": "Uninstall an engine based on type, version, and variant", + "description": "Uninstall an engine based on engine, version, and variant. If version and variant are not provided, all versions and variants of the engine will be uninstalled.", "parameters": [ { "name": "name", @@ -1353,26 +1479,30 @@ "default": "llama-cpp" }, "description": "The type of engine" - }, - { - "name": "version", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "The version of the engine to uninstall (optional)" - }, - { - "name": "variant", - "in": "query", - "required": false, - "schema": { - "type": "string" - }, - "description": "The variant of the engine to uninstall (optional)" } ], + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "version": { + "type": "string", + "description": "The version of the engine to uninstall (optional)", + "example": "v0.1.39" + }, + "variant": { + "type": "string", + "description": "The variant of the engine to uninstall (optional)", + "example": "mac-arm64" + } + } + } + } + } + }, "responses": { "200": { "description": "Successful uninstallation", @@ -1381,28 +1511,10 @@ "schema": { "type": "object", "properties": { - "success": { - "type": "boolean", - "description": "Indicates if the uninstallation was successful" - }, "message": { "type": "string", - "description": "Description of the uninstallation action taken" - }, - "uninstalledEngines": { - "type": "array", - "items": { - "type": "object", - "properties": { - "version": { - "type": "string" - }, - "variant": { - "type": "string" - } - } - }, - "description": "List of uninstalled engine versions and variants" + "description": "Engine llama-cpp uninstalled successfully!", + "example": "Engine llama-cpp uninstalled successfully!" 
} } } @@ -1429,6 +1541,44 @@ "tags": ["Engines"] } }, + "/v1/engines/{name}/update": { + "post": { + "summary": "Update engine", + "description": "Updates the specified engine type using the engine variant currently set as default.", + "parameters": [ + { + "name": "name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "default": "llama-cpp" + }, + "description": "The name of the engine to update" + } + ], + "responses": { + "200": { + "description": "Engine updated successfully", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "message": { + "type": "string", + "example": "Engine updated successfully" + } + } + } + } + } + } + }, + "tags": ["Engines"] + } + }, "/v1/engines/{name}/default": { "get": { "summary": "Get default engine variant", @@ -1603,44 +1753,6 @@ "tags": ["Engines"] } }, - "/v1/engines/{name}/update": { - "post": { - "summary": "Update engine", - "description": "Updates the specified engine type using the engine variant currently set as default.", - "parameters": [ - { - "name": "name", - "in": "path", - "required": true, - "schema": { - "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], - "default": "llama-cpp" - }, - "description": "The name of the engine to update" - } - ], - "responses": { - "200": { - "description": "Engine updated successfully", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "message": { - "type": "string", - "example": "Engine updated successfully" - } - } - } - } - } - } - }, - "tags": ["Engines"] - } - }, "/v1/hardware": { "get": { "summary": "Get hardware information", diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 92e07ec91..b53eb7fdf 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -28,6 +28,10 @@ if(MSVC) $<$:/MTd> #---|-- Statically link the runtime libraries $<$:/MT> #--| ) + + add_compile_options(/utf-8) + add_definitions(-DUNICODE -D_UNICODE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /DUNICODE /D_UNICODE") endif() if(NOT DEFINED CORTEX_VARIANT) @@ -173,10 +177,11 @@ aux_source_directory(models MODEL_SRC) aux_source_directory(cortex-common CORTEX_COMMON) aux_source_directory(config CONFIG_SRC) aux_source_directory(database DB_SRC) +aux_source_directory(migrations MIGR_SRC) target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ) -target_sources(${TARGET_NAME} PRIVATE ${CONFIG_SRC} ${CTL_SRC} ${COMMON_SRC} ${SERVICES_SRC} ${DB_SRC}) +target_sources(${TARGET_NAME} PRIVATE ${CONFIG_SRC} ${CTL_SRC} ${COMMON_SRC} ${SERVICES_SRC} ${DB_SRC} ${MIGR_SRC}) set_target_properties(${TARGET_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR} diff --git a/engine/README.md b/engine/README.md index 192e95612..884e5efae 100644 --- a/engine/README.md +++ b/engine/README.md @@ -173,4 +173,4 @@ Manual build is a process in which the developers build the software manually. 
T ## Star History -[![Star History Chart](https://api.star-history.com/svg?repos=janhq/cortex-cpp&type=Date)](https://star-history.com/#janhq/cortex-cpp&Date) +[![Star History Chart](https://api.star-history.com/svg?repos=janhq/cortex.cpp&type=Date)](https://star-history.com/#janhq/cortex.cpp&Date) \ No newline at end of file diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt index ce6f254ca..c69e7e150 100644 --- a/engine/cli/CMakeLists.txt +++ b/engine/cli/CMakeLists.txt @@ -26,6 +26,10 @@ if(MSVC) $<$:/MTd> #---|-- Statically link the runtime libraries $<$:/MT> #--| ) + + add_compile_options(/utf-8) + add_definitions(-DUNICODE -D_UNICODE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /DUNICODE /D_UNICODE") endif() if(NOT DEFINED CORTEX_VARIANT) diff --git a/engine/cli/commands/engine_get_cmd.cc b/engine/cli/commands/engine_get_cmd.cc index 8699c336b..3fd1fd576 100644 --- a/engine/cli/commands/engine_get_cmd.cc +++ b/engine/cli/commands/engine_get_cmd.cc @@ -2,8 +2,8 @@ #include #include #include +#include "common/engine_servicei.h" #include "server_start_cmd.h" -#include "services/engine_service.h" #include "utils/curl_utils.h" #include "utils/logging_utils.h" #include "utils/url_parser.h" diff --git a/engine/cli/commands/engine_install_cmd.cc b/engine/cli/commands/engine_install_cmd.cc index 1f712d10c..477e38ee2 100644 --- a/engine/cli/commands/engine_install_cmd.cc +++ b/engine/cli/commands/engine_install_cmd.cc @@ -45,28 +45,28 @@ bool EngineInstallCmd::Exec(const std::string& engine, } }); - auto versions_url = url_parser::Url{ + auto releases_url = url_parser::Url{ .protocol = "http", .host = host_ + ":" + std::to_string(port_), - .pathParams = {"v1", "engines", engine, "versions"}, + .pathParams = {"v1", "engines", engine, "releases"}, }; - auto versions_result = curl_utils::SimpleGetJson(versions_url.ToFullPath()); - if (versions_result.has_error()) { - CTL_ERR(versions_result.error()); + auto releases_result = curl_utils::SimpleGetJson(releases_url.ToFullPath()); + if (releases_result.has_error()) { + CTL_ERR(releases_result.error()); return false; } std::vector version_selections; - for (const auto& release_version : versions_result.value()) { + for (const auto& release_version : releases_result.value()) { version_selections.push_back(release_version["name"].asString()); } - auto selected_version = + auto selected_release = cli_selection_utils::PrintSelection(version_selections); - if (selected_version == std::nullopt) { + if (selected_release == std::nullopt) { CTL_ERR("Invalid version selection"); return false; } - std::cout << "Selected version: " << selected_version.value() << std::endl; + std::cout << "Selected version: " << selected_release.value() << std::endl; auto variant_url = url_parser::Url{ .protocol = "http", @@ -76,8 +76,8 @@ bool EngineInstallCmd::Exec(const std::string& engine, "v1", "engines", engine, - "versions", - selected_version.value(), + "releases", + selected_release.value(), }, }; auto variant_result = curl_utils::SimpleGetJson(variant_url.ToFullPath()); @@ -113,23 +113,25 @@ bool EngineInstallCmd::Exec(const std::string& engine, return false; } std::cout << "Selected " << selected_variant.value() << " - " - << selected_version.value() << std::endl; - - auto install_url = - url_parser::Url{.protocol = "http", - .host = host_ + ":" + std::to_string(port_), - .pathParams = - { - "v1", - "engines", - engine, - }, - .queries = { - {"version", selected_version.value()}, - {"variant", selected_variant.value()}, - }}; - - auto response = 
curl_utils::SimplePostJson(install_url.ToFullPath()); + << selected_release.value() << std::endl; + + auto install_url = url_parser::Url{ + .protocol = "http", + .host = host_ + ":" + std::to_string(port_), + .pathParams = + { + "v1", + "engines", + engine, + "install", + }, + }; + Json::Value body; + body["version"] = selected_release.value(); + body["variant"] = selected_variant.value(); + + auto response = curl_utils::SimplePostJson(install_url.ToFullPath(), + body.toStyledString()); if (response.has_error()) { CTL_ERR(response.error()); return false; @@ -163,14 +165,17 @@ bool EngineInstallCmd::Exec(const std::string& engine, "v1", "engines", engine, + "install", }, }; + Json::Value body; if (!version.empty()) { - install_url.queries = {{"version", version}}; + body["version"] = version; } - auto response = curl_utils::SimplePostJson(install_url.ToFullPath()); + auto response = curl_utils::SimplePostJson(install_url.ToFullPath(), + body.toStyledString()); if (response.has_error()) { // TODO: namh refactor later Json::Value root; @@ -183,8 +188,6 @@ bool EngineInstallCmd::Exec(const std::string& engine, return false; } - CLI_LOG("Validating download items, please wait..") - if (!dp_res.get()) return false; diff --git a/engine/cli/commands/engine_uninstall_cmd.cc b/engine/cli/commands/engine_uninstall_cmd.cc index 1ef5580a8..ef9c95af8 100644 --- a/engine/cli/commands/engine_uninstall_cmd.cc +++ b/engine/cli/commands/engine_uninstall_cmd.cc @@ -17,9 +17,10 @@ void EngineUninstallCmd::Exec(const std::string& host, int port, } } - auto url = url_parser::Url{.protocol = "http", - .host = host + ":" + std::to_string(port), - .pathParams = {"v1", "engines", engine}}; + auto url = + url_parser::Url{.protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines", engine, "install"}}; auto result = curl_utils::SimpleDeleteJson(url.ToFullPath()); if (result.has_error()) { diff --git a/engine/cli/commands/engine_use_cmd.cc b/engine/cli/commands/engine_use_cmd.cc index d03f9ddc0..50735739d 100644 --- a/engine/cli/commands/engine_use_cmd.cc +++ b/engine/cli/commands/engine_use_cmd.cc @@ -55,22 +55,29 @@ cpp::result EngineUseCmd::Exec(const std::string& host, return cpp::fail("Invalid variant selection"); } - auto selected_version = cli_selection_utils::PrintSelection( - variant_map[selected_variant.value()]); - if (!selected_variant.has_value()) { + std::optional selected_version = std::nullopt; + if (variant_map[selected_variant.value()].size() == 1) { + selected_version = variant_map[selected_variant.value()][0]; + } else { + selected_version = cli_selection_utils::PrintSelection( + variant_map[selected_variant.value()]); + } + if (!selected_version.has_value()) { CTL_ERR("Invalid version selection"); return cpp::fail("Invalid version selection"); } - auto set_default_engine_variant = - url_parser::Url{.protocol = "http", - .host = host + ":" + std::to_string(port), - .pathParams = {"v1", "engines", engine, "default"}, - .queries = {{"version", selected_version.value()}, - {"variant", selected_variant.value()}}}; + Json::Value body; + body["variant"] = selected_variant.value(); + body["version"] = selected_version.value(); + auto set_default_engine_variant = url_parser::Url{ + .protocol = "http", + .host = host + ":" + std::to_string(port), + .pathParams = {"v1", "engines", engine, "default"}, + }; - auto response = - curl_utils::SimplePostJson(set_default_engine_variant.ToFullPath()); + auto response = curl_utils::SimplePostJson( + 
set_default_engine_variant.ToFullPath(), body.toStyledString()); if (response.has_error()) { CTL_ERR(response.error()); return cpp::fail("Failed to set default engine variant"); diff --git a/engine/cli/commands/model_import_cmd.cc b/engine/cli/commands/model_import_cmd.cc index 6abaad2c8..f8cf6a810 100644 --- a/engine/cli/commands/model_import_cmd.cc +++ b/engine/cli/commands/model_import_cmd.cc @@ -1,13 +1,7 @@ #include "model_import_cmd.h" -#include -#include -#include "config/gguf_parser.h" -#include "config/yaml_config.h" -#include "database/models.h" +#include #include "httplib.h" -#include "json/json.h" #include "server_start_cmd.h" -#include "utils/file_manager_utils.h" #include "utils/logging_utils.h" namespace commands { diff --git a/engine/cli/commands/model_pull_cmd.cc b/engine/cli/commands/model_pull_cmd.cc index d769b667a..376943fd1 100644 --- a/engine/cli/commands/model_pull_cmd.cc +++ b/engine/cli/commands/model_pull_cmd.cc @@ -96,8 +96,6 @@ std::optional ModelPullCmd::Exec(const std::string& host, int port, CTL_INF("model: " << model << ", model_id: " << model_id); - // Send request download model to server - CLI_LOG("Validating download items, please wait..") Json::Value json_data; json_data["model"] = model; auto data_str = json_data.toStyledString(); diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc index e039e5329..5ba972463 100644 --- a/engine/cli/commands/server_start_cmd.cc +++ b/engine/cli/commands/server_start_cmd.cc @@ -2,6 +2,7 @@ #include "commands/cortex_upd_cmd.h" #include "utils/cortex_utils.h" #include "utils/file_manager_utils.h" +#include "utils/widechar_conv.h" namespace commands { @@ -57,24 +58,32 @@ bool ServerStartCmd::Exec(const std::string& host, int port, ZeroMemory(&si, sizeof(si)); si.cb = sizeof(si); ZeroMemory(&pi, sizeof(pi)); - std::string params = "--start-server"; - params += " --config_file_path " + get_config_file_path(); - params += " --data_folder_path " + get_data_folder_path(); - params += " --loglevel " + log_level_; - std::string cmds = cortex_utils::GetCurrentPath() + "/" + exe + " " + params; + std::wstring params = L"--start-server"; + params += L" --config_file_path " + + file_manager_utils::GetConfigurationPath().wstring(); + params += L" --data_folder_path " + + file_manager_utils::GetCortexDataPath().wstring(); + params += L" --loglevel " + cortex::wc::Utf8ToWstring(log_level_); + std::wstring exe_w = cortex::wc::Utf8ToWstring(exe); + std::wstring current_path_w = + file_manager_utils::GetExecutableFolderContainerPath().wstring(); + std::wstring wcmds = current_path_w + L"/" + exe_w + L" " + params; + CTL_DBG("wcmds: " << wcmds); + std::vector mutable_cmds(wcmds.begin(), wcmds.end()); + mutable_cmds.push_back(L'\0'); // Create child process if (!CreateProcess( NULL, // No module name (use command line) - const_cast( - cmds.c_str()), // Command line (replace with your actual executable) - NULL, // Process handle not inheritable - NULL, // Thread handle not inheritable - FALSE, // Set handle inheritance to FALSE - 0, // No creation flags - NULL, // Use parent's environment block - NULL, // Use parent's starting directory - &si, // Pointer to STARTUPINFO structure - &pi)) // Pointer to PROCESS_INFORMATION structure + mutable_cmds + .data(), // Command line (replace with your actual executable) + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + FALSE, // Set handle inheritance + 0, // No creation flags + NULL, // Use parent's environment block + NULL, 
// Use parent's starting directory + &si, // Pointer to STARTUPINFO structure + &pi)) // Pointer to PROCESS_INFORMATION structure { std::cout << "Could not start server: " << GetLastError() << std::endl; return false; @@ -115,7 +124,8 @@ bool ServerStartCmd::Exec(const std::string& host, int port, std::string p = cortex_utils::GetCurrentPath() + "/" + exe; execl(p.c_str(), exe.c_str(), "--start-server", "--config_file_path", get_config_file_path().c_str(), "--data_folder_path", - get_data_folder_path().c_str(), "--loglevel", log_level_.c_str(), (char*)0); + get_data_folder_path().c_str(), "--loglevel", log_level_.c_str(), + (char*)0); } else { // Parent process if (!TryConnectToServer(host, port)) { diff --git a/engine/cli/main.cc b/engine/cli/main.cc index 49cdf4be9..a03c5adf0 100644 --- a/engine/cli/main.cc +++ b/engine/cli/main.cc @@ -25,6 +25,9 @@ #error "Unsupported platform!" #endif +#include +#include + void RemoveBinaryTempFileIfExists() { auto temp = file_manager_utils::GetExecutableFolderContainerPath() / "cortex_temp"; @@ -40,11 +43,20 @@ void RemoveBinaryTempFileIfExists() { void SetupLogger(trantor::FileLogger& async_logger, bool verbose) { if (!verbose) { auto config = file_manager_utils::GetCortexConfig(); + std::filesystem::create_directories( +#if defined(_WIN32) + std::filesystem::u8path(config.logFolderPath) / +#else std::filesystem::path(config.logFolderPath) / +#endif std::filesystem::path(cortex_utils::logs_folder)); - async_logger.setFileName(config.logFolderPath + "/" + - cortex_utils::logs_cli_base_name); + + // Do not need to use u8path here because trantor handles itself + async_logger.setFileName( + (std::filesystem::path(config.logFolderPath) / + std::filesystem::path(cortex_utils::logs_cli_base_name)) + .string()); async_logger.setMaxLines(config.maxLogLines); // Keep last 100000 lines async_logger.startLogging(); trantor::Logger::setOutputFunction( @@ -192,8 +204,7 @@ int main(int argc, char* argv[]) { // Check if server exists, if not notify to user to install server auto exe = commands::GetCortexServerBinary(); auto server_binary_path = - std::filesystem::path(cortex_utils::GetCurrentPath()) / - std::filesystem::path(exe); + file_manager_utils::GetExecutableFolderContainerPath() / exe; if (!std::filesystem::exists(server_binary_path)) { std::cout << CORTEX_CPP_VERSION << " requires server binary, to install server, run: " diff --git a/engine/cli/utils/download_progress.cc b/engine/cli/utils/download_progress.cc index e085a660e..37920e516 100644 --- a/engine/cli/utils/download_progress.cc +++ b/engine/cli/utils/download_progress.cc @@ -8,6 +8,10 @@ #include "utils/format_utils.h" #include "utils/json_helper.h" #include "utils/logging_utils.h" +#if !defined(WIN32) && !defined(WIN64) +#include +#include +#endif namespace { std::string Repo2Engine(const std::string& r) { @@ -20,6 +24,20 @@ std::string Repo2Engine(const std::string& r) { } return r; }; + +int GetColumns() { +#if defined(WIN32) || defined(WIN64) + CONSOLE_SCREEN_BUFFER_INFO csbi; + int columns; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi); + columns = csbi.srWindow.Right - csbi.srWindow.Left + 1; + return columns; +#else + struct winsize w; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); + return w.ws_col; +#endif +} } // namespace bool DownloadProgress::Connect(const std::string& host, int port) { if (ws_) { @@ -97,7 +115,7 @@ bool DownloadProgress::Handle( items[i.id] = std::pair( idx, std::make_unique( - indicators::option::BarWidth{50}, + 
indicators::option::BarWidth{GetColumns() / 6}, indicators::option::Start{"["}, indicators::option::Fill{"="}, indicators::option::Lead{">"}, indicators::option::End{"]"}, indicators::option::PrefixText{pad_string(Repo2Engine(i.id))}, diff --git a/engine/common/download_task.h b/engine/common/download_task.h index 94fb11a48..95e736394 100644 --- a/engine/common/download_task.h +++ b/engine/common/download_task.h @@ -2,9 +2,9 @@ #include #include +#include #include #include -#include enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit, Cortex }; @@ -161,4 +161,4 @@ inline DownloadTask GetDownloadTaskFromJson(const Json::Value item_json) { } return task; } -} // namespace common \ No newline at end of file +} // namespace common diff --git a/engine/config/model_config.h b/engine/config/model_config.h index 044fd8dd3..7d4076ee5 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -1,13 +1,12 @@ #pragma once #include -#include -#include #include #include #include #include #include "utils/format_utils.h" + namespace config { struct ModelConfig { std::string name; @@ -173,6 +172,15 @@ struct ModelConfig { tp = json["tp"].asInt(); } } + + std::string ToJsonString() const { + auto obj = ToJson(); + obj["id"] = obj["model"].asString(); + Json::StreamWriterBuilder wbuilder; + wbuilder.settings_["precision"] = 2; + return Json::writeString(wbuilder, obj); + } + Json::Value ToJson() const { Json::Value obj; diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc index 0b3c23d8b..bbe7f430c 100644 --- a/engine/config/yaml_config.cc +++ b/engine/config/yaml_config.cc @@ -18,6 +18,7 @@ void YamlHandler::Reset() { void YamlHandler::ReadYamlFile(const std::string& file_path) { namespace fs = std::filesystem; namespace fmu = file_manager_utils; + try { yaml_node_ = YAML::LoadFile(file_path); // incase of model.yml file, we don't have files yet, create them @@ -41,7 +42,6 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) { yaml_node_["files"] = v; } } catch (const YAML::BadFile& e) { - std::cerr << "Failed to read file: " << e.what() << std::endl; throw; } } diff --git a/engine/config/yaml_config.h b/engine/config/yaml_config.h index 87b9083a1..fe3bf4d02 100644 --- a/engine/config/yaml_config.h +++ b/engine/config/yaml_config.h @@ -4,7 +4,6 @@ #include #include "model_config.h" -#include "yaml-cpp/yaml.h" namespace config { class YamlHandler { diff --git a/engine/controllers/configs.cc b/engine/controllers/configs.cc index 41b08cf45..c2cf7cc2c 100644 --- a/engine/controllers/configs.cc +++ b/engine/controllers/configs.cc @@ -1,4 +1,5 @@ #include "configs.h" +#include "utils/cortex_utils.h" void Configs::GetConfigurations( const HttpRequestPtr& req, @@ -7,13 +8,13 @@ void Configs::GetConfigurations( if (get_config_result.has_error()) { Json::Value error_json; error_json["message"] = get_config_result.error(); - auto resp = drogon::HttpResponse::newHttpJsonResponse(error_json); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(error_json); resp->setStatusCode(drogon::k400BadRequest); callback(resp); return; } - auto resp = drogon::HttpResponse::newHttpJsonResponse( + auto resp = cortex_utils::CreateCortexHttpJsonResponse( get_config_result.value().ToJson()); resp->setStatusCode(drogon::k200OK); callback(resp); @@ -27,7 +28,7 @@ void Configs::UpdateConfigurations( if (json_body == nullptr) { Json::Value error_json; error_json["message"] = "Configuration must be provided via JSON body"; - auto resp = 
drogon::HttpResponse::newHttpJsonResponse(error_json); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(error_json); resp->setStatusCode(drogon::k400BadRequest); callback(resp); return; @@ -37,7 +38,7 @@ void Configs::UpdateConfigurations( if (update_config_result.has_error()) { Json::Value error_json; error_json["message"] = update_config_result.error(); - auto resp = drogon::HttpResponse::newHttpJsonResponse(error_json); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(error_json); resp->setStatusCode(drogon::k400BadRequest); callback(resp); return; @@ -46,7 +47,7 @@ void Configs::UpdateConfigurations( Json::Value root; root["message"] = "Configuration updated successfully"; root["config"] = update_config_result.value().ToJson(); - auto resp = drogon::HttpResponse::newHttpJsonResponse(root); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(root); resp->setStatusCode(drogon::k200OK); callback(resp); return; diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index a75bd1f9b..9e110bd66 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -47,11 +47,24 @@ void Engines::ListEngine( void Engines::UninstallEngine( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, const std::optional version, - const std::optional variant) { + const std::string& engine) { + std::optional norm_variant = std::nullopt; + std::optional norm_version = std::nullopt; + if (req->getJsonObject() != nullptr) { + auto variant = (*(req->getJsonObject())).get("variant", "").asString(); + auto version = + (*(req->getJsonObject())).get("version", "latest").asString(); - auto result = - engine_service_->UninstallEngineVariant(engine, version, variant); + if (!variant.empty()) { + norm_variant = variant; + } + if (!version.empty()) { + norm_version = version; + } + } + + auto result = engine_service_->UninstallEngineVariant(engine, norm_version, + norm_variant); Json::Value ret; if (result.has_error()) { @@ -69,7 +82,7 @@ void Engines::UninstallEngine( } } -void Engines::GetEngineVersions( +void Engines::GetEngineReleases( const HttpRequestPtr& req, std::function&& callback, const std::string& engine) const { @@ -134,12 +147,23 @@ void Engines::GetEngineVariants( void Engines::InstallEngine( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, const std::optional version, - const std::optional variant_name) { - auto normalized_version = version.value_or("latest"); + const std::string& engine) { + std::optional norm_variant = std::nullopt; + std::string norm_version{"latest"}; + + if (req->getJsonObject() != nullptr) { + auto variant = (*(req->getJsonObject())).get("variant", "").asString(); + auto version = + (*(req->getJsonObject())).get("version", "latest").asString(); - auto result = engine_service_->InstallEngineAsyncV2( - engine, normalized_version, variant_name); + if (!variant.empty()) { + norm_variant = variant; + } + norm_version = version; + } + + auto result = + engine_service_->InstallEngineAsync(engine, norm_version, norm_variant); if (result.has_error()) { Json::Value res; res["message"] = result.error(); @@ -218,8 +242,36 @@ void Engines::GetLatestEngineVersion( void Engines::SetDefaultEngineVariant( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, const std::string& version, - const std::string& variant) { + const std::string& engine) { + auto json_obj = req->getJsonObject(); + if (json_obj == nullptr) { + Json::Value res; + res["message"] = 
"Request body is required"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + + auto variant = (*(req->getJsonObject())).get("variant", "").asString(); + if (variant.empty()) { + Json::Value ret; + ret["message"] = "Variant is required"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + auto version = (*(req->getJsonObject())).get("version", "").asString(); + if (version.empty()) { + Json::Value ret; + ret["message"] = "Version is required"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + auto result = engine_service_->SetDefaultEngineVariant(engine, version, variant); if (result.has_error()) { diff --git a/engine/controllers/engines.h b/engine/controllers/engines.h index dc9ef5a54..b0a92b6c3 100644 --- a/engine/controllers/engines.h +++ b/engine/controllers/engines.h @@ -12,57 +12,72 @@ class Engines : public drogon::HttpController { public: METHOD_LIST_BEGIN - METHOD_ADD(Engines::GetInstalledEngineVariants, "/{1}", Get); - METHOD_ADD(Engines::InstallEngine, "/{1}?version={2}&variant={3}", Options, - Post); - METHOD_ADD(Engines::UninstallEngine, "/{1}?version={2}&variant={3}", Options, - Delete); - METHOD_ADD(Engines::SetDefaultEngineVariant, - "/{1}/default?version={2}&variant={3}", Options, Post); - METHOD_ADD(Engines::GetDefaultEngineVariant, "/{1}/default", Get); + // install engine + METHOD_ADD(Engines::InstallEngine, "/{1}/install", Options, Post); + ADD_METHOD_TO(Engines::InstallEngine, "/v1/engines/{1}/install", Options, + Post); - METHOD_ADD(Engines::LoadEngine, "/{1}/load", Options, Post); - METHOD_ADD(Engines::UnloadEngine, "/{1}/load", Options, Delete); - METHOD_ADD(Engines::UpdateEngine, "/{1}/update", Options, Post); - METHOD_ADD(Engines::ListEngine, "", Get); + // uninstall engine + METHOD_ADD(Engines::UninstallEngine, "/{1}/install", Options, Delete); + ADD_METHOD_TO(Engines::UninstallEngine, "/v1/engines/{1}/install", Options, + Delete); - METHOD_ADD(Engines::GetEngineVersions, "/{1}/versions", Get); - METHOD_ADD(Engines::GetEngineVariants, "/{1}/versions/{2}", Get); - METHOD_ADD(Engines::GetLatestEngineVersion, "/{1}/latest", Get); + // set default engine + METHOD_ADD(Engines::SetDefaultEngineVariant, "/{1}/default", Options, Post); + ADD_METHOD_TO(Engines::SetDefaultEngineVariant, "/v1/engines/{1}/default", + Options, Post); - ADD_METHOD_TO(Engines::GetInstalledEngineVariants, "/v1/engines/{1}", Get); - ADD_METHOD_TO(Engines::InstallEngine, - "/v1/engines/{1}?version={2}&variant={3}", Options, Post); - ADD_METHOD_TO(Engines::UninstallEngine, - "/v1/engines/{1}?version={2}&variant={3}", Options, Delete); - ADD_METHOD_TO(Engines::SetDefaultEngineVariant, - "/v1/engines/{1}/default?version={2}&variant={3}", Options, - Post); + // get default engine + METHOD_ADD(Engines::GetDefaultEngineVariant, "/{1}/default", Get); ADD_METHOD_TO(Engines::GetDefaultEngineVariant, "/v1/engines/{1}/default", Get); + // update engine + METHOD_ADD(Engines::UpdateEngine, "/{1}/update", Options, Post); + ADD_METHOD_TO(Engines::UpdateEngine, "/v1/engines/{1}/update", Options, Post); + + // load engine + METHOD_ADD(Engines::LoadEngine, "/{1}/load", Options, Post); ADD_METHOD_TO(Engines::LoadEngine, "/v1/engines/{1}/load", Options, Post); + + // unload engine + METHOD_ADD(Engines::UnloadEngine, "/{1}/load", Options, Delete); 
ADD_METHOD_TO(Engines::UnloadEngine, "/v1/engines/{1}/load", Options, Delete); - ADD_METHOD_TO(Engines::UpdateEngine, "/v1/engines/{1}/update", Options, Post); - ADD_METHOD_TO(Engines::GetEngineVersions, "/v1/engines/{1}/versions", Get); - ADD_METHOD_TO(Engines::GetEngineVariants, "/v1/engines/{1}/versions/{2}", - Get); + + METHOD_ADD(Engines::GetInstalledEngineVariants, "/{1}", Get); + ADD_METHOD_TO(Engines::GetInstalledEngineVariants, "/v1/engines/{1}", Get); + + METHOD_ADD(Engines::ListEngine, "", Get); ADD_METHOD_TO(Engines::ListEngine, "/v1/engines", Get); + + METHOD_ADD(Engines::GetEngineReleases, "/{1}/releases", Get); + ADD_METHOD_TO(Engines::GetEngineReleases, "/v1/engines/{1}/releases", Get); + + METHOD_ADD(Engines::GetEngineVariants, "/{1}/releases/{2}", Get); + ADD_METHOD_TO(Engines::GetEngineVariants, "/v1/engines/{1}/releases/{2}", + Get); + + METHOD_ADD(Engines::GetLatestEngineVersion, "/{1}/releases/latest", Get); + ADD_METHOD_TO(Engines::GetLatestEngineVersion, + "/v1/engines/{1}/releases/latest", Get); + METHOD_LIST_END explicit Engines(std::shared_ptr engine_service) : engine_service_{engine_service} {} - void ListEngine(const HttpRequestPtr& req, - std::function&& callback) const; + void InstallEngine(const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine); void UninstallEngine(const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, - const std::optional version, - const std::optional variant); + const std::string& engine); + + void ListEngine(const HttpRequestPtr& req, + std::function&& callback) const; - void GetEngineVersions(const HttpRequestPtr& req, + void GetEngineReleases(const HttpRequestPtr& req, std::function&& callback, const std::string& engine) const; @@ -71,12 +86,6 @@ class Engines : public drogon::HttpController { const std::string& engine, const std::string& version) const; - void InstallEngine(const HttpRequestPtr& req, - std::function&& callback, - const std::string& engine, - const std::optional version, - const std::optional variant_name); - void GetInstalledEngineVariants( const HttpRequestPtr& req, std::function&& callback, @@ -94,8 +103,7 @@ class Engines : public drogon::HttpController { void SetDefaultEngineVariant( const HttpRequestPtr& req, std::function&& callback, - const std::string& engine, const std::string& version, - const std::string& variant); + const std::string& engine); void GetDefaultEngineVariant( const HttpRequestPtr& req, diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index 9e4ba1e9f..c51bb3b77 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -153,7 +153,7 @@ void Models::ListModel( Json::Value ret; ret["object"] = "list"; Json::Value data(Json::arrayValue); - + model_service_->ForceIndexingModelList(); // Iterate through directory cortex::db::Models modellist_handler; @@ -224,13 +224,9 @@ void Models::GetModel(const HttpRequestPtr& req, .string()); auto model_config = yaml_handler.GetModelConfig(); - ret = model_config.ToJson(); - - ret["id"] = model_config.model; - ret["object"] = "model"; - ret["result"] = "OK"; - auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); - resp->setStatusCode(k200OK); + auto ret = model_config.ToJsonString(); + auto resp = cortex_utils::CreateCortexHttpTextAsJsonResponse(ret); + resp->setStatusCode(drogon::k200OK); callback(resp); } catch (const std::exception& e) { std::string message = diff --git a/engine/controllers/swagger.cc b/engine/controllers/swagger.cc index 
d605daa0b..96a6c3837 100644 --- a/engine/controllers/swagger.cc +++ b/engine/controllers/swagger.cc @@ -1,5 +1,6 @@ #include "swagger.h" #include "cortex_openapi.h" +#include "utils/cortex_utils.h" constexpr auto ScalarUi = R"( @@ -31,7 +32,7 @@ Json::Value SwaggerController::generateOpenAPISpec() { void SwaggerController::serveSwaggerUI( const drogon::HttpRequestPtr& req, std::function&& callback) const { - auto resp = drogon::HttpResponse::newHttpResponse(); + auto resp = cortex_utils::CreateCortexHttpResponse(); resp->setBody(ScalarUi); resp->setContentTypeCode(drogon::CT_TEXT_HTML); callback(resp); @@ -41,6 +42,6 @@ void SwaggerController::serveOpenAPISpec( const drogon::HttpRequestPtr& req, std::function&& callback) const { Json::Value spec = generateOpenAPISpec(); - auto resp = drogon::HttpResponse::newHttpJsonResponse(spec); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(spec); callback(resp); } diff --git a/engine/database/database.h b/engine/database/database.h index 27c75e923..dbe58cc4b 100644 --- a/engine/database/database.h +++ b/engine/database/database.h @@ -20,7 +20,7 @@ class Database { private: Database() - : db_(file_manager_utils::GetCortexDataPath().string() + "/cortex.db", + : db_(file_manager_utils::GetCortexDataPath() / "cortex.db", SQLite::OPEN_READWRITE | SQLite::OPEN_CREATE) {} SQLite::Database db_; }; diff --git a/engine/database/hardwares.cc b/engine/database/hardware.cc similarity index 80% rename from engine/database/hardwares.cc rename to engine/database/hardware.cc index c23aec0b7..ee68749d5 100644 --- a/engine/database/hardwares.cc +++ b/engine/database/hardware.cc @@ -1,27 +1,13 @@ -#include "hardwares.h" +#include "hardware.h" #include "database.h" #include "utils/scope_exit.h" namespace cortex::db { Hardwares::Hardwares() : db_(cortex::db::Database::GetInstance().db()) { - db_.exec( - "CREATE TABLE IF NOT EXISTS hardwares (" - "uuid TEXT PRIMARY KEY," - "type TEXT," - "hardware_id INTEGER," - "software_id INTEGER," - "activated INTEGER);"); } Hardwares::Hardwares(SQLite::Database& db) : db_(db) { - db_.exec( - "CREATE TABLE IF NOT EXISTS hardwares (" - "uuid TEXT PRIMARY KEY," - "type TEXT," - "hardware_id INTEGER," - "software_id INTEGER," - "activated INTEGER);"); } Hardwares::~Hardwares() {} @@ -35,7 +21,7 @@ Hardwares::LoadHardwareList() const { SQLite::Statement query( db_, "SELECT uuid, type, " - "hardware_id, software_id, activated FROM hardwares"); + "hardware_id, software_id, activated FROM hardware"); while (query.executeStep()) { HardwareEntry entry; @@ -57,7 +43,7 @@ cpp::result Hardwares::AddHardwareEntry( try { SQLite::Statement insert( db_, - "INSERT INTO hardwares (uuid, type, " + "INSERT INTO hardware (uuid, type, " "hardware_id, software_id, activated) VALUES (?, ?, " "?, ?, ?)"); insert.bind(1, new_entry.uuid); @@ -77,7 +63,7 @@ cpp::result Hardwares::UpdateHardwareEntry( const std::string& id, const HardwareEntry& updated_entry) { try { SQLite::Statement upd(db_, - "UPDATE hardwares " + "UPDATE hardware " "SET hardware_id = ?, software_id = ?, activated = ? 
" "WHERE uuid = ?"); upd.bind(1, updated_entry.hardware_id); @@ -97,7 +83,7 @@ cpp::result Hardwares::UpdateHardwareEntry( cpp::result Hardwares::DeleteHardwareEntry( const std::string& id) { try { - SQLite::Statement del(db_, "DELETE from hardwares WHERE uuid = ?"); + SQLite::Statement del(db_, "DELETE from hardware WHERE uuid = ?"); del.bind(1, id); if (del.exec() == 1) { CTL_INF("Deleted: " << id); diff --git a/engine/database/hardwares.h b/engine/database/hardware.h similarity index 100% rename from engine/database/hardwares.h rename to engine/database/hardware.h diff --git a/engine/database/models.cc b/engine/database/models.cc index d0bee405c..3e81fbab2 100644 --- a/engine/database/models.cc +++ b/engine/database/models.cc @@ -1,6 +1,5 @@ #include "models.h" #include -#include #include #include "database.h" #include "utils/result.hpp" @@ -8,25 +7,9 @@ namespace cortex::db { -Models::Models() : db_(cortex::db::Database::GetInstance().db()) { - db_.exec( - "CREATE TABLE IF NOT EXISTS models (" - "model_id TEXT PRIMARY KEY," - "author_repo_id TEXT," - "branch_name TEXT," - "path_to_model_yaml TEXT," - "model_alias TEXT);"); -} +Models::Models() : db_(cortex::db::Database::GetInstance().db()) {} -Models::Models(SQLite::Database& db) : db_(db) { - db_.exec( - "CREATE TABLE IF NOT EXISTS models (" - "model_id TEXT PRIMARY KEY," - "author_repo_id TEXT," - "branch_name TEXT," - "path_to_model_yaml TEXT," - "model_alias TEXT UNIQUE);"); -} +Models::Models(SQLite::Database& db) : db_(db) {} Models::~Models() {} @@ -262,6 +245,11 @@ cpp::result Models::UpdateModelAlias( cpp::result Models::DeleteModelEntry( const std::string& identifier) { try { + // delete only if its there + if (!HasModel(identifier)) { + return true; + } + SQLite::Statement del( db_, "DELETE from models WHERE model_id = ? 
OR model_alias = ?"); del.bind(1, identifier); diff --git a/engine/e2e-test/cortex-llamacpp-e2e-nightly.py b/engine/e2e-test/cortex-llamacpp-e2e-nightly.py new file mode 100644 index 000000000..9be34519a --- /dev/null +++ b/engine/e2e-test/cortex-llamacpp-e2e-nightly.py @@ -0,0 +1,33 @@ +import pytest +import sys + +### e2e tests are expensive, have to keep engines tests in order +from test_api_engine_list import TestApiEngineList +from test_api_engine_install_nightly import TestApiEngineInstall +from test_api_engine_get import TestApiEngineGet + +### models, keeps in order, note that we only uninstall engine after finishing all models test +from test_api_model_pull_direct_url import TestApiModelPullDirectUrl +from test_api_model_start import TestApiModelStart +from test_api_model_stop import TestApiModelStop +from test_api_model_get import TestApiModelGet +from test_api_model_list import TestApiModelList +from test_api_model_update import TestApiModelUpdate +from test_api_model_delete import TestApiModelDelete +from test_api_model_import import TestApiModelImport +from test_api_engine_uninstall import TestApiEngineUninstall + +### +from test_cli_engine_get import TestCliEngineGet +from test_cli_engine_install_nightly import TestCliEngineInstall +from test_cli_engine_list import TestCliEngineList +from test_cli_model_delete import TestCliModelDelete +from test_cli_model_pull_direct_url import TestCliModelPullDirectUrl +from test_cli_server_start import TestCliServerStart +from test_cortex_update import TestCortexUpdate +from test_create_log_folder import TestCreateLogFolder +from test_cli_model_import import TestCliModelImport +from test_cli_engine_uninstall import TestCliEngineUninstall + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-v"])) diff --git a/engine/e2e-test/test_api_engine_install.py b/engine/e2e-test/test_api_engine_install.py index b0fbb6c9c..aabe0138d 100644 --- a/engine/e2e-test/test_api_engine_install.py +++ b/engine/e2e-test/test_api_engine_install.py @@ -18,17 +18,19 @@ def setup_and_teardown(self): stop_server() def test_engines_install_llamacpp_should_be_successful(self): - response = requests.post("http://localhost:3928/v1/engines/llama-cpp") + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") assert response.status_code == 200 def test_engines_install_llamacpp_specific_version_and_variant(self): + data = {"version": "v0.1.35-27.10.24", "variant": "linux-amd64-avx-cuda-11-7"} response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35-27.10.24&variant=linux-amd64-avx-cuda-11-7" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert response.status_code == 200 def test_engines_install_llamacpp_specific_version_and_null_variant(self): + data = {"version": "v0.1.35-27.10.24"} response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35-27.10.24" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert response.status_code == 200 diff --git a/engine/e2e-test/test_api_engine_install_nightly.py b/engine/e2e-test/test_api_engine_install_nightly.py new file mode 100644 index 000000000..de4914c28 --- /dev/null +++ b/engine/e2e-test/test_api_engine_install_nightly.py @@ -0,0 +1,37 @@ +import pytest +import requests +from test_runner import start_server, stop_server, get_latest_pre_release_tag + +latest_pre_release_tag = get_latest_pre_release_tag("janhq", "cortex.llamacpp") + +class TestApiEngineInstall: + + @pytest.fixture(autouse=True) + 
def setup_and_teardown(self): + # Setup + success = start_server() + if not success: + raise Exception("Failed to start server") + + yield + + # Teardown + stop_server() + + def test_engines_install_llamacpp_should_be_successful(self): + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") + assert response.status_code == 200 + + def test_engines_install_llamacpp_specific_version_and_variant(self): + data = {"version": latest_pre_release_tag, "variant": "linux-amd64-avx-cuda-11-7"} + response = requests.post( + "http://localhost:3928/v1/engines/llama-cpp/install", json=data + ) + assert response.status_code == 200 + + def test_engines_install_llamacpp_specific_version_and_null_variant(self): + data = {"version": latest_pre_release_tag} + response = requests.post( + "http://localhost:3928/v1/engines/llama-cpp/install", json=data + ) + assert response.status_code == 200 diff --git a/engine/e2e-test/test_api_engine_list.py b/engine/e2e-test/test_api_engine_list.py index 974fbbf8e..71b9ea8b4 100644 --- a/engine/e2e-test/test_api_engine_list.py +++ b/engine/e2e-test/test_api_engine_list.py @@ -8,6 +8,9 @@ class TestApiEngineList: @pytest.fixture(autouse=True) def setup_and_teardown(self): # Setup + # Not sure why but on macOS amd, the first start server timeouts with CI + start_server() + stop_server() success = start_server() if not success: raise Exception("Failed to start server") diff --git a/engine/e2e-test/test_api_engine_uninstall.py b/engine/e2e-test/test_api_engine_uninstall.py index 491bc2d27..2a491d07a 100644 --- a/engine/e2e-test/test_api_engine_uninstall.py +++ b/engine/e2e-test/test_api_engine_uninstall.py @@ -1,6 +1,12 @@ import pytest +import time import requests -from test_runner import start_server, stop_server +from test_runner import ( + run, + start_server, + stop_server, + wait_for_websocket_download_success_event, +) class TestApiEngineUninstall: @@ -16,49 +22,58 @@ def setup_and_teardown(self): # Teardown stop_server() - - def test_engines_uninstall_llamacpp_should_be_successful(self): - # install first - requests.post("http://localhost:3928/v1/engines/llama-cpp") - - response = requests.delete("http://localhost:3928/v1/engines/llama-cpp") + + @pytest.mark.asyncio + async def test_engines_uninstall_llamacpp_should_be_successful(self): + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") + assert response.status_code == 200 + await wait_for_websocket_download_success_event(timeout=None) + time.sleep(30) + + response = requests.delete("http://localhost:3928/v1/engines/llama-cpp/install") assert response.status_code == 200 - def test_engines_uninstall_llamacpp_with_only_version_should_be_failed(self): + @pytest.mark.asyncio + async def test_engines_uninstall_llamacpp_with_only_version_should_be_failed(self): # install first + data = {"variant": "mac-arm64"} install_response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35" + "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data ) + await wait_for_websocket_download_success_event(timeout=120) assert install_response.status_code == 200 + data = {"version": "v0.1.35"} response = requests.delete( - "http://localhost:3928/v1/engines/llama-cpp?version=v0.1.35" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert response.status_code == 400 assert response.json()["message"] == "No variant provided" - def test_engines_uninstall_llamacpp_with_variant_should_be_successful(self): + @pytest.mark.asyncio + async def 
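The uninstall tests are now async and wait for a `DownloadSuccess` websocket event before deleting the engine, since the install download completes in the background. The real helper lives in `engine/e2e-test/test_runner.py`; the sketch below shows the pattern it implements, with the events URL and payload field names as assumptions:

```python
import asyncio
import json

import websockets  # third-party: pip install websockets

async def wait_for_download_success(url: str = "ws://127.0.0.1:3928/events",
                                    timeout: float | None = 120):
    """Block until the server publishes a DownloadSuccess event, or time out."""
    async def receive_until_success():
        async with websockets.connect(url) as ws:
            async for message in ws:  # each message is one serialized event
                event = json.loads(message)
                if event.get("type") == "DownloadSuccess":
                    return event

    try:
        return await asyncio.wait_for(receive_until_success(), timeout=timeout)
    except asyncio.TimeoutError:
        raise TimeoutError("Timeout waiting for DownloadSuccess event")
```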
test_engines_uninstall_llamacpp_with_variant_should_be_successful(self): # install first + data = {"variant": "mac-arm64"} install_response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64" + "http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data ) + await wait_for_websocket_download_success_event(timeout=120) assert install_response.status_code == 200 - response = requests.delete( - "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64" - ) + response = requests.delete("http://127.0.0.1:3928/v1/engines/llama-cpp/install") assert response.status_code == 200 def test_engines_uninstall_llamacpp_with_specific_variant_and_version_should_be_successful( self, ): + data = {"variant": "mac-arm64", "version": "v0.1.35"} # install first install_response = requests.post( - "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64&version=v0.1.35" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert install_response.status_code == 200 response = requests.delete( - "http://localhost:3928/v1/engines/llama-cpp?variant=mac-arm64&version=v0.1.35" + "http://localhost:3928/v1/engines/llama-cpp/install", json=data ) assert response.status_code == 200 diff --git a/engine/e2e-test/test_api_model_start.py b/engine/e2e-test/test_api_model_start.py index d6a98a78b..b3e33d113 100644 --- a/engine/e2e-test/test_api_model_start.py +++ b/engine/e2e-test/test_api_model_start.py @@ -1,8 +1,10 @@ import pytest +import time import requests from test_runner import run, start_server, stop_server - - +from test_runner import ( + wait_for_websocket_download_success_event +) class TestApiModelStart: @pytest.fixture(autouse=True) @@ -12,20 +14,28 @@ def setup_and_teardown(self): success = start_server() if not success: raise Exception("Failed to start server") - run("Install engine", ["engines", "install", "llama-cpp"], 5 * 60) run("Delete model", ["models", "delete", "tinyllama:gguf"]) - run( - "Pull model", - ["pull", "tinyllama:gguf"], - timeout=None, - ) yield # Teardown stop_server() - - def test_models_start_should_be_successful(self): + + @pytest.mark.asyncio + async def test_models_start_should_be_successful(self): + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") + assert response.status_code == 200 + await wait_for_websocket_download_success_event(timeout=None) + # TODO(sang) need to fix for cuda download + time.sleep(30) + + json_body = { + "model": "tinyllama:gguf" + } + response = requests.post("http://localhost:3928/v1/models/pull", json=json_body) + assert response.status_code == 200, f"Failed to pull model: tinyllama:gguf" + await wait_for_websocket_download_success_event(timeout=None) + json_body = {"model": "tinyllama:gguf"} response = requests.post( "http://localhost:3928/v1/models/start", json=json_body diff --git a/engine/e2e-test/test_api_model_stop.py b/engine/e2e-test/test_api_model_stop.py index dc3b6b77b..4fc7a55e2 100644 --- a/engine/e2e-test/test_api_model_stop.py +++ b/engine/e2e-test/test_api_model_stop.py @@ -1,7 +1,10 @@ import pytest +import time import requests from test_runner import run, start_server, stop_server - +from test_runner import ( + wait_for_websocket_download_success_event +) class TestApiModelStop: @@ -13,14 +16,19 @@ def setup_and_teardown(self): if not success: raise Exception("Failed to start server") - run("Install engine", ["engines", "install", "llama-cpp"], 5 * 60) yield run("Uninstall engine", ["engines", "uninstall", "llama-cpp"]) # Teardown stop_server() - def 
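`test_api_model_start` no longer shells out to the CLI for setup; it drives the whole flow over HTTP. Condensed, the sequence is the following sketch (the event waits and the CUDA-settling sleep come from the test above):

```python
import requests

BASE = "http://localhost:3928/v1"

# 1. Install the engine; the download is asynchronous, so the real test then
#    awaits the DownloadSuccess websocket event and sleeps for CUDA deps.
assert requests.post(f"{BASE}/engines/llama-cpp/install").status_code == 200

# 2. Pull the model, again followed by an event wait in the real test.
assert requests.post(f"{BASE}/models/pull",
                     json={"model": "tinyllama:gguf"}).status_code == 200

# 3. Only then is the model started.
response = requests.post(f"{BASE}/models/start", json={"model": "tinyllama:gguf"})
assert response.status_code == 200
```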
test_models_stop_should_be_successful(self): + @pytest.mark.asyncio + async def test_models_stop_should_be_successful(self): + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") + assert response.status_code == 200 + await wait_for_websocket_download_success_event(timeout=None) + time.sleep(30) + json_body = {"model": "tinyllama:gguf"} response = requests.post( "http://localhost:3928/v1/models/start", json=json_body diff --git a/engine/e2e-test/test_cli_engine_install.py b/engine/e2e-test/test_cli_engine_install.py index 380334222..a998f3183 100644 --- a/engine/e2e-test/test_cli_engine_install.py +++ b/engine/e2e-test/test_cli_engine_install.py @@ -19,6 +19,7 @@ def setup_and_teardown(self): # Teardown stop_server() + @pytest.mark.skipif(platform.system() == "Windows", reason="Progress bar log issue on Windows") def test_engines_install_llamacpp_should_be_successfully(self): exit_code, output, error = run( "Install Engine", @@ -46,6 +47,7 @@ def test_engines_install_onnx_on_tensorrt_should_be_failed(self): assert "is not supported on" in output, "Should display error message" assert exit_code == 0, f"Install engine failed with error: {error}" + @pytest.mark.skipif(platform.system() == "Windows", reason="Progress bar log issue on Windows") def test_engines_install_pre_release_llamacpp(self): engine_version = "v0.1.29" exit_code, output, error = run( @@ -67,6 +69,7 @@ def test_engines_install_pre_release_llamacpp(self): assert is_engine_version_exist, f"Engine version {engine_version} is not found" assert exit_code == 0, f"Install engine failed with error: {error}" + @pytest.mark.skipif(platform.system() == "Windows", reason="Progress bar log issue on Windows") def test_engines_should_fallback_to_download_llamacpp_engine_if_not_exists(self): exit_code, output, error = run( "Install Engine", diff --git a/engine/e2e-test/test_cli_engine_install_nightly.py b/engine/e2e-test/test_cli_engine_install_nightly.py new file mode 100644 index 000000000..8c66c284c --- /dev/null +++ b/engine/e2e-test/test_cli_engine_install_nightly.py @@ -0,0 +1,89 @@ +import platform +import tempfile + +import pytest +import requests +from test_runner import run, start_server, stop_server, get_latest_pre_release_tag + +latest_pre_release_tag = get_latest_pre_release_tag("janhq", "cortex.llamacpp") + +class TestCliEngineInstall: + def setup_and_teardown(self): + # Setup + stop_server() + success = start_server() + if not success: + raise Exception("Failed to start server") + + yield + + # Teardown + stop_server() + + def test_engines_install_llamacpp_should_be_successfully(self): + exit_code, output, error = run( + "Install Engine", + ["engines", "install", "llama-cpp"], + timeout=None, + capture=False, + ) + response = requests.get("http://127.0.0.1:3928/v1/engines/llama-cpp") + assert len(response.json()) > 0 + assert exit_code == 0, f"Install engine failed with error: {error}" + + @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test") + def test_engines_install_onnx_on_macos_should_be_failed(self): + exit_code, output, error = run( + "Install Engine", ["engines", "install", "onnxruntime"] + ) + assert "is not supported on" in output, "Should display error message" + assert exit_code == 0, f"Install engine failed with error: {error}" + + @pytest.mark.skipif(platform.system() != "Darwin", reason="macOS-specific test") + def test_engines_install_onnx_on_tensorrt_should_be_failed(self): + exit_code, output, error = run( + "Install Engine", ["engines", "install", 
"tensorrt-llm"] + ) + assert "is not supported on" in output, "Should display error message" + assert exit_code == 0, f"Install engine failed with error: {error}" + + def test_engines_should_fallback_to_download_llamacpp_engine_if_not_exists(self): + exit_code, output, error = run( + "Install Engine", + ["engines", "install", "llama-cpp", "-s", tempfile.gettempdir()], + timeout=None, + ) + # response = requests.get("http://127.0.0.1:3928/v1/engines/llama-cpp") + # assert len(response.json()) > 0 + assert "downloaded successfully" in output + assert exit_code == 0, f"Install engine failed with error: {error}" + + def test_engines_should_not_perform_with_dummy_path(self): + exit_code, output, error = run( + "Install Engine", + ["engines", "install", "llama-cpp", "-s", "abcpod"], + timeout=None, + ) + assert "Folder does not exist" in output, "Should display error" + assert exit_code == 0, f"Install engine failed with error: {error}" + + def test_engines_install_pre_release_llamacpp(self): + engine_version = latest_pre_release_tag + exit_code, output, error = run( + "Install Engine", + ["engines", "install", "llama-cpp", "-v", engine_version], + timeout=None, + capture=False, + ) + response = requests.get("http://127.0.0.1:3928/v1/engines/llama-cpp") + assert len(response.json()) > 0 + is_engine_version_exist = False + for item in response.json(): + # Check if 'version' key exists and matches target + if "version" in item and item["version"] == engine_version: + is_engine_version_exist = True + break + + # loop through all the installed response, expect we find + assert is_engine_version_exist, f"Engine version {engine_version} is not found" + assert exit_code == 0, f"Install engine failed with error: {error}" diff --git a/engine/e2e-test/test_cli_engine_uninstall.py b/engine/e2e-test/test_cli_engine_uninstall.py index 0ca151d48..fcc5f5c73 100644 --- a/engine/e2e-test/test_cli_engine_uninstall.py +++ b/engine/e2e-test/test_cli_engine_uninstall.py @@ -24,7 +24,7 @@ def setup_and_teardown(self): @pytest.mark.asyncio async def test_engines_uninstall_llamacpp_should_be_successfully(self): - requests.post("http://127.0.0.1:3928/v1/engines/llama-cpp") + requests.post("http://127.0.0.1:3928/v1/engines/llama-cpp/install") await wait_for_websocket_download_success_event(timeout=None) exit_code, output, error = run( "Uninstall engine", ["engines", "uninstall", "llama-cpp"] diff --git a/engine/e2e-test/test_cli_model_delete.py b/engine/e2e-test/test_cli_model_delete.py index f7ab53058..d0ba43ec1 100644 --- a/engine/e2e-test/test_cli_model_delete.py +++ b/engine/e2e-test/test_cli_model_delete.py @@ -1,6 +1,10 @@ import pytest +import requests from test_runner import popen, run from test_runner import start_server, stop_server +from test_runner import ( + wait_for_websocket_download_success_event +) class TestCliModelDelete: @@ -11,10 +15,6 @@ def setup_and_teardown(self): if not success: raise Exception("Failed to start server") - # Pull model - - run("Pull model", ["pull", "tinyllama:gguf"], timeout=None,) - yield # Teardown @@ -22,7 +22,15 @@ def setup_and_teardown(self): run("Delete model", ["models", "delete", "tinyllama:gguf"]) stop_server() - def test_models_delete_should_be_successful(self): + @pytest.mark.asyncio + async def test_models_delete_should_be_successful(self): + json_body = { + "model": "tinyllama:gguf" + } + response = requests.post("http://localhost:3928/v1/models/pull", json=json_body) + assert response.status_code == 200, f"Failed to pull model: tinyllama:gguf" + await 
wait_for_websocket_download_success_event(timeout=None) + exit_code, output, error = run( "Delete model", ["models", "delete", "tinyllama:gguf"] ) diff --git a/engine/e2e-test/test_runner.py b/engine/e2e-test/test_runner.py index e93e152e0..843e669b4 100644 --- a/engine/e2e-test/test_runner.py +++ b/engine/e2e-test/test_runner.py @@ -6,6 +6,7 @@ import subprocess import threading import time +import requests from typing import List import websockets @@ -187,3 +188,36 @@ async def receive_until_success(): except asyncio.TimeoutError: raise TimeoutError("Timeout waiting for DownloadSuccess event") + + +def get_latest_pre_release_tag(repo_owner, repo_name): + # URL for GitHub API to fetch all releases of the repository + url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/releases" + + # Headers to specify the API version + headers = { + "Accept": "application/vnd.github.v3+json" + } + + # Send a GET request to the GitHub API + response = requests.get(url, headers=headers) + + # Check the response status; raise an error if the request failed + if response.status_code != 200: + raise Exception(f"Failed to fetch releases: {response.status_code}, {response.text}") + + # Parse the JSON response into a list of releases + releases = response.json() + + # Filter the releases to include only pre-releases + pre_releases = [release for release in releases if release.get("prerelease")] + + # If no pre-releases are found, raise an exception + if not pre_releases: + raise Exception("No pre-releases found") + + # Sort the pre-releases by creation date, newest first + pre_releases.sort(key=lambda x: x["created_at"], reverse=True) + + # Return the tag name of the latest pre-release + return pre_releases[0]["tag_name"] \ No newline at end of file diff --git a/engine/main.cc b/engine/main.cc index 4b95785bb..b39c4c6e2 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -9,7 +9,10 @@ #include "controllers/process_manager.h" #include "controllers/server.h" #include "cortex-common/cortexpythoni.h" +#include "database/database.h" +#include "migrations/migration_manager.h" #include "services/config_service.h" +#include "services/file_watcher_service.h" #include "services/model_service.h" #include "utils/archive_utils.h" #include "utils/cortex_utils.h" @@ -19,6 +22,7 @@ #include "utils/file_manager_utils.h" #include "utils/logging_utils.h" #include "utils/system_info_utils.h" +#include "utils/widechar_conv.h" #if defined(__APPLE__) && defined(__MACH__) #include // for dirname() @@ -63,7 +67,11 @@ void RunServer(std::optional port, bool ignore_cout) { } // Create logs/ folder and setup log to file std::filesystem::create_directories( +#if defined(_WIN32) + std::filesystem::u8path(config.logFolderPath) / +#else std::filesystem::path(config.logFolderPath) / +#endif std::filesystem::path(cortex_utils::logs_folder)); static trantor::FileLogger asyncFileLogger; asyncFileLogger.setFileName( @@ -107,6 +115,7 @@ void RunServer(std::optional port, bool ignore_cout) { auto event_queue_ptr = std::make_shared(); cortex::event::EventProcessor event_processor(event_queue_ptr); + auto model_dir_path = file_manager_utils::GetModelsContainerPath(); auto config_service = std::make_shared(); auto download_service = std::make_shared(event_queue_ptr, config_service); @@ -116,6 +125,10 @@ void RunServer(std::optional port, bool ignore_cout) { auto model_service = std::make_shared( download_service, inference_svc, engine_service); + auto file_watcher_srv = std::make_shared( + model_dir_path.string(), model_service); + 
file_watcher_srv->start(); + // initialize custom controllers auto engine_ctl = std::make_shared(engine_service); auto model_ctl = std::make_shared(model_service, engine_service); @@ -142,6 +155,8 @@ void RunServer(std::optional port, bool ignore_cout) { LOG_INFO << "Please load your model"; #ifndef _WIN32 drogon::app().enableReusePort(); +#else + drogon::app().enableDateHeader(false); #endif drogon::app().addListener(config.apiServerHost, std::stoi(config.apiServerPort)); @@ -190,7 +205,11 @@ void RunServer(std::optional port, bool ignore_cout) { } } +#if defined(_WIN32) +int wmain(int argc, wchar_t* argv[]) { +#else int main(int argc, char* argv[]) { +#endif // Stop the program if the system is not supported auto system_info = system_info_utils::GetSystemInfo(); if (system_info->arch == system_info_utils::kUnsupported || @@ -205,6 +224,28 @@ int main(int argc, char* argv[]) { std::optional server_port; bool ignore_cout_log = false; +#if defined(_WIN32) + for (int i = 0; i < argc; i++) { + std::wstring command = argv[i]; + if (command == L"--config_file_path") { + std::wstring v = argv[i + 1]; + file_manager_utils::cortex_config_file_path = + cortex::wc::WstringToUtf8(v); + } else if (command == L"--data_folder_path") { + std::wstring v = argv[i + 1]; + file_manager_utils::cortex_data_folder_path = + cortex::wc::WstringToUtf8(v); + } else if (command == L"--port") { + server_port = std::stoi(argv[i + 1]); + } else if (command == L"--ignore_cout") { + ignore_cout_log = true; + } else if (command == L"--loglevel") { + std::wstring v = argv[i + 1]; + std::string log_level = cortex::wc::WstringToUtf8(v); + logging_utils_helper::SetLogLevel(log_level, ignore_cout_log); + } + } +#else for (int i = 0; i < argc; i++) { if (strcmp(argv[i], "--config_file_path") == 0) { file_manager_utils::cortex_config_file_path = argv[i + 1]; @@ -219,6 +260,7 @@ int main(int argc, char* argv[]) { logging_utils_helper::SetLogLevel(log_level, ignore_cout_log); } } +#endif { auto result = file_manager_utils::CreateConfigFileIfNotExist(); @@ -244,6 +286,15 @@ int main(int argc, char* argv[]) { } } + // check if migration is needed + if (auto res = cortex::migr::MigrationManager( + cortex::db::Database::GetInstance().db()) + .Migrate(); + res.has_error()) { + CLI_LOG("Error: " << res.error()); + return 1; + } + // Delete temporary file if it exists auto temp = file_manager_utils::GetExecutableFolderContainerPath() / "cortex_temp"; @@ -255,26 +306,26 @@ int main(int argc, char* argv[]) { } } - // Check if this process is for python execution - if (argc > 1) { - if (strcmp(argv[1], "--run_python_file") == 0) { - std::string py_home_path = (argc > 3) ? argv[3] : ""; - std::unique_ptr dl; - try { - std::string abs_path = - cortex_utils::GetCurrentPath() + kPythonRuntimeLibPath; - dl = std::make_unique(abs_path, "engine"); - } catch (const cortex_cpp::dylib::load_error& e) { - LOG_ERROR << "Could not load engine: " << e.what(); - return 1; - } + // // Check if this process is for python execution + // if (argc > 1) { + // if (strcmp(argv[1], "--run_python_file") == 0) { + // std::string py_home_path = (argc > 3) ? 
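`main.cc` now refuses to start the server when the schema migration fails, exiting with status 1. In outline, the gate looks like the following Python sketch; the database path is a stand-in and the per-version step runner is elided:

```python
import sqlite3
import sys

SCHEMA_VERSION = 0  # mirrors engine/migrations/schema_version.h

def migrate(con: sqlite3.Connection) -> None:
    """Bring the database to SCHEMA_VERSION; raise if any step fails."""
    con.execute(
        "CREATE TABLE IF NOT EXISTS schema_version (version INTEGER PRIMARY KEY)")
    row = con.execute("SELECT version FROM schema_version LIMIT 1").fetchone()
    current = row[0] if row else -1
    for version in range(current + 1, SCHEMA_VERSION + 1):
        pass  # apply the per-version folder and DB migration steps here

def main() -> int:
    con = sqlite3.connect("cortex.db")  # stand-in path
    try:
        migrate(con)
    except Exception as err:
        print(f"Error: {err}")
        return 1  # do not start the server on a failed migration
    finally:
        con.close()
    return 0

if __name__ == "__main__":
    sys.exit(main())
```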
argv[3] : ""; + // std::unique_ptr dl; + // try { + // std::string abs_path = + // cortex_utils::GetCurrentPath() + kPythonRuntimeLibPath; + // dl = std::make_unique(abs_path, "engine"); + // } catch (const cortex_cpp::dylib::load_error& e) { + // LOG_ERROR << "Could not load engine: " << e.what(); + // return 1; + // } - auto func = dl->get_function("get_engine"); - auto e = func(); - e->ExecutePythonFile(argv[0], argv[2], py_home_path); - return 0; - } - } + // auto func = dl->get_function("get_engine"); + // auto e = func(); + // e->ExecutePythonFile(argv[0], argv[2], py_home_path); + // return 0; + // } + // } RunServer(server_port, ignore_cout_log); return 0; diff --git a/engine/migrations/migration_helper.cc b/engine/migrations/migration_helper.cc new file mode 100644 index 000000000..f2b39d77e --- /dev/null +++ b/engine/migrations/migration_helper.cc @@ -0,0 +1,72 @@ +#include "migration_helper.h" + +namespace cortex::migr { +cpp::result MigrationHelper::BackupDatabase( + const std::filesystem::path& src_db_path, + const std::string& backup_db_path) { + try { + SQLite::Database src_db(src_db_path, SQLite::OPEN_READONLY); + sqlite3* backup_db; + + if (sqlite3_open16(backup_db_path.c_str(), &backup_db) != SQLITE_OK) { + throw std::runtime_error("Failed to open backup database"); + } + + sqlite3_backup* backup = + sqlite3_backup_init(backup_db, "main", src_db.getHandle(), "main"); + if (!backup) { + sqlite3_close(backup_db); + throw std::runtime_error("Failed to initialize backup"); + } + + if (sqlite3_backup_step(backup, -1) != SQLITE_DONE) { + sqlite3_backup_finish(backup); + sqlite3_close(backup_db); + throw std::runtime_error("Failed to perform backup"); + } + + sqlite3_backup_finish(backup); + sqlite3_close(backup_db); + // CTL_INF("Backup completed successfully."); + return true; + } catch (const std::exception& e) { + CTL_WRN("Error during backup: " << e.what()); + return cpp::fail(e.what()); + } +} + +cpp::result MigrationHelper::RestoreDatabase( + const std::string& backup_db_path, + const std::filesystem::path& target_db_path) { + try { + SQLite::Database target_db(target_db_path, + SQLite::OPEN_READWRITE | SQLite::OPEN_CREATE); + sqlite3* backup_db; + + if (sqlite3_open16(backup_db_path.c_str(), &backup_db) != SQLITE_OK) { + throw std::runtime_error("Failed to open backup database"); + } + + sqlite3_backup* backup = + sqlite3_backup_init(target_db.getHandle(), "main", backup_db, "main"); + if (!backup) { + sqlite3_close(backup_db); + throw std::runtime_error("Failed to initialize restore"); + } + + if (sqlite3_backup_step(backup, -1) != SQLITE_DONE) { + sqlite3_backup_finish(backup); + sqlite3_close(backup_db); + throw std::runtime_error("Failed to perform restore"); + } + + sqlite3_backup_finish(backup); + sqlite3_close(backup_db); + // CTL_INF("Restore completed successfully."); + return true; + } catch (const std::exception& e) { + CTL_WRN("Error during restore: " << e.what()); + return cpp::fail(e.what()); + } +} +} // namespace cortex::migr \ No newline at end of file diff --git a/engine/migrations/migration_helper.h b/engine/migrations/migration_helper.h new file mode 100644 index 000000000..cdf7b8f55 --- /dev/null +++ b/engine/migrations/migration_helper.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include +#include +#include "utils/logging_utils.h" +#include "utils/result.hpp" + +namespace cortex::migr { +class MigrationHelper { + public: + cpp::result BackupDatabase( + const std::filesystem::path& src_db_path, + const std::string& backup_db_path); + + cpp::result 
RestoreDatabase( + const std::string& backup_db_path, + const std::filesystem::path& target_db_path); +}; +} // namespace cortex::migr diff --git a/engine/migrations/migration_manager.cc b/engine/migrations/migration_manager.cc new file mode 100644 index 000000000..f4b4f8046 --- /dev/null +++ b/engine/migrations/migration_manager.cc @@ -0,0 +1,224 @@ +#include "migration_manager.h" +#include +#include "assert.h" +#include "schema_version.h" +#include "utils/file_manager_utils.h" +#include "utils/scope_exit.h" + +namespace cortex::migr { + +namespace { +int GetSchemaVersion(SQLite::Database& db) { + int version = -1; // Default version if not set + + try { + SQLite::Statement query(db, "SELECT version FROM schema_version LIMIT 1;"); + + // Execute the query and get the result + if (query.executeStep()) { + version = + query.getColumn(0).getInt(); // Get the version from the first column + } + } catch (const std::exception& e) { + // CTL_WRN("SQLite error: " << e.what()); + } + + return version; +} + +constexpr const auto kCortexDb = "cortex.db"; +constexpr const auto kCortexDbBackup = "cortex_backup.db"; +} // namespace + +cpp::result MigrationManager::Migrate() { + namespace fmu = file_manager_utils; + int last_schema_version = GetSchemaVersion(db_); + int target_schema_version = SCHEMA_VERSION; + if (last_schema_version == target_schema_version) + return true; + // Back up all data before migrating + if (std::filesystem::exists(fmu::GetCortexDataPath() / kCortexDb)) { + auto src_db_path = (fmu::GetCortexDataPath() / kCortexDb); + auto backup_db_path = (fmu::GetCortexDataPath() / kCortexDbBackup); + if (auto res = mgr_helper_.BackupDatabase(src_db_path, backup_db_path.string()); + res.has_error()) { + CTL_INF("Error: backup database failed!"); + return res; + } + } + + cortex::utils::ScopeExit se([]() { + auto cortex_tmp = fmu::GetCortexDataPath() / kCortexDbBackup; + if (std::filesystem::exists(cortex_tmp)) { + try { + auto n = std::filesystem::remove_all(cortex_tmp); + // CTL_INF("Deleted " << n << " files or directories"); + } catch (const std::exception& e) { + CTL_WRN(e.what()); + } + } + }); + + auto restore_db = [this]() -> cpp::result { + auto src_db_path = (fmu::GetCortexDataPath() / kCortexDb); + auto backup_db_path = (fmu::GetCortexDataPath() / kCortexDbBackup); + return mgr_helper_.BackupDatabase(src_db_path, backup_db_path.string()); + }; + + // Backup folder structure + // Update logic if the folder structure changes + + // Migrate folder structure + if (last_schema_version <= target_schema_version) { + if (auto res = + UpFolderStructure(last_schema_version, target_schema_version); + res.has_error()) { + // Restore + return res; + } + } else { + if (auto res = + DownFolderStructure(last_schema_version, target_schema_version); + res.has_error()) { + // Restore + return res; + } + } + + // Update database on folder structure changes + // Update logic if the folder structure changes + + // Migrate database + if (last_schema_version < target_schema_version) { + if (auto res = UpDB(last_schema_version, target_schema_version); + res.has_error()) { + auto r = restore_db(); + return res; + } + } else { + if (auto res = DownDB(last_schema_version, target_schema_version); + res.has_error()) { + auto r = restore_db(); + return res; + } + } + return true; +} + +cpp::result MigrationManager::UpFolderStructure(int current, + int target) { + assert(current < target); + for (int v = current + 1; v <= target; v++) { + if (auto res = DoUpFolderStructure(v /*version*/); res.has_error()) { + 
return res; + } + } + return true; +} + +cpp::result MigrationManager::DownFolderStructure( + int current, int target) { + assert(current > target); + for (int v = current; v > target; v--) { + if (auto res = DoDownFolderStructure(v /*version*/); res.has_error()) { + return res; + } + } + return true; +} + +cpp::result MigrationManager::DoUpFolderStructure( + int version) { + switch (version) { + case 0: + return v0::MigrateFolderStructureUp(); + break; + + default: + return true; + } +} +cpp::result MigrationManager::DoDownFolderStructure( + int version) { + switch (version) { + case 0: + return v0::MigrateFolderStructureDown(); + break; + + default: + return true; + } +} + +cpp::result MigrationManager::UpDB(int current, int target) { + assert(current < target); + for (int v = current + 1; v <= target; v++) { + if (auto res = DoUpDB(v /*version*/); res.has_error()) { + return res; + } + } + // Save database + return UpdateSchemaVersion(current, target); +} +cpp::result MigrationManager::DownDB(int current, + int target) { + assert(current > target); + for (int v = current; v > target; v--) { + if (auto res = DoDownDB(v /*version*/); res.has_error()) { + return res; + } + } + // Save database + return UpdateSchemaVersion(current, target); +} + +cpp::result MigrationManager::DoUpDB(int version) { + switch (version) { + case 0: + return v0::MigrateDBUp(db_); + break; + + default: + return true; + } +} + +cpp::result MigrationManager::DoDownDB(int version) { + switch (version) { + case 0: + return v0::MigrateDBDown(db_); + break; + + default: + return true; + } +} + +cpp::result MigrationManager::UpdateSchemaVersion( + int old_version, int new_version) { + if (old_version == new_version) + return true; + try { + db_.exec("BEGIN TRANSACTION;"); + + SQLite::Statement insert(db_, + "INSERT INTO schema_version (version) VALUES (?)"); + insert.bind(1, new_version); + insert.exec(); + + if (old_version != -1) { + SQLite::Statement del(db_, + "DELETE FROM schema_version WHERE version = ?"); + del.bind(1, old_version); + del.exec(); + } + + db_.exec("COMMIT;"); + // CTL_INF("Inserted: " << version); + return true; + } catch (const std::exception& e) { + CTL_WRN(e.what()); + return cpp::fail(e.what()); + } +} +} // namespace cortex::migr \ No newline at end of file diff --git a/engine/migrations/migration_manager.h b/engine/migrations/migration_manager.h new file mode 100644 index 000000000..b05a76c26 --- /dev/null +++ b/engine/migrations/migration_manager.h @@ -0,0 +1,31 @@ +#pragma once +#include "migration_helper.h" +#include "v0/migration.h" + +namespace cortex::migr { +class MigrationManager { + public: + explicit MigrationManager(SQLite::Database& db) : db_(db) {} + cpp::result Migrate(); + + private: + cpp::result UpFolderStructure(int current, int target); + cpp::result DownFolderStructure(int current, int target); + + cpp::result DoUpFolderStructure(int version); + cpp::result DoDownFolderStructure(int version); + + cpp::result UpDB(int current, int target); + cpp::result DownDB(int current, int target); + + cpp::result DoUpDB(int version); + cpp::result DoDownDB(int version); + + cpp::result UpdateSchemaVersion(int old_version, + int new_version); + + private: + MigrationHelper mgr_helper_; + SQLite::Database& db_; +}; +} // namespace cortex::migr \ No newline at end of file diff --git a/engine/migrations/schema_version.h b/engine/migrations/schema_version.h new file mode 100644 index 000000000..7cfccf27a --- /dev/null +++ b/engine/migrations/schema_version.h @@ -0,0 +1,4 @@ +#pragma once + 
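`UpdateSchemaVersion` records the new version and removes the old row inside a single transaction, so a crash cannot leave the table half-updated. The equivalent in sketch form:

```python
import sqlite3

def update_schema_version(con: sqlite3.Connection, old: int, new: int) -> None:
    """Swap the recorded schema version atomically (old == -1 means none was set)."""
    if old == new:
        return
    with con:  # BEGIN ... COMMIT; rolls back automatically on an exception
        con.execute("INSERT INTO schema_version (version) VALUES (?)", (new,))
        if old != -1:
            con.execute("DELETE FROM schema_version WHERE version = ?", (old,))
```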
+//Track the current schema version +#define SCHEMA_VERSION 0 \ No newline at end of file diff --git a/engine/migrations/v0/migration.h b/engine/migrations/v0/migration.h new file mode 100644 index 000000000..9d44435c5 --- /dev/null +++ b/engine/migrations/v0/migration.h @@ -0,0 +1,88 @@ +#pragma once +#include +#include +#include +#include "utils/file_manager_utils.h" +#include "utils/logging_utils.h" +#include "utils/result.hpp" + +namespace cortex::migr::v0 { +// Data folder +namespace fmu = file_manager_utils; + +// cortexcpp +// |__ models +// | |__ cortex.so +// | |__ tinyllama +// | |__ gguf +// |__ engines +// | |__ cortex.llamacpp +// | |__ deps +// | |__ windows-amd64-avx +// |__ logs +// +inline cpp::result MigrateFolderStructureUp() { + if (!std::filesystem::exists(fmu::GetCortexDataPath() / "models")) { + std::filesystem::create_directory(fmu::GetCortexDataPath() / "models"); + } + + if (!std::filesystem::exists(fmu::GetCortexDataPath() / "engines")) { + std::filesystem::create_directory(fmu::GetCortexDataPath() / "engines"); + } + + if (!std::filesystem::exists(fmu::GetCortexDataPath() / "logs")) { + std::filesystem::create_directory(fmu::GetCortexDataPath() / "logs"); + } + + return true; +} + +inline cpp::result MigrateFolderStructureDown() { + // CTL_INF("Folder structure already up to date!"); + return true; +} + +// Database +inline cpp::result MigrateDBUp(SQLite::Database& db) { + try { + db.exec( + "CREATE TABLE IF NOT EXISTS schema_version ( version INTEGER PRIMARY " + "KEY);"); + + db.exec( + "CREATE TABLE IF NOT EXISTS models (" + "model_id TEXT PRIMARY KEY," + "author_repo_id TEXT," + "branch_name TEXT," + "path_to_model_yaml TEXT," + "model_alias TEXT);"); + + db.exec( + "CREATE TABLE IF NOT EXISTS hardware (" + "uuid TEXT PRIMARY KEY, " + "type TEXT NOT NULL, " + "hardware_id INTEGER NOT NULL, " + "software_id INTEGER NOT NULL, " + "activated INTEGER NOT NULL CHECK (activated IN (0, 1)));"); + + // CTL_INF("Database migration up completed successfully."); + return true; + } catch (const std::exception& e) { + CTL_WRN("Migration up failed: " << e.what()); + return cpp::fail(e.what()); + } +}; + +inline cpp::result MigrateDBDown(SQLite::Database& db) { + try { + db.exec("DROP TABLE IF EXISTS hardware;"); + db.exec("DROP TABLE IF EXISTS models;"); + // CTL_INF("Migration down completed successfully."); + return true; + } catch (const std::exception& e) { + CTL_WRN("Migration down failed: " << e.what()); + return cpp::fail(e.what()); + } +} + +}; // namespace cortex::migr::v0 diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc index 09c4d1a75..d855c8f61 100644 --- a/engine/services/download_service.cc +++ b/engine/services/download_service.cc @@ -544,10 +544,16 @@ void DownloadService::EmitTaskCompleted(const std::string& task_id) { } void DownloadService::ExecuteCallback(const DownloadTask& task) { - std::lock_guard lock(callbacks_mutex_); - auto it = callbacks_.find(task.id); - if (it != callbacks_.end()) { - it->second(task); - callbacks_.erase(it); + std::lock_guard active_task_lock(active_tasks_mutex_); + if (auto it = active_tasks_.find(task.id); it != active_tasks_.end()) { + for (auto& item : it->second->items) { + item.downloadedBytes = item.bytes; + } + std::lock_guard lock(callbacks_mutex_); + auto callback = callbacks_.find(task.id); + if (callback != callbacks_.end()) { + callback->second(*it->second); + callbacks_.erase(callback); + } } } diff --git a/engine/services/engine_service.cc 
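The reworked `DownloadService::ExecuteCallback` above looks the task up under `active_tasks_mutex_`, marks every item fully downloaded, and only then pops and runs the one-shot callback, so the callback observes final byte counts. A thread-safety sketch of that shape:

```python
import threading

class DownloadCallbacks:
    """Sketch of the ExecuteCallback flow: finalize the live task's byte counts
    under the task lock, then pop the one-shot callback and invoke it."""

    def __init__(self):
        self._active_tasks = {}   # task_id -> {"items": [{"bytes": int, ...}, ...]}
        self._callbacks = {}      # task_id -> callable(task)
        self._tasks_lock = threading.Lock()
        self._callbacks_lock = threading.Lock()

    def execute_callback(self, task_id: str) -> None:
        with self._tasks_lock:
            task = self._active_tasks.get(task_id)
            if task is None:
                return  # unknown or already-finished task
            for item in task["items"]:
                item["downloadedBytes"] = item["bytes"]  # completed == full size
            with self._callbacks_lock:
                callback = self._callbacks.pop(task_id, None)
            if callback is not None:
                callback(task)
```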
b/engine/services/engine_service.cc index 8d8a4a65c..4eebff669 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -4,6 +4,7 @@ #include #include "algorithm" #include "utils/archive_utils.h" +#include "utils/cortex_utils.h" #include "utils/engine_constants.h" #include "utils/engine_matcher_utils.h" #include "utils/file_manager_utils.h" @@ -69,12 +70,12 @@ std::string GetEnginePath(std::string_view e) { }; } // namespace -cpp::result EngineService::InstallEngineAsyncV2( +cpp::result EngineService::InstallEngineAsync( const std::string& engine, const std::string& version, const std::optional variant_name) { auto ne = NormalizeEngine(engine); - CTL_INF("InstallEngineAsyncV2: " << ne << ", " << version << ", " - << variant_name.value_or("")); + CTL_INF("InstallEngineAsync: " << ne << ", " << version << ", " + << variant_name.value_or("")); auto os = hw_inf_.sys_inf->os; if (os == kMacOs && (ne == kOnnxRepo || ne == kTrtLlmRepo)) { return cpp::fail("Engine " + ne + " is not supported on macOS"); @@ -84,7 +85,7 @@ cpp::result EngineService::InstallEngineAsyncV2( return cpp::fail("Engine " + ne + " is not supported on Linux"); } - auto result = DownloadEngineV2(ne, version, variant_name); + auto result = DownloadEngine(ne, version, variant_name); if (result.has_error()) { return cpp::fail(result.error()); } @@ -95,25 +96,6 @@ cpp::result EngineService::InstallEngineAsyncV2( return {}; } -cpp::result EngineService::InstallEngineAsync( - const std::string& engine, const std::string& version, - const std::string& src) { - // Although this function is called async, only download tasks are performed async - auto ne = NormalizeEngine(engine); - if (!src.empty()) { - auto res = UnzipEngine(ne, version, src); - // If has error or engine is installed successfully - if (res.has_error() || res.value()) { - return res; - } - } - auto result = DownloadEngine(ne, version, true /*async*/); - if (result.has_error()) { - return result; - } - return DownloadCuda(ne, true /*async*/); -} - cpp::result EngineService::UnzipEngine( const std::string& engine, const std::string& version, const std::string& path) { @@ -242,7 +224,7 @@ cpp::result EngineService::UninstallEngineVariant( } } -cpp::result EngineService::DownloadEngineV2( +cpp::result EngineService::DownloadEngine( const std::string& engine, const std::string& version, const std::optional variant_name) { auto normalized_version = version == "latest" @@ -377,101 +359,6 @@ cpp::result EngineService::DownloadEngineV2( return {}; } -cpp::result EngineService::DownloadEngine( - const std::string& engine, const std::string& version, bool async) { - auto res = GetEngineVariants(engine, version); - if (res.has_error()) { - return cpp::fail("Failed to fetch engine releases: " + res.error()); - } - - if (res.value().empty()) { - return cpp::fail("No release found for " + version); - } - - auto os_arch{hw_inf_.sys_inf->os + "-" + hw_inf_.sys_inf->arch}; - - std::vector variants; - for (const auto& asset : res.value()) { - variants.push_back(asset.name); - } - - CTL_INF("engine: " << engine); - CTL_INF("CUDA version: " << hw_inf_.cuda_driver_version); - auto matched_variant = GetMatchedVariant(engine, variants); - CTL_INF("Matched variant: " << matched_variant); - if (matched_variant.empty()) { - CTL_ERR("No variant found for " << os_arch); - return cpp::fail("No variant found for " + os_arch); - } - - for (const auto& asset : res.value()) { - if (asset.name == matched_variant) { - CTL_INF("Download url: " << 
asset.browser_download_url); - - std::filesystem::path engine_folder_path = - file_manager_utils::GetContainerFolderPath( - file_manager_utils::DownloadTypeToString(DownloadType::Engine)) / - engine; - - if (!std::filesystem::exists(engine_folder_path)) { - CTL_INF("Creating " << engine_folder_path.string()); - std::filesystem::create_directories(engine_folder_path); - } - if (IsEngineLoaded(engine)) { - CTL_INF("Engine " << engine << " is already loaded, unloading it"); - auto unload_res = UnloadEngine(engine); - if (unload_res.has_error()) { - CTL_INF("Failed to unload engine: " << unload_res.error()); - return cpp::fail(unload_res.error()); - } else { - CTL_INF("Engine " << engine << " unloaded successfully"); - } - } - CTL_INF("Engine folder path: " << engine_folder_path.string() << "\n"); - auto local_path = engine_folder_path / asset.name; - auto downloadTask{ - DownloadTask{.id = engine, - .type = DownloadType::Engine, - .items = {DownloadItem{ - .id = engine, - .downloadUrl = asset.browser_download_url, - .localPath = local_path, - }}}}; - - auto on_finished = [](const DownloadTask& finishedTask) { - // try to unzip the downloaded file - CTL_INF( - "Engine zip path: " << finishedTask.items[0].localPath.string()); - - std::filesystem::path extract_path = - finishedTask.items[0].localPath.parent_path().parent_path(); - - archive_utils::ExtractArchive(finishedTask.items[0].localPath.string(), - extract_path.string()); - - // remove the downloaded file - try { - std::filesystem::remove(finishedTask.items[0].localPath); - } catch (const std::exception& e) { - CTL_WRN("Could not delete file: " << e.what()); - } - CTL_INF("Finished!"); - }; - - if (async) { - auto res = download_service_->AddTask(downloadTask, on_finished); - if (res.has_error()) { - return cpp::fail(res.error()); - } - return true; - } else { - return download_service_->AddDownloadTask(downloadTask, on_finished); - } - } - } - return true; -} - cpp::result EngineService::DownloadCuda( const std::string& engine, bool async) { if (hw_inf_.sys_inf->os == "mac" || engine == kOnnxRepo || @@ -778,14 +665,17 @@ cpp::result EngineService::LoadEngine( CTL_INF("Selected engine variant: " << json_helper::DumpJsonString(selected_engine_variant->ToJson())); - +#if defined(_WIN32) + auto user_defined_engine_path = _wgetenv(L"ENGINE_PATH"); +#else auto user_defined_engine_path = getenv("ENGINE_PATH"); +#endif + CTL_DBG("user defined engine path: " << user_defined_engine_path); const std::filesystem::path engine_dir_path = [&] { if (user_defined_engine_path != nullptr) { - return std::filesystem::path(user_defined_engine_path + - GetEnginePath(ne)) / - selected_engine_variant->variant / + return std::filesystem::path(user_defined_engine_path) / + GetEnginePath(ne) / selected_engine_variant->variant / selected_engine_variant->version; } else { return file_manager_utils::GetEnginesContainerPath() / ne / @@ -815,9 +705,9 @@ cpp::result EngineService::LoadEngine( // Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful) // 3. 
Add dll directory if met other conditions - auto add_dll = [this](const std::string& e_type, const std::string& p) { - auto ws = std::wstring(p.begin(), p.end()); - if (auto cookie = AddDllDirectory(ws.c_str()); cookie != 0) { + auto add_dll = [this](const std::string& e_type, + const std::filesystem::path& p) { + if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) { CTL_DBG("Added dll directory: " << p); engines_[e_type].cookie = cookie; } else { @@ -834,7 +724,11 @@ cpp::result EngineService::LoadEngine( } }; +#if defined(_WIN32) + if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH")); +#else if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); +#endif should_use_dll_search_path) { if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo && should_use_dll_search_path) { @@ -850,11 +744,11 @@ cpp::result EngineService::LoadEngine( CTL_DBG("Removed cuda dll directory: " << kLlamaRepo); } - add_dll(ne, engine_dir_path.string()); + add_dll(ne, engine_dir_path); } else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) { // Do nothing } else { - add_dll(ne, engine_dir_path.string()); + add_dll(ne, engine_dir_path); } } #endif @@ -1032,8 +926,8 @@ cpp::result EngineService::UpdateEngine( << default_variant->variant << " is not up-to-date! Current: " << default_variant->version << ", latest: " << latest_version->name); - auto res = InstallEngineAsyncV2(engine, latest_version->tag_name, - default_variant->variant); + auto res = InstallEngineAsync(engine, latest_version->tag_name, + default_variant->variant); return EngineUpdateResult{.engine = engine, .variant = default_variant->variant, diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index b339fd7df..dee8a530b 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -4,6 +4,7 @@ #include #include #include +#include "common/engine_servicei.h" #include "cortex-common/EngineI.h" #include "cortex-common/cortexpythoni.h" #include "services/download_service.h" @@ -13,7 +14,6 @@ #include "utils/github_release_utils.h" #include "utils/result.hpp" #include "utils/system_info_utils.h" -#include "common/engine_servicei.h" struct EngineUpdateResult { std::string engine; @@ -37,7 +37,7 @@ struct SystemInfo; using EngineV = std::variant; -class EngineService: public EngineServiceI { +class EngineService : public EngineServiceI { private: using EngineRelease = github_release_utils::GitHubRelease; using EngineVariant = github_release_utils::GitHubAsset; @@ -69,17 +69,13 @@ class EngineService: public EngineServiceI { */ cpp::result IsEngineReady(const std::string& engine) const; - cpp::result InstallEngineAsync( - const std::string& engine, const std::string& version = "latest", - const std::string& src = ""); - /** * Handling install engine variant. * * If no version provided, choose `latest`. * If no variant provided, automatically pick the best variant. 
*/ - cpp::result InstallEngineAsyncV2( + cpp::result InstallEngineAsync( const std::string& engine, const std::string& version, const std::optional variant_name); @@ -125,11 +121,7 @@ class EngineService: public EngineServiceI { const std::string& engine); private: - cpp::result DownloadEngine( - const std::string& engine, const std::string& version = "latest", - bool async = false); - - cpp::result DownloadEngineV2( + cpp::result DownloadEngine( const std::string& engine, const std::string& version = "latest", const std::optional variant_name = std::nullopt); diff --git a/engine/services/file_watcher_service.h b/engine/services/file_watcher_service.h new file mode 100644 index 000000000..d15b98827 --- /dev/null +++ b/engine/services/file_watcher_service.h @@ -0,0 +1,364 @@ +#include +#include +#include +#include +#include "services/model_service.h" +#include "utils/logging_utils.h" + +#ifdef __APPLE__ +#include +#include + +#elif defined(_WIN32) +#include + +#else // Linux +#include +#include +#include +#include +#endif + +class FileWatcherService { + private: +#if defined(_WIN32) + HANDLE dir_handle = INVALID_HANDLE_VALUE; + HANDLE stop_event; +#elif defined(__APPLE__) + FSEventStreamRef event_stream; +#else // Linux + int fd; + int wd; + std::unordered_map watch_descriptors; +#endif + + public: + FileWatcherService(const std::string& path, + std::shared_ptr model_service) + : watch_path_{path}, running_{false}, model_service_{model_service} { + if (!std::filesystem::exists(path)) { + throw std::runtime_error("Path does not exist: " + path); + } +#ifdef _WIN32 + stop_event = CreateEvent(NULL, TRUE, FALSE, NULL); +#endif + CTL_INF("FileWatcherService created: " + path); + } + + ~FileWatcherService() { + CTL_INF("FileWatcherService destructor"); + stop(); + } + + void start() { + if (running_) { + return; + } + + running_ = true; + watch_thread_ = std::thread(&FileWatcherService::WatcherThread, this); + } + + void stop() { + if (!running_) { + return; + } + + running_ = false; + +#ifdef _WIN32 + // Signal the stop event + SetEvent(stop_event); +#elif defined(__APPLE__) + if (event_stream) { + FSEventStreamStop(event_stream); + FSEventStreamInvalidate(event_stream); + } +#else // Linux + // For Linux, closing the fd will interrupt the read() call + CTL_INF("before close fd!"); + if (fd >= 0) { + close(fd); + } +#endif + CTL_INF("before join!"); + // Add timeout to avoid infinite waiting + if (watch_thread_.joinable()) { + watch_thread_.join(); + } + +#ifdef _WIN32 + if (stop_event != NULL) { + CloseHandle(stop_event); + } + if (dir_handle != INVALID_HANDLE_VALUE) { + CloseHandle(dir_handle); + } +#elif defined(__APPLE__) + if (event_stream) { + FSEventStreamRelease(event_stream); + } +#else // Linux + CleanupWatches(); +#endif + CTL_INF("FileWatcherService stopped!"); + } + + private: + std::string watch_path_; + std::atomic running_; + std::thread watch_thread_; + std::shared_ptr model_service_; + +#ifdef __APPLE__ + + static void callback(ConstFSEventStreamRef streamRef, + void* clientCallBackInfo, size_t numEvents, + void* eventPaths, + const FSEventStreamEventFlags eventFlags[], + const FSEventStreamEventId eventIds[]) { + auto** paths = (char**)eventPaths; + auto* watcher = static_cast(clientCallBackInfo); + + for (size_t i = 0; i < numEvents; i++) { + if (eventFlags[i] & (kFSEventStreamEventFlagItemRemoved | + kFSEventStreamEventFlagItemRenamed | + kFSEventStreamEventFlagItemModified)) { + CTL_INF("File removed: " + std::string(paths[i])); + CTL_INF("File event detected: " + 
std::string(paths[i]) + + " flags: " + std::to_string(eventFlags[i])); + watcher->model_service_->ForceIndexingModelList(); + } + } + } + + void WatcherThread() { + CFRunLoopRef runLoop = CFRunLoopGetCurrent(); + + auto path = CFStringCreateWithCString(nullptr, watch_path_.c_str(), + kCFStringEncodingUTF8); + auto path_to_watch = + CFArrayCreate(nullptr, (const void**)&path, 1, &kCFTypeArrayCallBacks); + + FSEventStreamContext context = {0, this, nullptr, nullptr, nullptr}; + + event_stream = FSEventStreamCreate( + nullptr, &FileWatcherService::callback, &context, path_to_watch, + kFSEventStreamEventIdSinceNow, 1, // each second + kFSEventStreamCreateFlagFileEvents | kFSEventStreamCreateFlagNoDefer | + kFSEventStreamCreateFlagWatchRoot); + + if (!event_stream) { + CFRelease(path_to_watch); + CFRelease(path); + throw std::runtime_error("Failed to create FSEvent stream"); + } + + FSEventStreamScheduleWithRunLoop(event_stream, runLoop, + kCFRunLoopDefaultMode); + + if (!FSEventStreamStart(event_stream)) { + FSEventStreamInvalidate(event_stream); + FSEventStreamRelease(event_stream); + CFRelease(path_to_watch); + CFRelease(path); + throw std::runtime_error("Failed to start FSEvent stream"); + } + + while (running_) { + CFRunLoopRunInMode(kCFRunLoopDefaultMode, 0.25, true); + } + + FSEventStreamStop(event_stream); + FSEventStreamInvalidate(event_stream); + FSEventStreamRelease(event_stream); + CFRelease(path_to_watch); + CFRelease(path); + } + +#elif defined(_WIN32) + void WatcherThread() { + dir_handle = + CreateFileA(watch_path_.c_str(), FILE_LIST_DIRECTORY, + FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OVERLAPPED, NULL); + + if (dir_handle == INVALID_HANDLE_VALUE) { + throw std::runtime_error("Failed to open directory"); + } + + char buffer[4096]; + OVERLAPPED overlapped = {0}; + overlapped.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); + DWORD bytesReturned; + HANDLE events[] = {overlapped.hEvent, stop_event}; + while (running_) { + if (!ReadDirectoryChangesW( + dir_handle, buffer, sizeof(buffer), TRUE, + FILE_NOTIFY_CHANGE_FILE_NAME | FILE_NOTIFY_CHANGE_DIR_NAME, + &bytesReturned, &overlapped, NULL)) { + break; + } + + // Wait for either file change event or stop event + DWORD result = WaitForMultipleObjects(2, events, FALSE, INFINITE); + if (result == WAIT_OBJECT_0 + 1) { // stop_event was signaled + break; + } + + if (result != WAIT_OBJECT_0 || + !GetOverlappedResult(dir_handle, &overlapped, &bytesReturned, + FALSE)) { + break; + } + + FILE_NOTIFY_INFORMATION* event = (FILE_NOTIFY_INFORMATION*)buffer; + do { + if (event->Action == FILE_ACTION_REMOVED) { + std::wstring fileName(event->FileName, + event->FileNameLength / sizeof(wchar_t)); + + std::string file_name_str(fileName.begin(), fileName.end()); + model_service_->ForceIndexingModelList(); + } + + if (event->NextEntryOffset == 0) + break; + event = (FILE_NOTIFY_INFORMATION*)((uint8_t*)event + + event->NextEntryOffset); + } while (true); + + ResetEvent(overlapped.hEvent); + } + + CloseHandle(overlapped.hEvent); + CloseHandle(dir_handle); + } + +#else // Linux + + void AddWatch(const std::string& dirPath) { + const int watch_flags = IN_DELETE | IN_DELETE_SELF | IN_CREATE; + wd = inotify_add_watch(fd, dirPath.c_str(), watch_flags); + if (wd < 0) { + throw std::runtime_error("Failed to add watch on " + dirPath + ": " + + std::string(strerror(errno))); + } + watch_descriptors[wd] = dirPath; + + // Add watches for subdirectories + try { + for (const auto& entry : + 
std::filesystem::recursive_directory_iterator(dirPath)) { + if (std::filesystem::is_directory(entry)) { + int subwd = inotify_add_watch(fd, entry.path().c_str(), watch_flags); + if (subwd >= 0) { + watch_descriptors[subwd] = entry.path().string(); + } else { + CTL_ERR("Failed to add watch for subdirectory " + + entry.path().string() + ": " + + std::string(strerror(errno))); + } + } + } + } catch (const std::filesystem::filesystem_error& e) { + CTL_ERR("Error walking directory tree: " + std::string(e.what())); + } + } + + void CleanupWatches() { + CTL_INF("Cleanup Watches"); + for (const auto& [wd, path] : watch_descriptors) { + inotify_rm_watch(fd, wd); + } + watch_descriptors.clear(); + + if (fd >= 0) { + close(fd); + fd = -1; + } + } + + void WatcherThread() { + fd = inotify_init1(IN_NONBLOCK); + if (fd < 0) { + CTL_ERR("Failed to initialize inotify: " + std::string(strerror(errno))); + return; + } + + try { + AddWatch(watch_path_); + } catch (const std::exception& e) { + CTL_ERR("Failed to add watch: " + std::string(e.what())); + close(fd); + return; + } + + const int POLL_TIMEOUT_MS = 1000; // 1 second timeout + char buffer[4096]; + struct pollfd pfd = {.fd = fd, .events = POLLIN, .revents = 0}; + + while (running_) { + // Poll will sleep until either: + // 1. Events are available (POLLIN) + // 2. POLL_TIMEOUT_MS milliseconds have elapsed + // 3. An error occurs + int poll_result = poll(&pfd, 1, POLL_TIMEOUT_MS); + + if (poll_result < 0) { + if (errno == EINTR) { + // System call was interrupted, just retry + continue; + } + CTL_ERR("Poll failed: " + std::string(strerror(errno))); + break; + } + + if (poll_result == 0) { // Timeout - no events + // No need to sleep - poll() already waited + continue; + } + + if (pfd.revents & POLLERR || pfd.revents & POLLNVAL) { + CTL_ERR("Poll error on fd"); + break; + } + + // Read all pending events + while (running_) { + int length = read(fd, buffer, sizeof(buffer)); + if (length < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + // No more events to read + break; + } + CTL_ERR("Read error: " + std::string(strerror(errno))); + break; + } + + if (length == 0) { + break; + } + + // Process events + size_t i = 0; + while (i < static_cast(length)) { + struct inotify_event* event = + reinterpret_cast(&buffer[i]); + + if (event->mask & (IN_DELETE | IN_DELETE_SELF)) { + try { + model_service_->ForceIndexingModelList(); + } catch (const std::exception& e) { + CTL_ERR("Error processing delete event: " + + std::string(e.what())); + } + } + + i += sizeof(struct inotify_event) + event->len; + } + } + } + } +#endif +}; diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 905b17107..16ae234b4 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -7,9 +7,10 @@ #include #endif #include "cli/commands/cortex_upd_cmd.h" -#include "database/hardwares.h" +#include "database/hardware.h" #include "services/engine_service.h" #include "utils/cortex_utils.h" +#include "utils/widechar_conv.h" namespace services { @@ -115,24 +116,33 @@ bool HardwareService::Restart(const std::string& host, int port) { ZeroMemory(&si, sizeof(si)); si.cb = sizeof(si); ZeroMemory(&pi, sizeof(pi)); - std::string params = "--ignore_cout"; - params += " --config_file_path " + get_config_file_path(); - params += " --data_folder_path " + get_data_folder_path(); - params += " --loglevel " + luh::LogLevelStr(luh::global_log_level); - std::string cmds = cortex_utils::GetCurrentPath() + "/" + exe + " " + params; + // 
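`FileWatcherService` hand-rolls three platform backends (FSEvents, `ReadDirectoryChangesW`, inotify + poll) whose shared contract is small: watch the models directory recursively and re-index when something is deleted. The third-party `watchdog` package expresses the same contract and is a useful mental model; assumptions here are that watchdog is installed and `force_indexing` stands in for `ModelService::ForceIndexingModelList`:

```python
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

def force_indexing() -> None:  # stand-in for ModelService::ForceIndexingModelList
    print("re-indexing model list")

class ModelDirHandler(FileSystemEventHandler):
    def on_deleted(self, event):
        force_indexing()  # a model file or folder disappeared

observer = Observer()
observer.schedule(ModelDirHandler(), "models/", recursive=True)
observer.start()
try:
    observer.join()  # block; stop() then join() mirrors FileWatcherService::stop()
except KeyboardInterrupt:
    observer.stop()
    observer.join()
```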
TODO (sang) write a common function for this and server_start_cmd + std::wstring params = L"--ignore_cout"; + params += L" --config_file_path " + + file_manager_utils::GetConfigurationPath().wstring(); + params += L" --data_folder_path " + + file_manager_utils::GetCortexDataPath().wstring(); + params += L" --loglevel " + + cortex::wc::Utf8ToWstring(luh::LogLevelStr(luh::global_log_level)); + std::wstring exe_w = cortex::wc::Utf8ToWstring(exe); + std::wstring current_path_w = + file_manager_utils::GetExecutableFolderContainerPath().wstring(); + std::wstring wcmds = current_path_w + L"/" + exe_w + L" " + params; + std::vector mutable_cmds(wcmds.begin(), wcmds.end()); + mutable_cmds.push_back(L'\0'); // Create child process if (!CreateProcess( NULL, // No module name (use command line) - const_cast( - cmds.c_str()), // Command line (replace with your actual executable) - NULL, // Process handle not inheritable - NULL, // Thread handle not inheritable - TRUE, // Handle inheritance - 0, // No creation flags - NULL, // Use parent's environment block - NULL, // Use parent's starting directory - &si, // Pointer to STARTUPINFO structure - &pi)) // Pointer to PROCESS_INFORMATION structure + mutable_cmds + .data(), // Command line (replace with your actual executable) + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + TRUE, // Handle inheritance + 0, // No creation flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &si, // Pointer to STARTUPINFO structure + &pi)) // Pointer to PROCESS_INFORMATION structure { std::cout << "Could not start server: " << GetLastError() << std::endl; return false; diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 793e8ecb5..1ec1a68cf 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -8,14 +8,15 @@ #include "database/models.h" #include "hardware_service.h" #include "httplib.h" -#include "services/engine_service.h" #include "utils/cli_selection_utils.h" +#include "utils/cortex_utils.h" #include "utils/engine_constants.h" #include "utils/file_manager_utils.h" #include "utils/huggingface_utils.h" #include "utils/logging_utils.h" #include "utils/result.hpp" #include "utils/string_utils.h" +#include "utils/widechar_conv.h" namespace { void ParseGguf(const DownloadItem& ggufDownloadItem, @@ -114,6 +115,40 @@ cpp::result GetDownloadTask( } } // namespace +void ModelService::ForceIndexingModelList() { + CTL_INF("Force indexing model list"); + + cortex::db::Models modellist_handler; + config::YamlHandler yaml_handler; + + auto list_entry = modellist_handler.LoadModelList(); + if (list_entry.has_error()) { + CTL_ERR("Failed to load model list: " << list_entry.error()); + return; + } + + namespace fs = std::filesystem; + namespace fmu = file_manager_utils; + + CTL_DBG("Database model size: " + std::to_string(list_entry.value().size())); + for (const auto& model_entry : list_entry.value()) { + try { + yaml_handler.ModelConfigFromFile( + fmu::ToAbsoluteCortexDataPath( + fs::path(model_entry.path_to_model_yaml)) + .string()); + auto model_config = yaml_handler.GetModelConfig(); + Json::Value obj = model_config.ToJson(); + yaml_handler.Reset(); + } catch (const std::exception& e) { + // remove in db + auto remove_result = + modellist_handler.DeleteModelEntry(model_entry.model); + // silently ignore result + } + } +} + cpp::result ModelService::DownloadModel( const std::string& input) { if (input.empty()) { @@ -425,11 +460,18 @@ 
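`ForceIndexingModelList` walks every database entry, attempts to parse its model YAML, and silently drops rows whose file is gone, which is exactly what the file watcher needs after a delete. A sketch against the `models` table from the v0 migration (the db path and data dir are placeholders; PyYAML assumed):

```python
import sqlite3
from pathlib import Path

import yaml  # third-party: pip install pyyaml

def force_index_models(db_path: str, data_dir: str) -> None:
    """Drop models rows whose YAML no longer exists or no longer parses."""
    con = sqlite3.connect(db_path)
    try:
        rows = con.execute(
            "SELECT model_id, path_to_model_yaml FROM models").fetchall()
        for model_id, yaml_rel_path in rows:
            try:
                with open(Path(data_dir) / yaml_rel_path, encoding="utf-8") as f:
                    yaml.safe_load(f)
            except Exception:
                # stale entry: the file was deleted or corrupted
                con.execute("DELETE FROM models WHERE model_id = ?", (model_id,))
        con.commit()
    finally:
        con.close()
```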
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index 793e8ecb5..1ec1a68cf 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -8,14 +8,15 @@
 #include "database/models.h"
 #include "hardware_service.h"
 #include "httplib.h"
-#include "services/engine_service.h"
 #include "utils/cli_selection_utils.h"
+#include "utils/cortex_utils.h"
 #include "utils/engine_constants.h"
 #include "utils/file_manager_utils.h"
 #include "utils/huggingface_utils.h"
 #include "utils/logging_utils.h"
 #include "utils/result.hpp"
 #include "utils/string_utils.h"
+#include "utils/widechar_conv.h"
 
 namespace {
 void ParseGguf(const DownloadItem& ggufDownloadItem,
@@ -114,6 +115,40 @@ cpp::result<DownloadTask, std::string> GetDownloadTask(
   }
 }
 }  // namespace
 
+void ModelService::ForceIndexingModelList() {
+  CTL_INF("Force indexing model list");
+
+  cortex::db::Models modellist_handler;
+  config::YamlHandler yaml_handler;
+
+  auto list_entry = modellist_handler.LoadModelList();
+  if (list_entry.has_error()) {
+    CTL_ERR("Failed to load model list: " << list_entry.error());
+    return;
+  }
+
+  namespace fs = std::filesystem;
+  namespace fmu = file_manager_utils;
+
+  CTL_DBG("Database model size: " + std::to_string(list_entry.value().size()));
+  for (const auto& model_entry : list_entry.value()) {
+    try {
+      yaml_handler.ModelConfigFromFile(
+          fmu::ToAbsoluteCortexDataPath(
+              fs::path(model_entry.path_to_model_yaml))
+              .string());
+      auto model_config = yaml_handler.GetModelConfig();
+      Json::Value obj = model_config.ToJson();
+      yaml_handler.Reset();
+    } catch (const std::exception& e) {
+      // remove in db
+      auto remove_result =
+          modellist_handler.DeleteModelEntry(model_entry.model);
+      // silently ignore result
+    }
+  }
+}
+
 cpp::result<DownloadTask, std::string> ModelService::DownloadModel(
     const std::string& input) {
   if (input.empty()) {
@@ -425,11 +460,18 @@ ModelService::DownloadModelFromCortexsoAsync(
       return;
     }
     auto url_obj = url_parser::FromUrlString(model_yml_item->downloadUrl);
-    CTL_INF("Adding model to modellist with branch: " << branch);
+    CTL_INF("Adding model to modellist with branch: "
+            << branch << ", path: " << model_yml_item->localPath.string());
     config::YamlHandler yaml_handler;
     yaml_handler.ModelConfigFromFile(model_yml_item->localPath.string());
     auto mc = yaml_handler.GetModelConfig();
     mc.model = unique_model_id;
+
+    uint64_t model_size = 0;
+    for (const auto& item : finishedTask.items) {
+      model_size = model_size + item.bytes.value_or(0);
+    }
+    mc.size = model_size;
     yaml_handler.UpdateModelConfig(mc);
     yaml_handler.WriteYamlFile(model_yml_item->localPath.string());
@@ -627,9 +669,13 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
     json_data = mc.ToJson();
     if (mc.files.size() > 0) {
-      // TODO(sang) support multiple files
+#if defined(_WIN32)
+      json_data["model_path"] = cortex::wc::WstringToUtf8(
+          fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).wstring());
+#else
       json_data["model_path"] =
          fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string();
+#endif
     } else {
       LOG_WARN << "model_path is empty";
       return StartModelResult{.success = false};
diff --git a/engine/services/model_service.h b/engine/services/model_service.h
index 7b6375e54..be450fb0b 100644
--- a/engine/services/model_service.h
+++ b/engine/services/model_service.h
@@ -39,6 +39,8 @@ struct StartModelResult {
 
 class ModelService {
  public:
+  void ForceIndexingModelList();
+
   explicit ModelService(std::shared_ptr<DownloadService> download_service)
       : download_service_{download_service} {};
diff --git a/engine/test/components/test_models_db.cc b/engine/test/components/test_models_db.cc
index ef54fe7e0..8c3ebbe00 100644
--- a/engine/test/components/test_models_db.cc
+++ b/engine/test/components/test_models_db.cc
@@ -13,7 +13,19 @@ class ModelsTestSuite : public ::testing::Test {
         model_list_(db_) {}
   void SetUp() {
     try {
-      db_.exec("DELETE FROM models");
+      db_.exec(
+          "CREATE TABLE IF NOT EXISTS models ("
+          "model_id TEXT PRIMARY KEY,"
+          "author_repo_id TEXT,"
+          "branch_name TEXT,"
+          "path_to_model_yaml TEXT,"
+          "model_alias TEXT);");
+    } catch (const std::exception& e) {}
+  }
+
+  void TearDown() {
+    try {
+      db_.exec("DROP TABLE IF EXISTS models;");
     } catch (const std::exception& e) {}
   }
diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h
index 187e1b4ef..3176339a0 100644
--- a/engine/utils/config_yaml_utils.h
+++ b/engine/utils/config_yaml_utils.h
@@ -86,6 +86,9 @@ class CortexConfigMgr {
     if (!out_file) {
       throw std::runtime_error("Failed to open output file.");
     }
+    // Workaround: write a UTF-8 BOM so the file is saved and re-read as UTF-8
+    const unsigned char utf8_bom[] = {0xEF, 0xBB, 0xBF};
+    out_file.write(reinterpret_cast<const char*>(utf8_bom), sizeof(utf8_bom));
     YAML::Node node;
     node["logFolderPath"] = config.logFolderPath;
     node["logLlamaCppPath"] = config.logLlamaCppPath;
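The three bytes in the config_yaml_utils.h hunk are the UTF-8 encoding of U+FEFF, the byte order mark; prepending them is what lets the YAML config round-trip non-ASCII paths on Windows, where files without a BOM are often read in the ANSI code page. A standalone sketch of the same workaround (the file name "config.yml" is illustrative only):

#include <fstream>

int main() {
  std::ofstream out("config.yml", std::ios::binary);
  // UTF-8 BOM: EF BB BF. Editors and Windows tooling that sniff the BOM
  // will decode the rest of the file as UTF-8.
  const unsigned char utf8_bom[] = {0xEF, 0xBB, 0xBF};
  out.write(reinterpret_cast<const char*>(utf8_bom), sizeof(utf8_bom));
  // Any UTF-8 bytes written after the BOM are now unambiguous.
  out << "dataFolderPath: C:\\Users\\cortex\\cortexcpp\n";
  return 0;
}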
diff --git a/engine/utils/cortex_utils.h b/engine/utils/cortex_utils.h
index 2d250df72..895217250 100644
--- a/engine/utils/cortex_utils.h
+++ b/engine/utils/cortex_utils.h
@@ -3,7 +3,10 @@
 #include
 #include
 #include
+#include <ctime>
+#include <iomanip>
 #include
+#include <sstream>
 #include
 #include
 #include
@@ -15,45 +18,82 @@
 #include
 #endif
 
-#if __APPLE__
+#if defined(__APPLE__)
 #include
 #endif
 
+#if defined(_WIN32)
+#include <windows.h>
+#include
+#include
+#endif
+
 namespace cortex_utils {
 inline std::string logs_folder = "./logs";
 inline std::string logs_base_name = "./logs/cortex.log";
 inline std::string logs_cli_base_name = "./logs/cortex-cli.log";
 
+// example: Mon, 25 Nov 2024 09:57:03 GMT
+inline std::string GetDateRFC1123() {
+  std::time_t now = std::time(nullptr);
+  std::tm* gmt_time = std::gmtime(&now);
+  std::ostringstream oss;
+  oss << std::put_time(gmt_time, "%a, %d %b %Y %H:%M:%S GMT");
+  return oss.str();
+}
+
 inline drogon::HttpResponsePtr CreateCortexHttpResponse() {
-  return drogon::HttpResponse::newHttpResponse();
+  auto res = drogon::HttpResponse::newHttpResponse();
+#if defined(_WIN32)
+  res->addHeader("date", GetDateRFC1123());
+#endif
+  return res;
 }
 
+inline drogon::HttpResponsePtr CreateCortexHttpTextAsJsonResponse(
+    const std::string& data) {
+  auto res = drogon::HttpResponse::newHttpResponse();
+  res->setBody(data);
+  res->setContentTypeCode(drogon::CT_APPLICATION_JSON);
+#if defined(_WIN32)
+  res->addHeader("date", GetDateRFC1123());
+#endif
+  return res;
+};
+
 inline drogon::HttpResponsePtr CreateCortexHttpJsonResponse(
     const Json::Value& data) {
-  return drogon::HttpResponse::newHttpJsonResponse(data);
+  auto res = drogon::HttpResponse::newHttpJsonResponse(data);
+#if defined(_WIN32)
+  res->addHeader("date", GetDateRFC1123());
+#endif
+  return res;
 };
 
 inline drogon::HttpResponsePtr CreateCortexStreamResponse(
     const std::function<std::size_t(char*, std::size_t)>& callback,
     const std::string& attachmentFileName = "") {
-  return drogon::HttpResponse::newStreamResponse(
+  auto res = drogon::HttpResponse::newStreamResponse(
       callback, attachmentFileName, drogon::CT_NONE, "text/event-stream");
+#if defined(_WIN32)
+  res->addHeader("date", GetDateRFC1123());
+#endif
+  return res;
 }
+
 #if defined(_WIN32)
 inline std::string GetCurrentPath() {
-  wchar_t path[MAX_PATH];
-  DWORD result = GetModuleFileNameW(NULL, path, MAX_PATH);
+  char path[MAX_PATH];
+  DWORD result = GetModuleFileNameA(NULL, path, MAX_PATH);
   if (result == 0) {
-    std::wcerr << L"Error getting module file name." << std::endl;
+    std::cerr << "Error getting module file name." << std::endl;
     return "";
   }
-  std::wstring::size_type pos = std::wstring(path).find_last_of(L"\\/");
-  auto ws = std::wstring(path).substr(0, pos);
-  std::string res;
-  std::transform(ws.begin(), ws.end(), std::back_inserter(res),
-                 [](wchar_t c) { return (char)c; });
-  return res;
+
+  std::string::size_type pos = std::string(path).find_last_of("\\/");
+  return std::string(path).substr(0, pos);
 }
 #else
 inline std::string GetCurrentPath() {
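GetDateRFC1123() above builds the RFC 1123 date that HTTP expects by running std::put_time with "%a, %d %b %Y %H:%M:%S GMT" over the result of gmtime. The same logic as a free-standing program; note std::gmtime returns a pointer to shared static storage, so gmtime_r (POSIX) or gmtime_s (Windows) would be the thread-safe variants:

#include <ctime>
#include <iomanip>
#include <iostream>
#include <sstream>

int main() {
  std::time_t now = std::time(nullptr);
  std::tm* gmt = std::gmtime(&now);  // UTC broken-down time
  std::ostringstream oss;
  oss << std::put_time(gmt, "%a, %d %b %Y %H:%M:%S GMT");
  std::cout << oss.str() << "\n";  // e.g. "Mon, 25 Nov 2024 09:57:03 GMT"
  return 0;
}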
diff --git a/engine/utils/engine_constants.h b/engine/utils/engine_constants.h
index c63a58ab9..5dab49936 100644
--- a/engine/utils/engine_constants.h
+++ b/engine/utils/engine_constants.h
@@ -9,7 +9,7 @@ constexpr const auto kLlamaRepo = "cortex.llamacpp";
 constexpr const auto kTrtLlmRepo = "cortex.tensorrt-llm";
 constexpr const auto kPythonRuntimeRepo = "cortex.python";
 
-constexpr const auto kLlamaLibPath = "/engines/cortex.llamacpp";
+constexpr const auto kLlamaLibPath = "./engines/cortex.llamacpp";
 constexpr const auto kPythonRuntimeLibPath = "/engines/cortex.python";
 constexpr const auto kOnnxLibPath = "/engines/cortex.onnx";
 constexpr const auto kTensorrtLlmPath = "/engines/cortex.tensorrt-llm";
diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h
index 399afcfa6..72310385c 100644
--- a/engine/utils/file_manager_utils.h
+++ b/engine/utils/file_manager_utils.h
@@ -7,6 +7,7 @@
 #include "utils/config_yaml_utils.h"
 #include "utils/engine_constants.h"
 #include "utils/result.hpp"
+#include "utils/widechar_conv.h"
 
 #if defined(__APPLE__) && defined(__MACH__)
 #include
@@ -14,6 +15,8 @@
 #include
 #elif defined(_WIN32)
 #include <windows.h>
+#include
+#include
 #endif
 
 namespace file_manager_utils {
@@ -55,8 +58,8 @@ inline std::filesystem::path GetExecutableFolderContainerPath() {
     return std::filesystem::current_path();
   }
 #elif defined(_WIN32)
-  char buffer[MAX_PATH];
-  GetModuleFileNameA(NULL, buffer, MAX_PATH);
+  wchar_t buffer[MAX_PATH];
+  GetModuleFileNameW(NULL, buffer, MAX_PATH);
   // CTL_DBG("Executable path: " << buffer);
   return std::filesystem::path{buffer}.parent_path();
 #else
@@ -67,11 +70,11 @@ inline std::filesystem::path GetExecutableFolderContainerPath() {
 
 inline std::filesystem::path GetHomeDirectoryPath() {
 #ifdef _WIN32
-  const char* homeDir = std::getenv("USERPROFILE");
+  const wchar_t* homeDir = _wgetenv(L"USERPROFILE");
   if (!homeDir) {
     // Fallback if USERPROFILE is not set
-    const char* homeDrive = std::getenv("HOMEDRIVE");
-    const char* homePath = std::getenv("HOMEPATH");
+    const wchar_t* homeDrive = _wgetenv(L"HOMEDRIVE");
+    const wchar_t* homePath = _wgetenv(L"HOMEPATH");
     if (homeDrive && homePath) {
       return std::filesystem::path(homeDrive) / std::filesystem::path(homePath);
     } else {
@@ -103,8 +106,12 @@ inline std::filesystem::path GetConfigurationPath() {
   }
 
   if (config_file_path != kDefaultConfigurationPath) {
-    // CTL_INF("Config file path: " + config_file_path);
+// CTL_INF("Config file path: " + config_file_path);
+#if defined(_WIN32)
+    return std::filesystem::u8path(config_file_path);
+#else
     return std::filesystem::path(config_file_path);
+#endif
   }
 
   std::string variant{CORTEX_VARIANT};
@@ -162,11 +169,21 @@ inline config_yaml_utils::CortexConfig GetDefaultConfig() {
           : std::filesystem::path(cortex_data_folder_path);
 
   return config_yaml_utils::CortexConfig{
+#if defined(_WIN32)
+      .logFolderPath =
+          cortex::wc::WstringToUtf8(default_data_folder_path.wstring()),
+#else
      .logFolderPath = default_data_folder_path.string(),
+#endif
      .logLlamaCppPath = kLogsLlamacppBaseName,
      .logTensorrtLLMPath = kLogsTensorrtllmBaseName,
      .logOnnxPath = kLogsOnnxBaseName,
+#if defined(_WIN32)
+      .dataFolderPath =
+          cortex::wc::WstringToUtf8(default_data_folder_path.wstring()),
+#else
      .dataFolderPath = default_data_folder_path.string(),
+#endif
      .maxLogLines = config_yaml_utils::kDefaultMaxLines,
      .apiServerHost = config_yaml_utils::kDefaultHost,
      .apiServerPort = config_yaml_utils::kDefaultPort,
@@ -220,7 +237,11 @@ inline std::filesystem::path GetCortexDataPath() {
   auto config = GetCortexConfig();
   std::filesystem::path data_folder_path;
   if (!config.dataFolderPath.empty()) {
+#if defined(_WIN32)
+    data_folder_path = std::filesystem::u8path(config.dataFolderPath);
+#else
     data_folder_path = std::filesystem::path(config.dataFolderPath);
+#endif
   } else {
     auto home_path = GetHomeDirectoryPath();
     data_folder_path = home_path / kCortexFolderName;
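The file_manager_utils.h changes keep one invariant: paths are stored in the YAML config as UTF-8 and decoded back to native wide paths with std::filesystem::u8path on Windows. A sketch of that round trip under C++17 (u8path is deprecated in C++20 but valid here; the helper names are illustrative, not part of the diff):

#include <filesystem>
#include <string>

// Encode a native path as UTF-8 for storage in a text config file.
// Roughly what the WstringToUtf8(p.wstring()) calls in the diff achieve.
std::string PathToUtf8(const std::filesystem::path& p) {
  return p.u8string();  // UTF-8 bytes regardless of the native encoding
}

// Decode UTF-8 config bytes back into a native (wide, on Windows) path.
std::filesystem::path Utf8ToPath(const std::string& utf8) {
  return std::filesystem::u8path(utf8);
}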
diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h
index 6183c3095..013069699 100644
--- a/engine/utils/system_info_utils.h
+++ b/engine/utils/system_info_utils.h
@@ -87,8 +87,8 @@ inline std::unique_ptr<SystemInfo> GetSystemInfo() {
 inline bool IsNvidiaSmiAvailable() {
 #ifdef _WIN32
   // Check if nvidia-smi.exe exists in the PATH on Windows
-  char buffer[MAX_PATH];
-  if (SearchPath(NULL, "nvidia-smi.exe", NULL, MAX_PATH, buffer, NULL) != 0) {
+  wchar_t buffer[MAX_PATH];
+  if (SearchPathW(NULL, L"nvidia-smi.exe", NULL, MAX_PATH, buffer, NULL) != 0) {
     return true;
   } else {
     return false;
diff --git a/engine/utils/widechar_conv.h b/engine/utils/widechar_conv.h
new file mode 100644
index 000000000..e979be3c1
--- /dev/null
+++ b/engine/utils/widechar_conv.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#if defined(_WIN32)
+#include <stdexcept>
+#include <string>
+#include <windows.h>
+
+namespace cortex::wc {
+
+inline std::string WstringToUtf8(const std::wstring& wstr) {
+  if (wstr.empty()) {
+    return std::string();
+  }
+
+  int size_needed = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), (int)wstr.size(), NULL, 0, NULL, NULL);
+  if (size_needed <= 0) {
+    throw std::runtime_error("WideCharToMultiByte() failed: " + std::to_string(GetLastError()));
+  }
+
+  std::string result(size_needed, 0);
+  int bytes_written = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), (int)wstr.size(), &result[0], size_needed, NULL, NULL);
+  if (bytes_written <= 0) {
+    throw std::runtime_error("WideCharToMultiByte() failed: " + std::to_string(GetLastError()));
+  }
+
+  return result;
+}
+
+inline std::wstring Utf8ToWstring(const std::string& str) {
+  if (str.empty()) {
+    return std::wstring();
+  }
+
+  int size_needed = MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), NULL, 0);
+  if (size_needed <= 0) {
+    throw std::runtime_error("MultiByteToWideChar() failed: " + std::to_string(GetLastError()));
+  }
+
+  std::wstring result(size_needed, 0);
+  int chars_written = MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), &result[0], size_needed);
+  if (chars_written <= 0) {
+    throw std::runtime_error("MultiByteToWideChar() failed: " + std::to_string(GetLastError()));
+  }
+
+  return result;
+}
+
+}  // namespace cortex::wc
+
+#endif
\ No newline at end of file
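Both helpers in widechar_conv.h use the standard two-pass Win32 idiom: one call with a null output buffer to measure the required size, then a second call to fill the allocated buffer. A hypothetical usage example (Windows only; the path is a placeholder):

#include <cassert>
#include "utils/widechar_conv.h"  // path assumed relative to engine/

int main() {
  // Round-trip a non-ASCII path through UTF-8 and back.
  std::wstring wide = L"C:\\Users\\Ñoño\\cortexcpp\\models";
  std::string utf8 = cortex::wc::WstringToUtf8(wide);
  assert(cortex::wc::Utf8ToWstring(utf8) == wide);
  return 0;
}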