diff --git a/.github/ISSUE_TEMPLATE/QA_checklist.md b/.github/ISSUE_TEMPLATE/QA_checklist.md index a0c68eb38..75123b4c4 100644 --- a/.github/ISSUE_TEMPLATE/QA_checklist.md +++ b/.github/ISSUE_TEMPLATE/QA_checklist.md @@ -70,7 +70,7 @@ OS (select one) ## Server - [ ] `cortex start` should start server and output localhost URL & port number - [ ] users can access API Swagger documentation page at localhost URL & port number -- [ ] `cortex start` can be configured with parameters (port, [logLevel [WIP]](https://github.com/janhq/cortex.cpp/pull/1636)) https://cortex.so/docs/cli/start/ +- [ ] `cortex start` can be configured with parameters (port, [logLevel [WIP]](https://github.com/menloresearch/cortex.cpp/pull/1636)) https://cortex.so/docs/cli/start/ - [ ] it should correctly log to cortex logs (logs/cortex.log, logs/cortex-cli.log) - [ ] `cortex ps` should return server status and running models (or no model loaded) - [ ] `cortex stop` should stop server diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 56e11b10a..458241d42 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -3,5 +3,5 @@ blank_issues_enabled: true contact_links: - name: "\U0001F4AC Cortex Discussions" - url: "https://github.com/orgs/janhq/discussions/categories/q-a" + url: "https://github.com/orgs/menloresearch/discussions/categories/q-a" about: "Get help, discuss features & roadmap, and share your projects" \ No newline at end of file diff --git a/.github/workflows/beta-build.yml b/.github/workflows/beta-build.yml index dc99034f4..1bf324d96 100644 --- a/.github/workflows/beta-build.yml +++ b/.github/workflows/beta-build.yml @@ -145,20 +145,20 @@ jobs: message: | Cortex.cpp beta build artifact version ${{ env.VERSION }}: - Windows: - - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64-network-installer.exe - - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64-local-installer.exe - - Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64.tar.gz + - Network Installer: https://github.com/menloresearch/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64-network-installer.exe + - Local Installer: https://github.com/menloresearch/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64-local-installer.exe + - Binary: https://github.com/menloresearch/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64.tar.gz - macOS Universal: - - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-network-installer.pkg - - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-local-installer.pkg - - Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal.tar.gz + - Network Installer: https://github.com/menloresearch/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-network-installer.pkg + - Local Installer: https://github.com/menloresearch/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-local-installer.pkg + - Binary: 
https://github.com/menloresearch/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal.tar.gz - Linux amd64 Deb: - - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-network-installer.deb - - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-local-installer.deb - - Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64.tar.gz + - Network Installer: https://github.com/menloresearch/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-network-installer.deb + - Local Installer: https://github.com/menloresearch/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-local-installer.deb + - Binary: https://github.com/menloresearch/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64.tar.gz - Linux arm64 Deb: - - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-arm64-network-installer.deb - - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-arm64-local-installer.deb - - Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-arm64.tar.gz + - Network Installer: https://github.com/menloresearch/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-arm64-network-installer.deb + - Local Installer: https://github.com/menloresearch/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-arm64-local-installer.deb + - Binary: https://github.com/menloresearch/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-arm64.tar.gz - Docker: menloltd/cortex:beta-${{ env.VERSION }} - - Github Release: https://github.com/janhq/cortex.cpp/releases/tag/v${{ env.VERSION }} + - Github Release: https://github.com/menloresearch/cortex.cpp/releases/tag/v${{ env.VERSION }} diff --git a/.github/workflows/template-build-linux.yml b/.github/workflows/template-build-linux.yml index bca440440..3fa802ad4 100644 --- a/.github/workflows/template-build-linux.yml +++ b/.github/workflows/template-build-linux.yml @@ -169,23 +169,23 @@ jobs: mkdir -p engine/templates/linux/dependencies cd engine/templates/linux/dependencies if [ "${{ inputs.arch }}" == "amd64" ]; then - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx-cuda-11-7.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx-cuda-12-0.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2-cuda-11-7.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version 
}}-linux-amd64-avx2-cuda-12-0.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512-cuda-11-7.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512-cuda-12-0.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx-cuda-11-7.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx-cuda-12-0.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-vulkan.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-11-7-linux-amd64.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-12-0-linux-amd64.tar.gz + # wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx-cuda-11-7.tar.gz + # wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx-cuda-12-0.tar.gz + # wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx.tar.gz + wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2-cuda-11-7.tar.gz + wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2-cuda-12-0.tar.gz + wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2.tar.gz + # wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512-cuda-11-7.tar.gz + # wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512-cuda-12-0.tar.gz + # wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ 
inputs.cortex-llamacpp-version }}-linux-amd64-avx512.tar.gz + wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx-cuda-11-7.tar.gz + wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx-cuda-12-0.tar.gz + wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx.tar.gz + wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-vulkan.tar.gz + wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-11-7-linux-amd64.tar.gz + wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-12-0-linux-amd64.tar.gz else - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-arm64.tar.gz + wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-arm64.tar.gz fi cd .. diff --git a/.github/workflows/template-build-macos.yml b/.github/workflows/template-build-macos.yml index ae10fb675..20c7430fb 100644 --- a/.github/workflows/template-build-macos.yml +++ b/.github/workflows/template-build-macos.yml @@ -289,8 +289,8 @@ jobs: run: | mkdir -p engine/templates/macos/Scripts/dependencies cd engine/templates/macos/Scripts/dependencies - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-mac-arm64.tar.gz - wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-mac-amd64.tar.gz + wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-mac-arm64.tar.gz + wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-mac-amd64.tar.gz cd ../../ chmod +x create_pkg_local.sh diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml index d1f6f1333..b9e0c9937 100644 --- a/.github/workflows/template-build-windows-x64.yml +++ b/.github/workflows/template-build-windows-x64.yml @@ -205,21 +205,21 @@ jobs: run: | mkdir dependencies cd dependencies - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx-cuda-11-7.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx-cuda-12-0.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx.tar.gz - 
wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx2-cuda-11-7.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx2-cuda-12-0.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx2.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx512-cuda-11-7.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx512-cuda-12-0.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx512.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-noavx-cuda-11-7.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-noavx-cuda-12-0.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-noavx.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-vulkan.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-11-7-windows-amd64.tar.gz - wget.exe https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-12-0-windows-amd64.tar.gz + # wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx-cuda-11-7.tar.gz + # wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx-cuda-12-0.tar.gz + # wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx.tar.gz + wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx2-cuda-11-7.tar.gz + wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx2-cuda-12-0.tar.gz + wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx2.tar.gz + # wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ 
inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx512-cuda-11-7.tar.gz + # wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx512-cuda-12-0.tar.gz + # wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx512.tar.gz + wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-noavx-cuda-11-7.tar.gz + wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-noavx-cuda-12-0.tar.gz + wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-noavx.tar.gz + wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-vulkan.tar.gz + wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-11-7-windows-amd64.tar.gz + wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-12-0-windows-amd64.tar.gz - name: Enable long paths run: | diff --git a/.github/workflows/template-cortex-llamacpp-latest-version.yml b/.github/workflows/template-cortex-llamacpp-latest-version.yml index 5135c55ab..610b1a89a 100644 --- a/.github/workflows/template-cortex-llamacpp-latest-version.yml +++ b/.github/workflows/template-cortex-llamacpp-latest-version.yml @@ -24,7 +24,7 @@ jobs: local max_retries=3 local tag while [ $retries -lt $max_retries ]; do - tag=$(curl -s https://api.github.com/repos/janhq/cortex.llamacpp/releases/latest | jq -r .tag_name) + tag=$(curl -s https://api.github.com/repos/menloresearch/cortex.llamacpp/releases/latest | jq -r .tag_name) if [ -n "$tag" ] && [ "$tag" != "null" ]; then echo $tag return diff --git a/.github/workflows/template-get-update-version.yml b/.github/workflows/template-get-update-version.yml index 7b715a6e0..aff50605f 100644 --- a/.github/workflows/template-get-update-version.yml +++ b/.github/workflows/template-get-update-version.yml @@ -31,7 +31,7 @@ jobs: local max_retries=3 local tag while [ $retries -lt $max_retries ]; do - tag=$(curl -s https://api.github.com/repos/janhq/cortex.cpp/releases/latest | jq -r .tag_name) + tag=$(curl -s https://api.github.com/repos/menloresearch/cortex.cpp/releases/latest | jq -r .tag_name) if [ -n "$tag" ] && [ "$tag" != "null" ]; then echo $tag return diff --git a/.github/workflows/template-noti-discord.yaml b/.github/workflows/template-noti-discord.yaml index 97a539e33..d25b37b92 100644 --- a/.github/workflows/template-noti-discord.yaml +++ b/.github/workflows/template-noti-discord.yaml @@ -43,4 +43,4 @@ jobs: - Local Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/linux-amd64/cortex-${{ env.VERSION }}-linux-amd64-local-installer.deb - Binary: https://delta.jan.ai/cortex/v${{ env.VERSION }}/linux-amd64/cortex-nightly.tar.gz - Docker: menloltd/cortex:nightly-${{ env.VERSION }} - - Github action run: 
https://github.com/janhq/cortex.cpp/actions/runs/${{ env.RUNNER_ID }} + - Github action run: https://github.com/menloresearch/cortex.cpp/actions/runs/${{ env.RUNNER_ID }} diff --git a/BUILDING.md b/BUILDING.md index 47d246a03..ff51cba88 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -1,9 +1,9 @@ # Build Cortex.cpp from source -Firstly, clone the Cortex.cpp repository [here](https://github.com/janhq/cortex.cpp) and initialize the submodules: +Firstly, clone the Cortex.cpp repository [here](https://github.com/menloresearch/cortex.cpp) and initialize the submodules: ```bash -git clone https://github.com/janhq/cortex.cpp +git clone https://github.com/menloresearch/cortex.cpp cd cortex.cpp git submodule update --init --recursive ``` @@ -73,7 +73,7 @@ make -j4 1. Open Cortex.cpp repository in Codespaces or local devcontainer - [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/janhq/cortex.cpp?quickstart=1) + [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/menloresearch/cortex.cpp?quickstart=1) ```sh devcontainer up --workspace-folder . diff --git a/README.md b/README.md index 0feb179d3..5cd51ece1 100644 --- a/README.md +++ b/README.md @@ -5,17 +5,17 @@

- GitHub commit activity - Github Last Commit - Github Contributors + GitHub commit activity + Github Last Commit + Github Contributors Discord

DocsAPI Reference • - Changelog • - Issues • + Changelog • + IssuesCommunity

@@ -35,7 +35,7 @@ Cortex is the open-source brain for robots: vision, speech, language, tabular, a All other Linux distributions: ```bash -curl -s https://raw.githubusercontent.com/janhq/cortex/main/engine/templates/linux/install.sh | sudo bash +curl -s https://raw.githubusercontent.com/menloresearch/cortex/main/engine/templates/linux/install.sh | sudo bash ``` ### Start the Server @@ -144,7 +144,7 @@ cortex-nightly hardware activate - Quick troubleshooting: `cortex --help` - [Documentation](https://cortex.so/docs) - [Community Discord](https://discord.gg/FTk2MvZwJH) -- [Report Issues](https://github.com/janhq/cortex.cpp/issues) +- [Report Issues](https://github.com/menloresearch/cortex.cpp/issues) --- @@ -182,6 +182,6 @@ The script to uninstall Cortex comes with the binary and was added to the `/usr/ ## Contact Support -- For support, please file a [GitHub ticket](https://github.com/janhq/cortex.cpp/issues/new/choose). +- For support, please file a [GitHub ticket](https://github.com/menloresearch/cortex.cpp/issues/new/choose). - For questions, join our Discord [here](https://discord.gg/FTk2MvZwJH). - For long-form inquiries, please email [hello@jan.ai](mailto:hello@jan.ai). \ No newline at end of file diff --git a/docker/README.md b/docker/README.md index 89b5c3365..2f0022afb 100644 --- a/docker/README.md +++ b/docker/README.md @@ -25,7 +25,7 @@ docker pull menloltd/cortex:nightly-1.0.1-224 - Build and Run Cortex Docker Container from Dockerfile ```bash -git clone https://github.com/janhq/cortex.cpp.git +git clone https://github.com/menloresearch/cortex.cpp.git cd cortex.cpp git submodule update --init diff --git a/docker/download-cortex.llamacpp.sh b/docker/download-cortex.llamacpp.sh index 8e6a15df2..51471c342 100644 --- a/docker/download-cortex.llamacpp.sh +++ b/docker/download-cortex.llamacpp.sh @@ -4,7 +4,7 @@ VERSION=${1:-latest} # Get the latest version of the cortex.llamacpp if [ "$VERSION" = "latest" ]; then - VERSION=$(curl -s https://api.github.com/repos/janhq/cortex.llamacpp/releases/latest | jq -r '.tag_name' | sed 's/^v//'); + VERSION=$(curl -s https://api.github.com/repos/menloresearch/cortex.llamacpp/releases/latest | jq -r '.tag_name' | sed 's/^v//'); fi # Create the directory to store the cortex.llamacpp @@ -13,18 +13,18 @@ cd /opt/cortex.llamacpp # Download the cortex.llamacpp engines echo -e "Downloading Cortex Llama version $VERSION" -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx-cuda-11-7.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx-cuda-12-0.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx2-cuda-11-7.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx2-cuda-12-0.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx2.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx512-cuda-11-7.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx512-cuda-12-0.tar.gz -wget 
https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx512.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-noavx-cuda-11-7.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-noavx-cuda-12-0.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-noavx.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-vulkan.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cuda-11-7-linux-amd64.tar.gz -wget https://github.com/janhq/cortex.llamacpp/releases/download/v$VERSION/cuda-12-0-linux-amd64.tar.gz \ No newline at end of file +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx-cuda-11-7.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx-cuda-12-0.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx2-cuda-11-7.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx2-cuda-12-0.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx2.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx512-cuda-11-7.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx512-cuda-12-0.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-avx512.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-noavx-cuda-11-7.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-noavx-cuda-12-0.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-noavx.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cortex.llamacpp-$VERSION-linux-amd64-vulkan.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cuda-11-7-linux-amd64.tar.gz +wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v$VERSION/cuda-12-0-linux-amd64.tar.gz \ No newline at end of file diff --git a/docs/docs/architecture/cortex-db.mdx b/docs/docs/architecture/cortex-db.mdx index 6182c74f4..42583825d 100644 --- a/docs/docs/architecture/cortex-db.mdx +++ b/docs/docs/architecture/cortex-db.mdx @@ -23,7 +23,6 @@ The `schema_version` table is designed to hold schema version for cortex databas |--------------------|-----------|---------------------------------------------------------| | version | INTEGER | A unique schema version for database. | - ### models Table The `models` table is designed to hold metadata about various AI models. 
Below is the structure of the table: diff --git a/docs/docs/basic-usage/cortex-js.md b/docs/docs/basic-usage/cortex-js.md index 698e9e011..fbd687e6d 100644 --- a/docs/docs/basic-usage/cortex-js.md +++ b/docs/docs/basic-usage/cortex-js.md @@ -7,7 +7,7 @@ description: How to use the Cortex.js Library 🚧 Cortex.js is currently under development, and this page is a stub for future development. ::: -[Cortex.js](https://github.com/janhq/cortex.js) is a Typescript client library that can be used to +[Cortex.js](https://github.com/menloresearch/cortex.js) is a Typescript client library that can be used to interact with the Cortex API. It is a fork of the OpenAI Typescript library with additional methods for Local AI. This is still a work in progress, and we will let the community know once a stable version is available. @@ -20,7 +20,7 @@ Cortex.cpp can be used in a Typescript application with the `cortex.js` library. ## Installation ```ts -npm install @janhq/cortexso-node +npm install @menloresearch/cortexso-node ``` ## Usage @@ -29,7 +29,7 @@ npm install @janhq/cortexso-node ```diff - import OpenAI from 'openai'; -+ import Cortex from '@janhq/cortexso-node'; ++ import Cortex from '@menloresearch/cortexso-node'; ``` 2. Modify the initialization of the client to use Cortex.cpp: @@ -46,7 +46,7 @@ npm install @janhq/cortexso-node ### Example Usage ```js -import Cortex from "@janhq/cortexso-node"; +import Cortex from "@menloresearch/cortexso-node"; async function inference() { const cortex = new Cortex({ diff --git a/docs/docs/basic-usage/cortex-py.md b/docs/docs/basic-usage/cortex-py.md index cdc540d71..55a5b0340 100644 --- a/docs/docs/basic-usage/cortex-py.md +++ b/docs/docs/basic-usage/cortex-py.md @@ -17,7 +17,7 @@ Cortex.cpp can be used in a Python application with the `cortex.py` library. Cor ## Installation ```py -pip install @janhq/cortex-python +pip install @menloresearch/cortex-python ``` ## Usage @@ -26,7 +26,7 @@ pip install @janhq/cortex-python ```diff - from openai import OpenAI -+ from @janhq/cortex-python import Cortex ++ from @menloresearch/cortex-python import Cortex ``` 2. Modify the initialization of the client to use Cortex.cpp: @@ -40,7 +40,7 @@ pip install @janhq/cortex-python ### Example Usage ```py -from @janhq/cortex-python import Cortex +from @menloresearch/cortex-python import Cortex client = OpenAI(base_url="http://localhost:3928", api_key="cortex") diff --git a/docs/docs/cortex-llamacpp.mdx b/docs/docs/cortex-llamacpp.mdx index db2085eb0..50c1bf880 100644 --- a/docs/docs/cortex-llamacpp.mdx +++ b/docs/docs/cortex-llamacpp.mdx @@ -77,7 +77,7 @@ The command will check, download, and install these dependencies: :::info -To include `llamacpp` in your own server implementation, follow the steps [here](https://github.com/janhq/llamacpp/tree/main/examples/server). +To include `llamacpp` in your own server implementation, follow the steps [here](https://github.com/menloresearch/llamacpp/tree/main/examples/server). ::: #### Get GGUF Models @@ -184,5 +184,5 @@ The future plans for `llamacpp` are focused on enhancing performance and expandi - **Multimodal Model Compatibility**: Expanding support to include a variety of multimodal models, enabling a broader range of applications and use cases. :::info -To follow the latest developments of `llamacpp`, please see the [GitHub Repository](https://github.com/janhq/llamacpp). +To follow the latest developments of `llamacpp`, please see the [GitHub Repository](https://github.com/menloresearch/llamacpp). 
::: diff --git a/docs/docs/cortex-onnx.mdx b/docs/docs/cortex-onnx.mdx index 1720c6e3a..b7e363b9a 100644 --- a/docs/docs/cortex-onnx.mdx +++ b/docs/docs/cortex-onnx.mdx @@ -40,7 +40,7 @@ The command will check, download, and install these dependencies for Windows: - vcruntime140_1.dll ``` :::info -To include `onnx` in your own server implementation, follow the steps [here](https://github.com/janhq/onnx/tree/main/examples/server). +To include `onnx` in your own server implementation, follow the steps [here](https://github.com/menloresearch/onnx/tree/main/examples/server). ::: #### Get ONNX Models diff --git a/docs/docs/cortex-tensorrt-llm.mdx b/docs/docs/cortex-tensorrt-llm.mdx index c6dc1859d..36269c9dc 100644 --- a/docs/docs/cortex-tensorrt-llm.mdx +++ b/docs/docs/cortex-tensorrt-llm.mdx @@ -13,7 +13,7 @@ import TabItem from "@theme/TabItem"; ## Introduction -[Cortex.tensorrt-llm](https://github.com/janhq/tensorrt-llm) is a C++ inference library for NVIDIA GPUs. It submodules NVIDIA’s [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) for GPU accelerated inference. +[Cortex.tensorrt-llm](https://github.com/menloresearch/tensorrt-llm) is a C++ inference library for NVIDIA GPUs. It submodules NVIDIA’s [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) for GPU accelerated inference. In addition to TensorRT-LLM, `tensorrt-llm` adds: @@ -58,7 +58,7 @@ The command will check, download, and install these dependencies: :::info -To include `tensorrt-llm` in your own server implementation, follow the steps [here](https://github.com/janhq/tensorrt-llm/tree/rel). +To include `tensorrt-llm` in your own server implementation, follow the steps [here](https://github.com/menloresearch/tensorrt-llm/tree/rel). ::: #### Get TensorRT-LLM Models diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx index 6bb966f60..d2edde830 100644 --- a/docs/docs/engines/engine-extension.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -163,7 +163,7 @@ Please ensure all dependencies are included with your dynamic library. This allo #### 4.1 Publishing Your Engine (Optional) -If you wish to make your engine publicly available, you can publish it through GitHub. For reference, examine the [cortex.llamacpp releases](https://github.com/janhq/cortex.llamacpp/releases) structure: +If you wish to make your engine publicly available, you can publish it through GitHub. For reference, examine the [cortex.llamacpp releases](https://github.com/menloresearch/cortex.llamacpp/releases) structure: - Each release tag should represent your version - Include all variants within the same release diff --git a/docs/docs/engines/python-engine.mdx b/docs/docs/engines/python-engine.mdx index 64996406d..5839a346c 100644 --- a/docs/docs/engines/python-engine.mdx +++ b/docs/docs/engines/python-engine.mdx @@ -75,7 +75,7 @@ extra_params: ## Example: Ichigo Python Model -[Ichigo python](https://github.com/janhq/ichigo) is a built-in Cortex model for chat with audio support. +[Ichigo python](https://github.com/menloresearch/ichigo) is a built-in Cortex model for chat with audio support. ### Required Models @@ -240,7 +240,7 @@ if __name__ == "__main__": 1. Create model files following the example above 2. Add required `requirements.txt` and `requirements.cuda.txt` files -3. Trigger the [Python Script Package CI](https://github.com/janhq/cortex.cpp/actions/workflows/python-script-package.yml) -4. 
Trigger the [Python Venv Package CI](https://github.com/janhq/cortex.cpp/actions/workflows/python-venv-package.yml) +3. Trigger the [Python Script Package CI](https://github.com/menloresearch/cortex.cpp/actions/workflows/python-script-package.yml) +4. Trigger the [Python Venv Package CI](https://github.com/menloresearch/cortex.cpp/actions/workflows/python-venv-package.yml) The CIs will build and publish your model to Hugging Face where it can then be downloaded and used. diff --git a/docs/docs/guides/function-calling.md b/docs/docs/guides/function-calling.md index 387cf9b89..6b9157f18 100644 --- a/docs/docs/guides/function-calling.md +++ b/docs/docs/guides/function-calling.md @@ -318,5 +318,5 @@ Use enums to improve function accuracy: - Function calling accuracy depends on model quality. Smaller models (8B-12B) work best with simple use cases. - Cortex.cpp implements function calling through prompt engineering, injecting system prompts when tools are specified. - Best compatibility with llama3.1 and derivatives (mistral-nemo, qwen) -- System prompts can be customized for specific use cases (see [implementation details](https://github.com/janhq/cortex.cpp/pull/1472/files)) -- For complete implementation examples, refer to our [detailed guide](https://github.com/janhq/models/issues/16#issuecomment-2381129322) +- System prompts can be customized for specific use cases (see [implementation details](https://github.com/menloresearch/cortex.cpp/pull/1472/files)) +- For complete implementation examples, refer to our [detailed guide](https://github.com/menloresearch/models/issues/16#issuecomment-2381129322) diff --git a/docs/docs/installation.mdx b/docs/docs/installation.mdx index acee4d5d0..fe1910d1b 100644 --- a/docs/docs/installation.mdx +++ b/docs/docs/installation.mdx @@ -32,7 +32,7 @@ For more information, please check out the [different channels](#different-chann | **Local** | **Stable** | **MacOS** | [Download](https://app.cortexcpp.com/download/latest/mac-universal-local) | :::info -For other versions, please look at [cortex.cpp repo](https://github.com/janhq/cortex.cpp) or each installer page. +For other versions, please look at [cortex.cpp repo](https://github.com/menloresearch/cortex.cpp) or each installer page. ::: diff --git a/docs/docs/installation/docker.mdx b/docs/docs/installation/docker.mdx index ffc485962..0a582f22f 100644 --- a/docs/docs/installation/docker.mdx +++ b/docs/docs/installation/docker.mdx @@ -59,7 +59,7 @@ docker pull menloltd/cortex:nightly-1.0.1-224 1. **Clone the repo:** ```bash -git clone https://github.com/janhq/cortex.cpp.git +git clone https://github.com/menloresearch/cortex.cpp.git cd cortex.cpp git submodule update --init ``` diff --git a/docs/docs/installation/linux.mdx b/docs/docs/installation/linux.mdx index a45c9cefe..debcbce1b 100644 --- a/docs/docs/installation/linux.mdx +++ b/docs/docs/installation/linux.mdx @@ -27,12 +27,12 @@ This instruction is for stable releases. 
For beta and nightly releases, please r - Network installer for all linux distros ```bash - curl -s https://raw.githubusercontent.com/janhq/cortex/main/engine/templates/linux/install.sh | sudo bash -s + curl -s https://raw.githubusercontent.com/menloresearch/cortex/main/engine/templates/linux/install.sh | sudo bash -s ``` - Local installer for Debian-based distros ```bash - curl -s https://raw.githubusercontent.com/janhq/cortex/main/engine/templates/linux/install.sh | sudo bash -s -- --deb_local + curl -s https://raw.githubusercontent.com/menloresearch/cortex/main/engine/templates/linux/install.sh | sudo bash -s -- --deb_local ``` - Parameters @@ -83,7 +83,7 @@ sudo /usr/bin/cortex-uninstall.sh 1. **Clone the Cortex Repository** ```bash - git clone https://github.com/janhq/cortex.cpp.git + git clone https://github.com/menloresearch/cortex.cpp.git cd cortex.cpp git submodule update --init ``` diff --git a/docs/docs/installation/mac.mdx b/docs/docs/installation/mac.mdx index b1e8b5e2b..646a30571 100644 --- a/docs/docs/installation/mac.mdx +++ b/docs/docs/installation/mac.mdx @@ -13,7 +13,7 @@ The instructions below are for stable releases only. For beta and nightly releas ::: 1. Download the Linux installer: -- From release: https://github.com/janhq/cortex.cpp/releases +- From release: https://github.com/menloresearch/cortex.cpp/releases - From quick download links: - Local installer `.deb`: - Stable: https://app.cortexcpp.com/download/latest/mac-universal-local @@ -80,7 +80,7 @@ The script requires sudo permission. 1. **Clone the Cortex Repository** ```bash - git clone https://github.com/janhq/cortex.cpp.git + git clone https://github.com/menloresearch/cortex.cpp.git cd cortex.cpp git submodule update --init ``` diff --git a/docs/docs/installation/windows.mdx b/docs/docs/installation/windows.mdx index f49fe2c78..eeb3c3be1 100644 --- a/docs/docs/installation/windows.mdx +++ b/docs/docs/installation/windows.mdx @@ -21,7 +21,7 @@ and `cortex-nightly`, respectively. ::: Download the windows installer: -- From release: https://github.com/janhq/cortex.cpp/releases +- From release: https://github.com/menloresearch/cortex.cpp/releases - From quick download links: - Local installer `.deb`: - Stable: https://app.cortexcpp.com/download/latest/windows-amd64-local @@ -77,7 +77,7 @@ Follow the [linux installation steps](linux) to install Cortex.cpp on the WSL. 1. 
**Clone the Cortex Repository** ```cmd - git clone https://github.com/janhq/cortex.cpp.git + git clone https://github.com/menloresearch/cortex.cpp.git cd cortex.cpp git submodule update --init ``` diff --git a/docs/docs/overview.mdx b/docs/docs/overview.mdx index 4a00b55ba..4d983d937 100644 --- a/docs/docs/overview.mdx +++ b/docs/docs/overview.mdx @@ -17,7 +17,7 @@ Key Features: - Full C++ implementation, packageable into Desktop and Mobile apps - Pull from Huggingface, or Cortex Built-in Model Library - Models stored in universal file formats (vs blobs) -- Swappable Inference Backends (default: [`llamacpp`](https://github.com/janhq/cortex.llamacpp) and [`ONNXRuntime`](https://github.com/janhq/cortex.onnx)) +- Swappable Inference Backends (default: [`llamacpp`](https://github.com/menloresearch/cortex.llamacpp) and [`ONNXRuntime`](https://github.com/menloresearch/cortex.onnx)) - Cortex can be deployed as a standalone API server, or integrated into apps like [Jan.ai](https://jan.ai/) - Automatic API docs for your server diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index 659e155d7..6e7820088 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -214,7 +214,7 @@ const config: Config = { async contentLoaded({ content, actions }) { const { setGlobalData } = actions; const fetchRepoInfo = await fetch( - "https://api.github.com/repos/janhq/cortex.cpp" + "https://api.github.com/repos/menloresearch/cortex.cpp" ); const repoInfo = await fetchRepoInfo.json(); setGlobalData(repoInfo); @@ -227,7 +227,7 @@ const config: Config = { async contentLoaded({ content, actions }) { const { setGlobalData } = actions; const fetchLatestRelease = await fetch( - "https://api.github.com/repos/janhq/cortex.cpp/releases/latest" + "https://api.github.com/repos/menloresearch/cortex.cpp/releases/latest" ); const latestRelease = await fetchLatestRelease.json(); setGlobalData(latestRelease); @@ -310,7 +310,7 @@ const config: Config = { // GitHub pages deployment config. // If you aren't using GitHub pages, you don't need these. - organizationName: "janhq", // Usually your GitHub org/user name. + organizationName: "menloresearch", // Usually your GitHub org/user name. projectName: "cortex", // Usually your repo name. onBrokenLinks: "throw", @@ -342,7 +342,7 @@ const config: Config = { sidebarPath: "./sidebars.ts", // Please change this to your repo. // Remove this to remove the "edit this page" links. - editUrl: "https://github.com/janhq/cortex.cpp/blob/dev/docs/", + editUrl: "https://github.com/menloresearch/cortex.cpp/blob/dev/docs/", }, sitemap: { changefreq: "daily", @@ -451,7 +451,7 @@ const config: Config = { items: [ { label: "Github", - href: "https://github.com/janhq/cortex.cpp", + href: "https://github.com/menloresearch/cortex.cpp", }, { label: "Discord", diff --git a/docs/src/components/Announcement/index.tsx b/docs/src/components/Announcement/index.tsx index 31761a152..35e8d525f 100644 --- a/docs/src/components/Announcement/index.tsx +++ b/docs/src/components/Announcement/index.tsx @@ -7,7 +7,7 @@ const Announcement = () => {
🎉

- + {" "} Cortex.cpp v1.0 is now live on GitHub. Check it out! diff --git a/docs/src/components/SocialNavbar/index.tsx b/docs/src/components/SocialNavbar/index.tsx index efb9ed738..64ae08dc1 100644 --- a/docs/src/components/SocialNavbar/index.tsx +++ b/docs/src/components/SocialNavbar/index.tsx @@ -14,7 +14,7 @@ const SocialNavbar = () => {

diff --git a/docs/src/containers/Homepage/Download/CardDownload.tsx b/docs/src/containers/Homepage/Download/CardDownload.tsx index b02a481fd..2f1512382 100644 --- a/docs/src/containers/Homepage/Download/CardDownload.tsx +++ b/docs/src/containers/Homepage/Download/CardDownload.tsx @@ -85,7 +85,7 @@ export default function CardDownload({ lastRelease }: Props) { .replace("{tag}", tag); return { ...system, - href: `https://github.com/janhq/cortex/releases/download/${lastRelease.tag_name}/${downloadUrl}`, + href: `https://github.com/menloresearch/cortex/releases/download/${lastRelease.tag_name}/${downloadUrl}`, }; }); diff --git a/docs/static/huggingface/hub.json b/docs/static/huggingface/hub.json index 20eea4a0a..d7718974b 100644 --- a/docs/static/huggingface/hub.json +++ b/docs/static/huggingface/hub.json @@ -11,7 +11,7 @@ { "url": "https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF/blob/main/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf", "author": "Mistral AI", - "logo": "https://raw.githubusercontent.com/janhq/cortex-web/main/static/img/logos/mistral.svg", + "logo": "https://raw.githubusercontent.com/menloresearch/cortex-web/main/static/img/logos/mistral.svg", "model_name": "Mistral 7B Instruct v0.3 Q4_K_M GGUF", "note": "Small + Chat" }, diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index 8f378a83f..23970ef51 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -77,7 +77,9 @@ "oneOf": [ { "type": "string", - "enum": ["auto"] + "enum": [ + "auto" + ] }, { "type": "object" @@ -85,7 +87,9 @@ ] } }, - "required": ["model"] + "required": [ + "model" + ] } } } @@ -104,7 +108,9 @@ }, "object": { "type": "string", - "enum": ["assistant"], + "enum": [ + "assistant" + ], "description": "The object type, which is always 'assistant'." }, "created_at": { @@ -175,7 +181,9 @@ "oneOf": [ { "type": "string", - "enum": ["auto"] + "enum": [ + "auto" + ] }, { "type": "object" @@ -195,7 +203,9 @@ } } }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] }, "patch": { "operationId": "AssistantsController_update", @@ -218,7 +228,9 @@ "description": "Beta feature header.", "schema": { "type": "string", - "enum": ["assistants=v2"] + "enum": [ + "assistants=v2" + ] } } ], @@ -293,7 +305,9 @@ "oneOf": [ { "type": "string", - "enum": ["auto"] + "enum": [ + "auto" + ] }, { "type": "object" @@ -320,7 +334,9 @@ }, "object": { "type": "string", - "enum": ["assistant"], + "enum": [ + "assistant" + ], "description": "The object type, which is always 'assistant'." }, "created_at": { @@ -391,7 +407,9 @@ "oneOf": [ { "type": "string", - "enum": ["auto"] + "enum": [ + "auto" + ] }, { "type": "object" @@ -411,7 +429,9 @@ } } }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] }, "get": { "operationId": "AssistantsController_list", @@ -427,7 +447,9 @@ "properties": { "object": { "type": "string", - "enum": ["list"], + "enum": [ + "list" + ], "description": "The object type, which is always 'list' for a list response." }, "data": { @@ -441,7 +463,9 @@ }, "object": { "type": "string", - "enum": ["assistant"], + "enum": [ + "assistant" + ], "description": "The object type, which is always 'assistant'." 
}, "created_at": { @@ -468,13 +492,18 @@ } } }, - "required": ["object", "data"] + "required": [ + "object", + "data" + ] } } } } }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] } }, "/v1/assistants/{id}": { @@ -499,7 +528,9 @@ "description": "Beta feature header.", "schema": { "type": "string", - "enum": ["assistants=v2"] + "enum": [ + "assistants=v2" + ] } } ], @@ -517,7 +548,9 @@ }, "object": { "type": "string", - "enum": ["assistant"], + "enum": [ + "assistant" + ], "description": "The object type, which is always 'assistant'." }, "created_at": { @@ -546,7 +579,9 @@ } } }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] }, "delete": { "operationId": "AssistantsController_remove", @@ -577,22 +612,32 @@ }, "object": { "type": "string", - "enum": ["assistant.deleted"], + "enum": [ + "assistant.deleted" + ], "description": "The object type for a deleted assistant." }, "deleted": { "type": "boolean", - "enum": [true], + "enum": [ + true + ], "description": "Indicates the assistant was successfully deleted." } }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] } } } } }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] } }, "/healthz": { @@ -609,7 +654,9 @@ } } }, - "tags": ["Server"] + "tags": [ + "Server" + ] } }, "/processManager/destroy": { @@ -626,7 +673,9 @@ } } }, - "tags": ["Server"] + "tags": [ + "Server" + ] } }, "/v1/embeddings": { @@ -681,11 +730,17 @@ "encoding_format": { "type": "string", "description": "The format to return the embeddings in.", - "enum": ["float", "base64"], + "enum": [ + "float", + "base64" + ], "default": "float" } }, - "required": ["input", "model"] + "required": [ + "input", + "model" + ] } } } @@ -728,7 +783,9 @@ } } }, - "tags": ["Embeddings"] + "tags": [ + "Embeddings" + ] } }, "/v1/chat/completions": { @@ -768,7 +825,9 @@ } } }, - "tags": ["Chat"] + "tags": [ + "Chat" + ] } }, "/v1/models/pull": { @@ -867,10 +926,14 @@ } } }, - "tags": ["Pulling Models"] + "tags": [ + "Pulling Models" + ] }, "delete": { - "tags": ["Pulling Models"], + "tags": [ + "Pulling Models" + ], "summary": "Stop model download", "description": "Stops the download of a model with the corresponding taskId provided in the request body", "operationId": "ModelsController_stopModelDownload", @@ -886,7 +949,9 @@ "description": "The unique identifier of the download task to be stopped" } }, - "required": ["taskId"] + "required": [ + "taskId" + ] } } } @@ -1027,7 +1092,9 @@ } } }, - "tags": ["Pulling Models"] + "tags": [ + "Pulling Models" + ] } }, "/v1/models": { @@ -1048,7 +1115,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/start": { @@ -1081,7 +1150,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/stop": { @@ -1114,7 +1185,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/{id}": { @@ -1145,7 +1218,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] }, "delete": { "operationId": "ModelsController_remove", @@ -1174,7 +1249,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/{model}": { @@ -1214,7 +1291,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/import": { @@ -1255,7 +1334,9 @@ } } }, - "tags": ["Pulling Models"] + "tags": [ + "Pulling Models" + ] } }, "/v1/models/sources": { @@ -1297,7 +1378,9 @@ } } }, - "tags": ["Pulling Models"] + "tags": [ + "Pulling Models" + ] }, 
"delete": { "summary": "Remove a model source", @@ -1354,7 +1437,9 @@ } } }, - "tags": ["Pulling Models"] + "tags": [ + "Pulling Models" + ] } }, "/v1/threads": { @@ -1419,7 +1504,11 @@ "description": "Type of object, always 'thread'" } }, - "required": ["created_at", "id", "object"] + "required": [ + "created_at", + "id", + "object" + ] }, "example": { "created_at": 1734020845, @@ -1433,7 +1522,9 @@ } } }, - "tags": ["Threads"] + "tags": [ + "Threads" + ] }, "get": { "summary": "List Threads", @@ -1483,11 +1574,18 @@ "description": "Type of object, always 'thread'" } }, - "required": ["created_at", "id", "object"] + "required": [ + "created_at", + "id", + "object" + ] } } }, - "required": ["object", "data"] + "required": [ + "object", + "data" + ] }, "example": { "data": [ @@ -1514,7 +1612,9 @@ } } }, - "tags": ["Threads"] + "tags": [ + "Threads" + ] } }, "/v1/threads/{id}": { @@ -1567,7 +1667,11 @@ "description": "Type of object, always 'thread'" } }, - "required": ["created_at", "id", "object"] + "required": [ + "created_at", + "id", + "object" + ] }, "example": { "created_at": 1732370026, @@ -1582,7 +1686,9 @@ } } }, - "tags": ["Threads"] + "tags": [ + "Threads" + ] }, "patch": { "summary": "Modify Thread", @@ -1656,7 +1762,11 @@ "description": "Type of object, always 'thread'" } }, - "required": ["created_at", "id", "object"] + "required": [ + "created_at", + "id", + "object" + ] }, "example": { "created_at": 1733301054, @@ -1670,7 +1780,9 @@ } } }, - "tags": ["Threads"] + "tags": [ + "Threads" + ] }, "delete": { "summary": "Delete Thread", @@ -1707,7 +1819,11 @@ "description": "Type of object, always 'thread.deleted'" } }, - "required": ["deleted", "id", "object"] + "required": [ + "deleted", + "id", + "object" + ] }, "example": { "deleted": true, @@ -1718,7 +1834,9 @@ } } }, - "tags": ["Threads"] + "tags": [ + "Threads" + ] } }, "/v1/threads/{thread_id}/messages": { @@ -1746,14 +1864,20 @@ "role": { "type": "string", "description": "Role of the message sender", - "enum": ["user", "assistant"] + "enum": [ + "user", + "assistant" + ] }, "content": { "type": "string", "description": "The content of the message" } }, - "required": ["role", "content"] + "required": [ + "role", + "content" + ] }, "example": { "role": "user", @@ -1793,12 +1917,17 @@ "role": { "type": "string", "description": "Role of the message sender", - "enum": ["user", "assistant"] + "enum": [ + "user", + "assistant" + ] }, "status": { "type": "string", "description": "Status of the message", - "enum": ["completed"] + "enum": [ + "completed" + ] }, "content": { "type": "array", @@ -1808,7 +1937,9 @@ "type": { "type": "string", "description": "Type of content", - "enum": ["text"] + "enum": [ + "text" + ] }, "text": { "type": "object", @@ -1865,7 +1996,9 @@ } } }, - "tags": ["Messages"] + "tags": [ + "Messages" + ] }, "get": { "summary": "List Messages", @@ -1896,7 +2029,10 @@ "description": "Sort order of messages", "schema": { "type": "string", - "enum": ["asc", "desc"] + "enum": [ + "asc", + "desc" + ] } }, { @@ -1964,12 +2100,17 @@ "role": { "type": "string", "description": "Role of the message sender", - "enum": ["assistant", "user"] + "enum": [ + "assistant", + "user" + ] }, "status": { "type": "string", "description": "Status of the message", - "enum": ["completed"] + "enum": [ + "completed" + ] }, "content": { "type": "array", @@ -1979,7 +2120,9 @@ "type": { "type": "string", "description": "Type of content", - "enum": ["text"] + "enum": [ + "text" + ] }, "text": { "type": "object", @@ -2037,7 +2180,10 @@ } } }, 
- "required": ["object", "data"] + "required": [ + "object", + "data" + ] }, "example": { "data": [ @@ -2066,7 +2212,9 @@ } } }, - "tags": ["Messages"] + "tags": [ + "Messages" + ] } }, "/v1/threads/{thread_id}/messages/{message_id}": { @@ -2120,12 +2268,17 @@ "role": { "type": "string", "description": "Role of the message sender", - "enum": ["assistant", "user"] + "enum": [ + "assistant", + "user" + ] }, "status": { "type": "string", "description": "Status of the message", - "enum": ["completed"] + "enum": [ + "completed" + ] }, "content": { "type": "array", @@ -2135,7 +2288,9 @@ "type": { "type": "string", "description": "Type of content", - "enum": ["text"] + "enum": [ + "text" + ] }, "text": { "type": "object", @@ -2223,7 +2378,9 @@ } } }, - "tags": ["Messages"] + "tags": [ + "Messages" + ] }, "patch": { "summary": "Modify Message", @@ -2306,12 +2463,17 @@ "role": { "type": "string", "description": "Role of the message sender", - "enum": ["user", "assistant"] + "enum": [ + "user", + "assistant" + ] }, "status": { "type": "string", "description": "Status of the message", - "enum": ["completed"] + "enum": [ + "completed" + ] }, "content": { "type": "array", @@ -2321,7 +2483,9 @@ "type": { "type": "string", "description": "Type of content", - "enum": ["text"] + "enum": [ + "text" + ] }, "text": { "type": "object", @@ -2381,7 +2545,9 @@ } } }, - "tags": ["Messages"] + "tags": [ + "Messages" + ] }, "delete": { "summary": "Delete Message", @@ -2427,7 +2593,11 @@ "description": "Type of object, always 'thread.message.deleted'" } }, - "required": ["deleted", "id", "object"] + "required": [ + "deleted", + "id", + "object" + ] }, "example": { "deleted": true, @@ -2438,7 +2608,9 @@ } } }, - "tags": ["Messages"] + "tags": [ + "Messages" + ] } }, "/v1/system": { @@ -2452,7 +2624,9 @@ "description": "" } }, - "tags": ["System"] + "tags": [ + "System" + ] }, "get": { "operationId": "SystemController_get", @@ -2464,7 +2638,9 @@ "description": "Ok" } }, - "tags": ["System"] + "tags": [ + "System" + ] } }, "/v1/system/events/download": { @@ -2485,7 +2661,9 @@ } } }, - "tags": ["System"] + "tags": [ + "System" + ] } }, "/v1/system/events/model": { @@ -2506,7 +2684,9 @@ } } }, - "tags": ["System"] + "tags": [ + "System" + ] } }, "/v1/system/events/resources": { @@ -2527,7 +2707,9 @@ } } }, - "tags": ["System"] + "tags": [ + "System" + ] } }, "/v1/engines/{name}": { @@ -2542,7 +2724,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -2589,7 +2775,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/releases": { @@ -2603,7 +2791,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -2638,7 +2830,7 @@ }, "url": { "type": "string", - "example": "https://api.github.com/repos/janhq/cortex.llamacpp/releases/186479804" + "example": "https://api.github.com/repos/menloresearch/cortex.llamacpp/releases/186479804" } } } @@ -2647,7 +2839,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/releases/{version}": { @@ -2661,7 +2855,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + 
"tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -2681,7 +2879,10 @@ "required": false, "schema": { "type": "string", - "enum": ["all", "compatible"], + "enum": [ + "all", + "compatible" + ], "default": "all" }, "description": "Filter the variants list. Use 'compatible' to show only variants compatible with the current system, or 'all' to show all available variants." @@ -2725,7 +2926,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/releases/latest": { @@ -2739,7 +2942,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -2779,7 +2986,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/install": { @@ -2870,7 +3079,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] }, "delete": { "summary": "Uninstall an engine", @@ -2951,7 +3162,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/update": { @@ -2965,7 +3178,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The name of the engine to update" @@ -2989,7 +3206,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/default": { @@ -3003,7 +3222,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -3035,7 +3258,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] }, "post": { "summary": "Set default engine variant", @@ -3047,7 +3272,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -3059,7 +3288,10 @@ "application/json": { "schema": { "type": "object", - "required": ["version", "variant"], + "required": [ + "version", + "variant" + ], "properties": { "version": { "type": "string", @@ -3094,7 +3326,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/load": { @@ -3138,7 +3372,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] }, "delete": { "summary": "Unload engine", @@ -3150,7 +3386,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The name of the engine to update" @@ -3174,7 +3414,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/hardware": { @@ -3216,7 +3458,9 @@ } } }, - "tags": ["Hardware"] + "tags": [ + "Hardware" + ] } }, "/v1/hardware/activate": { @@ -3235,11 +3479,17 @@ "items": { "type": "integer" }, - "example": [0, 1, 2], + "example": [ + 0, + 1, + 2 + ], "description": "An array of GPU indices to activate." } }, - "required": ["gpus"] + "required": [ + "gpus" + ] } } } @@ -3262,7 +3512,11 @@ "items": { "type": "integer" }, - "example": [0, 1, 2], + "example": [ + 0, + 1, + 2 + ], "description": "List of GPU indices that were activated." 
} } @@ -3288,7 +3542,9 @@ } } }, - "tags": ["Hardware"] + "tags": [ + "Hardware" + ] } }, "/v1/files": { @@ -3308,11 +3564,16 @@ }, "purpose": { "type": "string", - "enum": ["assistants"], + "enum": [ + "assistants" + ], "description": "The intended purpose of the uploaded file" } }, - "required": ["file", "purpose"] + "required": [ + "file", + "purpose" + ] } } } @@ -3355,7 +3616,9 @@ } } }, - "tags": ["Files"] + "tags": [ + "Files" + ] }, "get": { "summary": "List files", @@ -3410,7 +3673,9 @@ } } }, - "tags": ["Files"] + "tags": [ + "Files" + ] } }, "/v1/files/{id}": { @@ -3475,7 +3740,9 @@ } } }, - "tags": ["Files"] + "tags": [ + "Files" + ] }, "delete": { "summary": "Delete File", @@ -3512,7 +3779,11 @@ "description": "Type of object, always 'file'" } }, - "required": ["deleted", "id", "object"] + "required": [ + "deleted", + "id", + "object" + ] }, "example": { "deleted": true, @@ -3534,7 +3805,9 @@ "description": "Error message describing the issue" } }, - "required": ["message"] + "required": [ + "message" + ] }, "example": { "message": "File not found: file-0001KNP26FC62D620DGYNG2R8H" @@ -3543,7 +3816,9 @@ } } }, - "tags": ["Files"] + "tags": [ + "Files" + ] } }, "/v1/files/{id}/content": { @@ -3595,13 +3870,17 @@ "description": "Error message describing the issue" } }, - "required": ["message"] + "required": [ + "message" + ] } } } } }, - "tags": ["Files"] + "tags": [ + "Files" + ] } }, "/v1/configs": { @@ -3621,7 +3900,10 @@ "items": { "type": "string" }, - "example": ["http://127.0.0.1:39281", "https://cortex.so"] + "example": [ + "http://127.0.0.1:39281", + "https://cortex.so" + ] }, "cors": { "type": "boolean", @@ -3663,6 +3945,16 @@ "huggingface_token": { "type": "string", "example": "your_token" + }, + "api_keys": { + "type": "array", + "items": { + "type": "string" + }, + "example": [ + "api_key1", + "api_key2" + ] } } }, @@ -3680,16 +3972,24 @@ "verify_peer_ssl": false, "verify_host_ssl": false, "no_proxy": "localhost", - "huggingface_token": "your_token" + "huggingface_token": "your_token", + "api_keys": [ + "api_key1", + "api_key2" + ] } } } } }, - "tags": ["Configurations"] + "tags": [ + "Configurations" + ] }, "patch": { - "tags": ["Configurations"], + "tags": [ + "Configurations" + ], "summary": "Update configuration settings", "requestBody": { "required": true, @@ -3709,7 +4009,10 @@ "type": "string" }, "description": "List of allowed origins.", - "example": ["http://127.0.0.1:39281", "https://cortex.so"] + "example": [ + "http://127.0.0.1:39281", + "https://cortex.so" + ] }, "proxy_username": { "type": "string", @@ -3755,6 +4058,17 @@ "type": "string", "description": "HuggingFace token to pull models.", "example": "your_token" + }, + "api_keys": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of API keys.", + "example": [ + "api_key1", + "api_key2" + ] } } } @@ -3821,6 +4135,16 @@ "huggingface_token": { "type": "string", "example": "your_token" + }, + "api_keys": { + "type": "array", + "items": { + "type": "string" + }, + "example": [ + "api_key1", + "api_key2" + ] } } }, @@ -3973,13 +4297,18 @@ "properties": { "type": { "type": "string", - "enum": ["function"] + "enum": [ + "function" + ] }, "function": { "$ref": "#/components/schemas/Function" } }, - "required": ["type", "function"] + "required": [ + "type", + "function" + ] } }, "metadata": { @@ -4100,7 +4429,11 @@ "description": "Indicates whether the assistant was successfully deleted."
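Since this change threads `api_keys` through both the GET and PATCH `/v1/configs` payloads, a short sketch of updating them may be useful; the key values are the placeholders from the spec's own examples, and the base URL is assumed as above:

```python
import requests

BASE_URL = "http://127.0.0.1:39281"  # assumed local server, per the spec examples

# PATCH /v1/configs accepts a partial document; here we send only the
# api_keys array introduced by this change.
resp = requests.patch(
    f"{BASE_URL}/v1/configs",
    json={"api_keys": ["api_key1", "api_key2"]},  # placeholder keys
)
resp.raise_for_status()

# The documented PATCH response echoes the updated configuration,
# including the new api_keys field.
print(resp.json().get("api_keys"))
```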
} }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] }, "Message": { "type": "object", @@ -4117,14 +4450,21 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "name": { "type": "string", "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." } }, - "required": ["role"] + "required": [ + "role" + ] }, "SystemMessage": { "allOf": [ @@ -4153,7 +4493,10 @@ "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." } }, - "required": ["content", "role"] + "required": [ + "content", + "role" + ] } ] }, @@ -4204,7 +4547,10 @@ "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." } }, - "required": ["content", "role"] + "required": [ + "content", + "role" + ] } ] }, @@ -4316,7 +4662,10 @@ "type": "string" } }, - "required": ["content", "tool_call_id"] + "required": [ + "content", + "tool_call_id" + ] } ] }, @@ -4333,26 +4682,36 @@ "properties": { "type": { "type": "string", - "enum": ["text"] + "enum": [ + "text" + ] }, "text": { "type": "string" } }, - "required": ["type", "text"] + "required": [ + "type", + "text" + ] }, "ImageContentPart": { "type": "object", "properties": { "type": { "type": "string", - "enum": ["image_url"] + "enum": [ + "image_url" + ] }, "image_url": { "$ref": "#/components/schemas/ImageUrl" } }, - "required": ["type", "image_url"] + "required": [ + "type", + "image_url" + ] }, "AudioContentPart": { "type": "object", @@ -4365,7 +4724,10 @@ "$ref": "#/components/schemas/InputAudio" } }, - "required": ["type", "input_audio"] + "required": [ + "type", + "input_audio" + ] }, "RefusalContentPart": { "type": "object", @@ -4377,7 +4739,10 @@ "type": "string" } }, - "required": ["type", "refusal"] + "required": [ + "type", + "refusal" + ] }, "ImageUrl": { "type": "object", @@ -4392,7 +4757,9 @@ "description": "Specifies the detail level of the image. Defaults to `auto`." } }, - "required": ["url"] + "required": [ + "url" + ] }, "InputAudio": { "type": "object", @@ -4403,11 +4770,17 @@ }, "format": { "type": "string", - "enum": ["wav", "mp3"], + "enum": [ + "wav", + "mp3" + ], "description": "The format of the encoded audio data. Currently supports `wav` and `mp3`." } }, - "required": ["data", "format"] + "required": [ + "data", + "format" + ] }, "Audio": { "type": "object", @@ -4418,7 +4791,9 @@ "description": "Unique identifier for a previous audio response from the model." 
} }, - "required": ["id"] + "required": [ + "id" + ] }, "ToolCall": { "type": "object", @@ -4433,7 +4808,11 @@ "$ref": "#/components/schemas/FunctionCall" } }, - "required": ["id", "type", "function"] + "required": [ + "id", + "type", + "function" + ] }, "FunctionCall": { "type": "object", @@ -4445,7 +4824,10 @@ "type": "string" } }, - "required": ["name", "arguments"] + "required": [ + "name", + "arguments" + ] }, "CreateChatCompletionDto": { "type": "object", @@ -4499,7 +4881,9 @@ }, "stop": { "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", - "example": ["End"], + "example": [ + "End" + ], "type": "array", "items": { "type": "string" @@ -4529,13 +4913,18 @@ "type": "array", "items": { "type": "string", - "enum": ["text", "audio"] + "enum": [ + "text", + "audio" + ] }, - "description": "Specifies the modalities (types of input) supported by the model. Currently, cortex only support text modalities. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", - "example": ["text"] + "description": "Specifies the modalities (types of input) supported by the model. Currently, cortex only support text modalities. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/menloresearch/cortex.cpp/issues/1582).", + "example": [ + "text" + ] }, "audio": { - "description": "Parameters for audio output. Required when audio output is requested with `modalities: ['audio']`. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "description": "Parameters for audio output. Required when audio output is requested with `modalities: ['audio']`. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/menloresearch/cortex.cpp/issues/1582).", "type": "object", "properties": { "voice": { @@ -4545,20 +4934,29 @@ "format": { "type": "string", "description": "Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.", - "enum": ["mp3", "wav", "flac", "opus", "pcm16"] + "enum": [ + "mp3", + "wav", + "flac", + "opus", + "pcm16" + ] } }, - "required": ["voice", "format"] + "required": [ + "voice", + "format" + ] }, "store": { "type": "boolean", - "description": "Whether or not to store the output of this chat completion request for use in our model distillation or evals products. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "description": "Whether or not to store the output of this chat completion request for use in our model distillation or evals products. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/menloresearch/cortex.cpp/issues/1582).", "default": false, "example": false }, "metadata": { "type": "object", - "description": "Developer-defined tags and values used for filtering completions in the dashboard. 
We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "description": "Developer-defined tags and values used for filtering completions in the dashboard. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/menloresearch/cortex.cpp/issues/1582).", "example": { "type": "conversation" } @@ -4590,15 +4988,21 @@ }, "response_format": { "type": "object", - "description": "An object specifying the format that the model must output. Setting to { \"type\": \"json_object\" } enables JSON mode, which guarantees the message the model generates is valid JSON. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", + "description": "An object specifying the format that the model must output. Setting to { \"type\": \"json_object\" } enables JSON mode, which guarantees the message the model generates is valid JSON. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/menloresearch/cortex.cpp/issues/1582).", "properties": { "type": { "type": "string", "description": "The format of the generated output. Must be one of `text`, `json_schema` or `json_object`.", - "enum": ["text", "json_object", "json_schema"] + "enum": [ + "text", + "json_object", + "json_schema" + ] } }, - "required": ["type"] + "required": [ + "type" + ] }, "seed": { "type": "number", @@ -4608,7 +5012,7 @@ }, "service_tier": { "type": "string", - "description": "Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service:\n\n - If set to 'auto', and the Project is Scale tier enabled, the system will utilize scale tier credits until they are exhausted.\n- If set to 'auto', and the Project is not Scale tier enabled, the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee.\n- If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee.\nWhen not set, the default behavior is 'auto'.\nWhen this parameter is set, the response body will include the service_tier utilized.\n\n We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582)." + "description": "Specifies the latency tier to use for processing the request. 
This parameter is relevant for customers subscribed to the scale tier service:\n\n - If set to 'auto', and the Project is Scale tier enabled, the system will utilize scale tier credits until they are exhausted.\n- If set to 'auto', and the Project is not Scale tier enabled, the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee.\n- If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee.\nWhen not set, the default behavior is 'auto'.\nWhen this parameter is set, the response body will include the service_tier utilized.\n\n We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/menloresearch/cortex.cpp/issues/1582)." }, "stream_options": { "type": "object", @@ -4630,27 +5034,38 @@ "properties": { "type": { "type": "string", - "enum": ["function"] + "enum": [ + "function" + ] }, "function": { "$ref": "#/components/schemas/Function" } }, - "required": ["type", "function"] + "required": [ + "type", + "function" + ] } }, "tool_choice": { "anyOf": [ { "type": "string", - "enum": ["none", "auto", "required"] + "enum": [ + "none", + "auto", + "required" + ] }, { "type": "object", "properties": { "type": { "type": "string", - "enum": ["function"] + "enum": [ + "function" + ] }, "function": { "type": "object", @@ -4659,10 +5074,15 @@ "type": "string" } }, - "required": ["name"] + "required": [ + "name" + ] } }, - "required": ["type", "function"] + "required": [ + "type", + "function" + ] } ] }, @@ -4674,7 +5094,7 @@ }, "user": { "type": "string", - "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582)." + "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/menloresearch/cortex.cpp/issues/1582)." }, "dynatemp_range": { "type": "number", @@ -4737,7 +5157,10 @@ "description": "Minimum number of tokens to keep. This parameter only supported by `llama-cpp` engine." } }, - "required": ["messages", "model"] + "required": [ + "messages", + "model" + ] }, "Function": { "type": "object", @@ -4757,7 +5180,9 @@ "default": false } }, - "required": ["name"] + "required": [ + "name" + ] }, "MessageDto": { "type": "object", @@ -4771,7 +5196,10 @@ "description": "The role of the participant in the chat, such as 'user' or 'system', indicating who is the sender of the message." } }, - "required": ["content", "role"] + "required": [ + "content", + "role" + ] }, "ChoiceDto": { "type": "object", @@ -4793,7 +5221,11 @@ ] } }, - "required": ["finish_reason", "index", "message"] + "required": [ + "finish_reason", + "index", + "message" + ] }, "UsageDto": { "type": "object", @@ -4811,7 +5243,11 @@ "description": "The total number of tokens used in both the prompt and the completion, summarizing the entire token count of the chat operation."
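`CreateChatCompletionDto` requires only `messages` and `model`; everything else is optional. A minimal request sketch, assuming the conventional `/v1/chat/completions` route this DTO is bound to elsewhere in the spec, the usual local base URL, and a placeholder model id:

```python
import requests

BASE_URL = "http://127.0.0.1:39281"  # assumed local server

payload = {
    # The only required fields per the schema's "required" list.
    "model": "tinyllama",  # placeholder id for a locally available model
    "messages": [{"role": "user", "content": "Hello!"}],
    # Optional: "stop" tokens, shown with the schema's documented example.
    "stop": ["End"],
}
resp = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload)
resp.raise_for_status()

# ChatCompletionResponseDto: choices[].message.content holds the reply.
print(resp.json()["choices"][0]["message"]["content"])
```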
} }, - "required": ["completion_tokens", "prompt_tokens", "total_tokens"] + "required": [ + "completion_tokens", + "prompt_tokens", + "total_tokens" + ] }, "ChatCompletionResponseDto": { "type": "object", @@ -4838,11 +5274,17 @@ "type": "object", "properties": { "content": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The contents of the message." }, "refusal": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The refusal message generated by the model." }, "tool_calls": { @@ -4871,10 +5313,17 @@ "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." } }, - "required": ["name", "arguments"] + "required": [ + "name", + "arguments" + ] } }, - "required": ["id", "type", "function"] + "required": [ + "id", + "type", + "function" + ] } }, "role": { @@ -4895,7 +5344,10 @@ "description": "The name of the function to call." } }, - "required": ["arguments", "name"] + "required": [ + "arguments", + "name" + ] }, "audio": { "type": "object", @@ -4918,17 +5370,27 @@ "description": "Transcript of the audio generated by the model." } }, - "required": ["id", "expires_at", "data", "transcript"] + "required": [ + "id", + "expires_at", + "data", + "transcript" + ] } }, - "required": ["role"] + "required": [ + "role" + ] }, "logprobs": { "type": "object", "description": "Log probability information for the choice.", "properties": { "content": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of message content tokens with log probability information.", "items": { "type": "object", @@ -4942,11 +5404,17 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } }, "top_logprobs": { @@ -4964,15 +5432,24 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." 
} }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } }, "refusal": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of message refusal tokens with log probability information.", "items": { "type": "object", @@ -4986,17 +5463,27 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } } } } }, - "required": ["finish_reason", "index", "message"] + "required": [ + "finish_reason", + "index", + "message" + ] } }, "created": { @@ -5008,7 +5495,10 @@ "description": "The model used for the chat completion." }, "service_tier": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The service tier used for processing the request. This field is only included if the service_tier parameter is specified in the request." }, "system_fingerprint": { @@ -5048,7 +5538,10 @@ "description": "Tokens generated by the model for reasoning." } }, - "required": ["audio_tokens", "reasoning_tokens"] + "required": [ + "audio_tokens", + "reasoning_tokens" + ] }, "prompt_tokens_details": { "type": "object", @@ -5063,7 +5556,10 @@ "description": "Cached tokens present in the prompt." } }, - "required": ["audio_tokens", "cached_tokens"] + "required": [ + "audio_tokens", + "cached_tokens" + ] } }, "required": [ @@ -5103,7 +5599,10 @@ "description": "A chat completion delta generated by streamed model responses.", "properties": { "content": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The contents of the chunk message." }, "function_call": { @@ -5141,10 +5640,18 @@ "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." } }, - "required": ["name", "arguments"] + "required": [ + "name", + "arguments" + ] } }, - "required": ["index", "id", "type", "function"] + "required": [ + "index", + "id", + "type", + "function" + ] } }, "role": { @@ -5152,7 +5659,10 @@ "description": "The role of the author of this message." }, "refusal": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The refusal message generated by the model." } } @@ -5162,7 +5672,10 @@ "description": "Log probability information for the choice.", "properties": { "content": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of message content tokens with log probability information.", "items": { "type": "object", @@ -5176,11 +5689,17 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." 
}, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } }, "top_logprobs": { @@ -5198,15 +5717,24 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } }, "refusal": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of message refusal tokens with log probability information.", "items": { "type": "object", @@ -5220,17 +5748,26 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } } } }, "finish_reason": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool, or function_call (deprecated) if the model called a function." }, "index": { @@ -5238,7 +5775,10 @@ "description": "The index of the choice in the list of choices." } }, - "required": ["delta", "index"] + "required": [ + "delta", + "index" + ] } }, "created": { @@ -5250,7 +5790,10 @@ "description": "The model used to generate the completion." }, "service_tier": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The service tier used for processing the request. This field is only included if the service_tier parameter is specified in the request." }, "system_fingerprint": { @@ -5278,7 +5821,11 @@ "description": "Total number of tokens used in the request (prompt + completion)." } }, - "required": ["completion_tokens", "prompt_tokens", "total_tokens"] + "required": [ + "completion_tokens", + "prompt_tokens", + "total_tokens" + ] } }, "required": [ @@ -5299,7 +5846,9 @@ "description": "The name of the embedding model to be used." 
}, "input": { - "example": ["Hello World"], + "example": [ + "Hello World" + ], "description": "The text or token array(s) to be embedded. This can be a single string, an array of strings, or an array of token arrays to embed multiple inputs in one request.", "type": "array", "items": { @@ -5317,7 +5866,10 @@ "description": "Defines the number of dimensions for the output embeddings. This feature is supported by certain models only. This field is optional." } }, - "required": ["model", "input"] + "required": [ + "model", + "input" + ] }, "EmbeddingsResponseDto": { "type": "object", @@ -5346,11 +5898,18 @@ ] } }, - "required": ["object", "model", "embedding", "usage"] + "required": [ + "object", + "model", + "embedding", + "usage" + ] }, "PullModelRequest": { "type": "object", - "required": ["model"], + "required": [ + "model" + ], "properties": { "model": { "type": "string", @@ -5507,7 +6066,9 @@ }, "files": { "description": "The URL sources from which the model downloaded or accessed.", - "example": ["https://huggingface.co/cortexso/mistral/tree/gguf"], + "example": [ + "https://huggingface.co/cortexso/mistral/tree/gguf" + ], "oneOf": [ { "type": "array", @@ -5527,7 +6088,9 @@ }, "stop": { "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", - "example": ["End"], + "example": [ + "End" + ], "type": "array", "items": { "type": "string" @@ -5597,7 +6160,10 @@ "default": "" } }, - "required": ["model", "files"] + "required": [ + "model", + "files" + ] }, "StartModelSuccessDto": { "type": "object", @@ -5611,7 +6177,10 @@ "description": "The unique identifier of the model." } }, - "required": ["message", "modelId"] + "required": [ + "message", + "modelId" + ] }, "ModelStartDto": { "type": "object", @@ -5658,7 +6227,9 @@ "example": "/tmp/model.gguf" } }, - "required": ["model"] + "required": [ + "model" + ] }, "ModelStopDto": { "type": "object", @@ -5669,7 +6240,9 @@ "description": "A downloaded model name." } }, - "required": ["model"] + "required": [ + "model" + ] }, "ImportModelRequest": { "type": "object", @@ -5689,10 +6262,16 @@ "option": { "type": "string", "description": "Import options such as symlink or copy.", - "enum": ["symlink", "copy"] + "enum": [ + "symlink", + "copy" + ] } }, - "required": ["model", "modelPath"] + "required": [ + "model", + "modelPath" + ] }, "ImportModelResponse": { "type": "object", @@ -5711,7 +6290,11 @@ "example": "OK" } }, - "required": ["message", "modelHandle", "result"] + "required": [ + "message", + "modelHandle", + "result" + ] }, "CommonResponseDto": { "type": "object", @@ -5721,7 +6304,9 @@ "description": "The response success or error message." } }, - "required": ["message"] + "required": [ + "message" + ] }, "EngineUninstallationResponseDto": { "type": "object", @@ -5777,7 +6362,11 @@ "example": "OK" } }, - "required": ["data", "object", "result"] + "required": [ + "data", + "object", + "result" + ] }, "Engine": { "type": "object", @@ -5807,7 +6396,12 @@ "example": "0.1.34" } }, - "required": ["description", "name", "productName", "status"] + "required": [ + "description", + "name", + "productName", + "status" + ] }, "CpuModeDto": { "type": "object", @@ -5872,7 +6466,9 @@ "description": "A predefined text or framework that guides the AI model's response generation." 
}, "stop": { - "example": ["End"], + "example": [ + "End" + ], "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", "type": "array", "items": { @@ -5988,7 +6584,9 @@ "$ref": "#/components/schemas/RecommendDto" } }, - "required": ["id"] + "required": [ + "id" + ] }, "ListModelsResponseDto": { "type": "object", @@ -5996,7 +6594,9 @@ "object": { "type": "string", "example": "list", - "enum": ["list"] + "enum": [ + "list" + ] }, "data": { "description": "List of models", @@ -6006,7 +6606,10 @@ } } }, - "required": ["object", "data"] + "required": [ + "object", + "data" + ] }, "UpdateModelDto": { "type": "object", @@ -6025,7 +6628,9 @@ "items": { "type": "string" }, - "example": [""] + "example": [ + "" + ] }, "stream": { "type": "boolean", @@ -6215,7 +6820,11 @@ "description": "Indicates whether the model was successfully deleted." } }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] }, "CreateThreadAssistantDto": { "type": "object", @@ -6305,7 +6914,10 @@ "tool_resources": { "type": "object", "example": { - "resources": ["database1", "database2"] + "resources": [ + "database1", + "database2" + ] }, "description": "Tool resources for the assistant." } @@ -6333,7 +6945,9 @@ } } }, - "required": ["assistants"] + "required": [ + "assistants" + ] }, "ContentDto": { "type": "object", @@ -6352,7 +6966,10 @@ "description": "Text content of the message along with any annotations." } }, - "required": ["type", "text"] + "required": [ + "type", + "text" + ] }, "GetMessageResponseDto": { "type": "object", @@ -6526,7 +7143,13 @@ "description": "Indicates whether there are more messages to retrieve." } }, - "required": ["object", "data", "first_id", "last_id", "has_more"] + "required": [ + "object", + "data", + "first_id", + "last_id", + "has_more" + ] }, "CreateMessageDto": { "type": "object", @@ -6542,7 +7165,10 @@ "description": "The text contents of the message." } }, - "required": ["role", "content"] + "required": [ + "role", + "content" + ] }, "UpdateMessageDto": { "type": "object", @@ -6568,7 +7194,11 @@ "description": "Indicates whether the message was successfully deleted." } }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] }, "GetThreadResponseDto": { "type": "object", @@ -6589,7 +7219,9 @@ "description": "Unix timestamp representing the creation time of the thread." }, "assistants": { - "example": ["assistant-001"], + "example": [ + "assistant-001" + ], "description": "List of assistants involved in the thread.", "type": "array", "items": { @@ -6643,7 +7275,11 @@ "description": "Indicates whether the thread was successfully deleted." } }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] }, "CPUDto": { "type": "object", @@ -6686,7 +7322,12 @@ "description": "The model name of the CPU." } }, - "required": ["arch", "cores", "instructions", "model"] + "required": [ + "arch", + "cores", + "instructions", + "model" + ] }, "GPUDto": { "type": "object", @@ -6710,7 +7351,10 @@ "description": "The version of the installed driver." } }, - "required": ["compute_cap", "driver_version"] + "required": [ + "compute_cap", + "driver_version" + ] }, "free_vram": { "type": "integer", @@ -6768,7 +7412,10 @@ "description": "The version of the operating system." 
} }, - "required": ["name", "version"] + "required": [ + "name", + "version" + ] }, "PowerDto": { "type": "object", @@ -6789,7 +7436,11 @@ "description": "Indicates if the power-saving mode is enabled." } }, - "required": ["battery_life", "charging_status", "is_power_saving"] + "required": [ + "battery_life", + "charging_status", + "is_power_saving" + ] }, "RAMDto": { "type": "object", @@ -6810,7 +7461,11 @@ "description": "The type of RAM." } }, - "required": ["available", "total", "type"] + "required": [ + "available", + "total", + "type" + ] }, "StorageDto": { "type": "object", @@ -6831,8 +7486,12 @@ "description": "The type of storage." } }, - "required": ["available", "total", "type"] + "required": [ + "available", + "total", + "type" + ] } } } -} +} \ No newline at end of file diff --git a/engine/README.md b/engine/README.md index 884e5efae..dec30887a 100644 --- a/engine/README.md +++ b/engine/README.md @@ -1,11 +1,11 @@ # cortex-cpp - Embeddable AI

[cortex-cpp logo image]

Documentation - API Reference - Changelog - Bug reports - Discord

> ⚠️ **cortex-cpp is currently in Development**: Expect breaking changes and bugs! @@ -41,7 +41,7 @@ Ensure that your system meets the following requirements to run Cortex: ## Quickstart To install Cortex CLI, follow the steps below: -1. Download cortex-cpp here: https://github.com/janhq/cortex/releases +1. Download cortex-cpp here: https://github.com/menloresearch/cortex/releases 2. Install cortex-cpp by running the downloaded file. 3. Download a Model: @@ -121,37 +121,37 @@ Below is the available list of the model parameters you can set when loading a m [release download table: Stable (Recommended) builds for CPU, CUDA, Intel, and M1/M2] @@ -159,7 +159,7 @@ Below is the available list of the model parameters you can set when loading a m -> Download the latest or older versions of Cortex-cpp at the **[GitHub Releases](https://github.com/janhq/cortex/releases)**. +> Download the latest or older versions of Cortex-cpp at the **[GitHub Releases](https://github.com/menloresearch/cortex/releases)**. ## Manual Build @@ -173,4 +173,4 @@ Manual build is a process in which the developers build the software manually. T ## Star History -[![Star History Chart](https://api.star-history.com/svg?repos=janhq/cortex.cpp&type=Date)](https://star-history.com/#janhq/cortex.cpp&Date) \ No newline at end of file +[![Star History Chart](https://api.star-history.com/svg?repos=menloresearch/cortex.cpp&type=Date)](https://star-history.com/#menloresearch/cortex.cpp&Date) \ No newline at end of file diff --git a/engine/cli/commands/cortex_upd_cmd.cc b/engine/cli/commands/cortex_upd_cmd.cc index 5d7b4bf4c..6c8baa1a4 100644 --- a/engine/cli/commands/cortex_upd_cmd.cc +++ b/engine/cli/commands/cortex_upd_cmd.cc @@ -515,10 +515,10 @@ bool CortexUpdCmd::GetLinuxInstallScript(const std::string& v, const std::string& channel) { std::vector path_list; if (channel == "nightly") { - path_list = {"janhq", "cortex.cpp", "dev", "engine", + path_list = {"menloresearch", "cortex.cpp", "dev", "engine", "templates", "linux", "install.sh"}; } else { - path_list = {"janhq", "cortex.cpp", "main", "engine", + path_list = {"menloresearch", "cortex.cpp", "main", "engine", "templates", "linux", "install.sh"}; } auto url_obj = url_parser::Url{ diff --git a/engine/cli/commands/cortex_upd_cmd.h b/engine/cli/commands/cortex_upd_cmd.h index 01793992f..7f02839cf 100644 --- a/engine/cli/commands/cortex_upd_cmd.h +++ b/engine/cli/commands/cortex_upd_cmd.h @@ -79,9 +79,9 @@ inline std::vector GetReleasePath() { if (CORTEX_VARIANT == file_manager_utils::kNightlyVariant) { return {"cortex", "latest", "version.json"}; } else if (CORTEX_VARIANT == file_manager_utils::kBetaVariant) { - return {"repos", "janhq", "cortex.cpp", "releases"}; + return {"repos", "menloresearch", "cortex.cpp", "releases"}; } else { - return {"repos", "janhq", "cortex.cpp", "releases", "latest"}; + return {"repos", "menloresearch", "cortex.cpp", "releases", "latest"}; } } diff --git a/engine/cli/main.cc b/engine/cli/main.cc index 8ed4beb61..1d47bacc2 100644 --- a/engine/cli/main.cc +++ b/engine/cli/main.cc @@ -1,6 +1,7 @@ #include #include "command_line_parser.h" #include "commands/cortex_upd_cmd.h" +#include "openssl/ssl.h" #include "services/download_service.h" #include "utils/archive_utils.h" #include "utils/cortex_utils.h" @@ -88,6 +89,7 @@ int main(int argc, char* argv[]) { return 1; } + SSL_library_init(); curl_global_init(CURL_GLOBAL_DEFAULT); bool should_install_server = false; @@ -152,7 +154,7 @@ auto get_latest_version = []() -> cpp::result {
try { auto res = github_release_utils::GetReleaseByVersion( - "janhq", "cortex.llamacpp", "latest"); + "menloresearch", "cortex.llamacpp", "latest"); if (res.has_error()) { CTL_ERR("Failed to get latest llama.cpp version: " << res.error()); return cpp::fail("Failed to get latest llama.cpp version: " + diff --git a/engine/common/api_server_configuration.h b/engine/common/api_server_configuration.h index 03b3022a4..63383301b 100644 --- a/engine/common/api_server_configuration.h +++ b/engine/common/api_server_configuration.h @@ -107,7 +107,7 @@ class ApiServerConfiguration { const std::string& proxy_url = "", const std::string& proxy_username = "", const std::string& proxy_password = "", const std::string& no_proxy = "", bool verify_peer_ssl = true, bool verify_host_ssl = true, - const std::string& hf_token = "") + const std::string& hf_token = "", std::vector api_keys = {}) : cors{cors}, allowed_origins{allowed_origins}, verify_proxy_ssl{verify_proxy_ssl}, @@ -118,7 +118,8 @@ class ApiServerConfiguration { no_proxy{no_proxy}, verify_peer_ssl{verify_peer_ssl}, verify_host_ssl{verify_host_ssl}, - hf_token{hf_token} {} + hf_token{hf_token}, + api_keys{api_keys} {} // cors bool cors{true}; @@ -139,6 +140,9 @@ class ApiServerConfiguration { // token std::string hf_token{""}; + // authentication + std::vector api_keys; + Json::Value ToJson() const { Json::Value root; root["cors"] = cors; @@ -155,6 +159,10 @@ class ApiServerConfiguration { root["verify_peer_ssl"] = verify_peer_ssl; root["verify_host_ssl"] = verify_host_ssl; root["huggingface_token"] = hf_token; + root["api_keys"] = Json::Value(Json::arrayValue); + for (const auto& api_key : api_keys) { + root["api_keys"].append(api_key); + } return root; } @@ -256,7 +264,8 @@ class ApiServerConfiguration { return true; }}, - {"allowed_origins", [this](const Json::Value& value) -> bool { + {"allowed_origins", + [this](const Json::Value& value) -> bool { if (!value.isArray()) { return false; } @@ -271,7 +280,26 @@ class ApiServerConfiguration { this->allowed_origins.push_back(origin.asString()); } return true; - }}}; + }}, + + {"api_keys", + [this](const Json::Value& value) -> bool { + if (!value.isArray()) { + return false; + } + for (const auto& key : value) { + if (!key.isString()) { + return false; + } + } + + this->api_keys.clear(); + for (const auto& key : value) { + this->api_keys.push_back(key.asString()); + } + return true; + }}, + }; for (const auto& key : json.getMemberNames()) { auto updater = field_updater.find(key); diff --git a/engine/config/gguf_parser.cc b/engine/config/gguf_parser.cc index c97c79c30..9acc97de2 100644 --- a/engine/config/gguf_parser.cc +++ b/engine/config/gguf_parser.cc @@ -2,12 +2,12 @@ #include #include #include +#include #include #include #include #include #include -#include #ifdef _WIN32 #include @@ -70,7 +70,7 @@ void GGUFHandler::OpenFile(const std::string& file_path) { #else file_size_ = std::filesystem::file_size(file_path); - + int file_descriptor = open(file_path.c_str(), O_RDONLY); // Memory-map the file data_ = static_cast( @@ -105,7 +105,8 @@ std::pair GGUFHandler::ReadString( std::memcpy(&length, data_ + offset, sizeof(uint64_t)); if (offset + 8 + length > file_size_) { - throw std::runtime_error("GGUF metadata string length exceeds file size.\n"); + throw std::runtime_error( + "GGUF metadata string length exceeds file size.\n"); } std::string value(reinterpret_cast(data_ + offset + 8), length); @@ -578,9 +579,8 @@ void GGUFHandler::ModelConfigFromMetadata() { model_config_.model = name; 
model_config_.id = name; model_config_.version = std::to_string(version); - model_config_.max_tokens = - std::min(kDefaultMaxContextLength, max_tokens); - model_config_.ctx_len = std::min(kDefaultMaxContextLength, max_tokens); + model_config_.max_tokens = max_tokens; + model_config_.ctx_len = max_tokens; model_config_.ngl = ngl; } diff --git a/engine/config/model_config.h b/engine/config/model_config.h index 1d51cfb01..e95a94278 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -18,16 +18,12 @@ namespace config { struct RemoteModelConfig { std::string model; - std::string header_template; std::string engine; std::string version; size_t created; std::string object = "model"; std::string owned_by = ""; Json::Value inference_params; - Json::Value transform_req; - Json::Value transform_resp; - Json::Value metadata; void LoadFromJson(const Json::Value& json) { if (!json.isObject()) { throw std::runtime_error("Input JSON must be an object"); @@ -35,8 +31,6 @@ struct RemoteModelConfig { // Load basic string fields model = json.get("model", model).asString(); - header_template = - json.get("header_template", header_template).asString(); engine = json.get("engine", engine).asString(); version = json.get("version", version).asString(); created = @@ -46,9 +40,6 @@ struct RemoteModelConfig { // Load JSON object fields directly inference_params = json.get("inference_params", inference_params); - transform_req = json.get("transform_req", transform_req); - transform_resp = json.get("transform_resp", transform_resp); - metadata = json.get("metadata", metadata); } Json::Value ToJson() const { @@ -56,7 +47,6 @@ struct RemoteModelConfig { // Add basic string fields json["model"] = model; - json["header_template"] = header_template; json["engine"] = engine; json["version"] = version; json["created"] = static_cast(created); @@ -65,9 +55,6 @@ struct RemoteModelConfig { // Add JSON object fields directly json["inference_params"] = inference_params; - json["transform_req"] = transform_req; - json["transform_resp"] = transform_resp; - json["metadata"] = metadata; return json; }; @@ -77,7 +64,6 @@ struct RemoteModelConfig { // Convert basic fields root["model"] = model; - root["header_template"] = header_template; root["engine"] = engine; root["version"] = version; root["object"] = object; @@ -87,9 +73,6 @@ struct RemoteModelConfig { // Convert Json::Value to YAML::Node using utility function root["inference_params"] = remote_models_utils::jsonToYaml(inference_params); - root["transform_req"] = remote_models_utils::jsonToYaml(transform_req); - root["transform_resp"] = remote_models_utils::jsonToYaml(transform_resp); - root["metadata"] = remote_models_utils::jsonToYaml(metadata); // Save to file std::ofstream fout(filepath); @@ -110,7 +93,6 @@ struct RemoteModelConfig { // Load basic fields model = root["model"].as(""); - header_template = root["header_template"].as(""); engine = root["engine"].as(""); version = root["version"] ? root["version"].as() : ""; created = root["created"] ? 
root["created"].as() : 0; @@ -120,9 +102,6 @@ struct RemoteModelConfig { // Load complex fields using utility function inference_params = remote_models_utils::yamlToJson(root["inference_params"]); - transform_req = remote_models_utils::yamlToJson(root["transform_req"]); - transform_resp = remote_models_utils::yamlToJson(root["transform_resp"]); - metadata = remote_models_utils::yamlToJson(root["metadata"]); } }; @@ -156,6 +135,7 @@ struct ModelConfig { bool text_model = std::numeric_limits::quiet_NaN(); std::string id; std::vector files; + std::string mmproj; std::size_t created; std::string object; std::string owned_by = ""; @@ -359,6 +339,9 @@ struct ModelConfig { files_array.append(file); } obj["files"] = files_array; + if (!mmproj.empty()) { + obj["mmproj"] = mmproj; + } obj["created"] = static_cast(created); obj["object"] = object; diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc index 57b2b3ecb..8d5060615 100644 --- a/engine/config/yaml_config.cc +++ b/engine/config/yaml_config.cc @@ -21,11 +21,13 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) { try { yaml_node_ = YAML::LoadFile(file_path); + auto nomalize_path = [](std::string p) { + std::replace(p.begin(), p.end(), '\\', '/'); + return p; + }; // incase of model.yml file, we don't have files yet, create them if (!yaml_node_["files"]) { - auto s = file_path; - // normalize path - std::replace(s.begin(), s.end(), '\\', '/'); + auto s = nomalize_path(file_path); std::vector v; if (yaml_node_["engine"] && (yaml_node_["engine"].as() == kLlamaRepo || @@ -41,6 +43,18 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) { // TODO(any) need to support mutiple gguf files yaml_node_["files"] = v; } + + // add mmproj file to yml if exists + if (!yaml_node_["mmproj"]) { + auto s = nomalize_path(file_path); + auto abs_path = s.substr(0, s.find_last_of('/')) + "/mmproj.gguf"; + CTL_DBG("mmproj: " << abs_path); + auto rel_path = fmu::ToRelativeCortexDataPath(fs::path(abs_path)); + if (std::filesystem::exists(abs_path)) { + yaml_node_["mmproj"] = rel_path.string(); + } + } + } catch (const YAML::BadFile& e) { throw; } @@ -131,6 +145,8 @@ void YamlHandler::ModelConfigFromYaml() { tmp.stop = yaml_node_["stop"].as>(); if (yaml_node_["files"]) tmp.files = yaml_node_["files"].as>(); + if (yaml_node_["mmproj"]) + tmp.mmproj = yaml_node_["mmproj"].as(); if (yaml_node_["created"]) tmp.created = yaml_node_["created"].as(); @@ -239,6 +255,9 @@ void YamlHandler::UpdateModelConfig(ModelConfig new_model_config) { if (model_config_.files.size() > 0) yaml_node_["files"] = model_config_.files; + if (!model_config_.mmproj.empty()) + yaml_node_["mmproj"] = model_config_.mmproj; + if (!std::isnan(static_cast(model_config_.seed))) yaml_node_["seed"] = model_config_.seed; if (!std::isnan(model_config_.dynatemp_range)) @@ -301,17 +320,21 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const { "Model ID which is used for request construct - should be " "unique between models (author / quantization)"); out_file << format_utils::WriteKeyValue("name", yaml_node_["name"], - "metadata.general.name"); + "metadata.general.name"); if (yaml_node_["version"]) { - out_file << "version: " << yaml_node_["version"].as() << "\n"; + out_file << "version: " << yaml_node_["version"].as() + << "\n"; } if (yaml_node_["files"] && yaml_node_["files"].size()) { out_file << "files: # Can be relative OR absolute local file " - "path\n"; + "path\n"; for (const auto& source : yaml_node_["files"]) { out_file << " - " << source << 
"\n"; } } + if (yaml_node_["mmproj"]) { + out_file << "mmproj: " << yaml_node_["mmproj"].as() << "\n"; + } out_file << "# END GENERAL GGUF METADATA\n"; out_file << "\n"; @@ -330,9 +353,9 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const { out_file << "# BEGIN OPTIONAL\n"; out_file << format_utils::WriteKeyValue("size", yaml_node_["size"]); out_file << format_utils::WriteKeyValue("stream", yaml_node_["stream"], - "Default true?"); + "Default true?"); out_file << format_utils::WriteKeyValue("top_p", yaml_node_["top_p"], - "Ranges: 0 to 1"); + "Ranges: 0 to 1"); out_file << format_utils::WriteKeyValue( "temperature", yaml_node_["temperature"], "Ranges: 0 to 1"); out_file << format_utils::WriteKeyValue( @@ -344,26 +367,26 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const { "Should be default to context length"); out_file << format_utils::WriteKeyValue("seed", yaml_node_["seed"]); out_file << format_utils::WriteKeyValue("dynatemp_range", - yaml_node_["dynatemp_range"]); + yaml_node_["dynatemp_range"]); out_file << format_utils::WriteKeyValue("dynatemp_exponent", - yaml_node_["dynatemp_exponent"]); + yaml_node_["dynatemp_exponent"]); out_file << format_utils::WriteKeyValue("top_k", yaml_node_["top_k"]); out_file << format_utils::WriteKeyValue("min_p", yaml_node_["min_p"]); out_file << format_utils::WriteKeyValue("tfs_z", yaml_node_["tfs_z"]); out_file << format_utils::WriteKeyValue("typ_p", yaml_node_["typ_p"]); out_file << format_utils::WriteKeyValue("repeat_last_n", - yaml_node_["repeat_last_n"]); + yaml_node_["repeat_last_n"]); out_file << format_utils::WriteKeyValue("repeat_penalty", - yaml_node_["repeat_penalty"]); + yaml_node_["repeat_penalty"]); out_file << format_utils::WriteKeyValue("mirostat", yaml_node_["mirostat"]); out_file << format_utils::WriteKeyValue("mirostat_tau", - yaml_node_["mirostat_tau"]); + yaml_node_["mirostat_tau"]); out_file << format_utils::WriteKeyValue("mirostat_eta", - yaml_node_["mirostat_eta"]); + yaml_node_["mirostat_eta"]); out_file << format_utils::WriteKeyValue("penalize_nl", - yaml_node_["penalize_nl"]); + yaml_node_["penalize_nl"]); out_file << format_utils::WriteKeyValue("ignore_eos", - yaml_node_["ignore_eos"]); + yaml_node_["ignore_eos"]); out_file << format_utils::WriteKeyValue("n_probs", yaml_node_["n_probs"]); out_file << format_utils::WriteKeyValue("min_keep", yaml_node_["min_keep"]); out_file << format_utils::WriteKeyValue("grammar", yaml_node_["grammar"]); @@ -374,7 +397,7 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const { out_file << "# BEGIN MODEL LOAD PARAMETERS\n"; out_file << "# BEGIN REQUIRED\n"; out_file << format_utils::WriteKeyValue("engine", yaml_node_["engine"], - "engine to run model"); + "engine to run model"); out_file << "prompt_template:"; out_file << " " << yaml_node_["prompt_template"] << "\n"; out_file << "# END REQUIRED\n"; @@ -384,11 +407,11 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const { "ctx_len", yaml_node_["ctx_len"], "llama.context_length | 0 or undefined = loaded from model"); out_file << format_utils::WriteKeyValue("n_parallel", - yaml_node_["n_parallel"]); + yaml_node_["n_parallel"]); out_file << format_utils::WriteKeyValue("cpu_threads", - yaml_node_["cpu_threads"]); + yaml_node_["cpu_threads"]); out_file << format_utils::WriteKeyValue("ngl", yaml_node_["ngl"], - "Undefined = loaded from model"); + "Undefined = loaded from model"); out_file << "# END OPTIONAL\n"; out_file << "# END MODEL LOAD PARAMETERS\n"; diff --git 
a/engine/controllers/models.cc b/engine/controllers/models.cc index 86b749ce6..d88efc254 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -533,8 +533,8 @@ void Models::StartModel( auto model_handle = (*(req->getJsonObject())).get("model", "").asString(); std::optional mmproj; - if (auto& o = (*(req->getJsonObject()))["mmproj"]; !o.isNull()) { - mmproj = o.asString(); + if (auto& o = (*(req->getJsonObject())); o.isMember("mmproj")) { + mmproj = o["mmproj"].asString(); } auto bypass_llama_model_path = false; diff --git a/engine/database/models.cc b/engine/database/models.cc index 6bf891040..fa4144b24 100644 --- a/engine/database/models.cc +++ b/engine/database/models.cc @@ -310,7 +310,8 @@ cpp::result, std::string> Models::GetModelSources() "SELECT model_id, author_repo_id, branch_name, " "path_to_model_yaml, model_alias, model_format, " "model_source, status, engine, metadata FROM models " - "WHERE model_source != \"\" AND (status = \"downloaded\" OR status = " + "WHERE model_source != \"\" AND model_source != \"imported\" AND " + "(status = \"downloaded\" OR status = " "\"downloadable\")"); while (query.executeStep()) { ModelEntry entry; diff --git a/engine/e2e-test/api/engines/test_api_engine_install_nightly.py b/engine/e2e-test/api/engines/test_api_engine_install_nightly.py index 34fda2d18..64b8ccc4f 100644 --- a/engine/e2e-test/api/engines/test_api_engine_install_nightly.py +++ b/engine/e2e-test/api/engines/test_api_engine_install_nightly.py @@ -2,7 +2,7 @@ import requests from utils.test_runner import start_server, stop_server, get_latest_pre_release_tag -latest_pre_release_tag = get_latest_pre_release_tag("janhq", "cortex.llamacpp") +latest_pre_release_tag = get_latest_pre_release_tag("menloresearch", "cortex.llamacpp") class TestApiEngineInstall: diff --git a/engine/e2e-test/cli/engines/test_cli_engine_install_nightly.py b/engine/e2e-test/cli/engines/test_cli_engine_install_nightly.py index 376dd3c30..42835c4a0 100644 --- a/engine/e2e-test/cli/engines/test_cli_engine_install_nightly.py +++ b/engine/e2e-test/cli/engines/test_cli_engine_install_nightly.py @@ -5,7 +5,7 @@ import requests from utils.test_runner import run, start_server, stop_server, get_latest_pre_release_tag -latest_pre_release_tag = get_latest_pre_release_tag("janhq", "cortex.llamacpp") +latest_pre_release_tag = get_latest_pre_release_tag("menloresearch", "cortex.llamacpp") class TestCliEngineInstall: def setup_and_teardown(self): diff --git a/engine/examples/example-docker/Dockerfile b/engine/examples/example-docker/Dockerfile index 1e727826a..f44179bea 100644 --- a/engine/examples/example-docker/Dockerfile +++ b/engine/examples/example-docker/Dockerfile @@ -7,7 +7,7 @@ WORKDIR /app # Install curl, unzip, and numactl, download the file, unzip it, then remove unnecessary packages RUN apt-get update && \ apt-get install -y curl tar gzip numactl && \ - curl -L "https://github.com/janhq/nitro/releases/download/v0.1.17/nitro-0.1.17-linux-amd64.tar.gz" -o nitro.tar.gz && \ + curl -L "https://github.com/menloresearch/nitro/releases/download/v0.1.17/nitro-0.1.17-linux-amd64.tar.gz" -o nitro.tar.gz && \ tar -xzvf nitro.tar.gz && \ rm nitro.tar.gz && \ apt-get remove --purge -y curl tar gzip && \ diff --git a/engine/examples/example-docker/alpine.Dockerfile b/engine/examples/example-docker/alpine.Dockerfile index b80d87009..9b79af244 100644 --- a/engine/examples/example-docker/alpine.Dockerfile +++ b/engine/examples/example-docker/alpine.Dockerfile @@ -11,7 +11,7 @@ WORKDIR /work RUN apk add 
diff --git a/engine/database/models.cc b/engine/database/models.cc
index 6bf891040..fa4144b24 100644
--- a/engine/database/models.cc
+++ b/engine/database/models.cc
@@ -310,7 +310,8 @@ cpp::result<std::vector<ModelEntry>, std::string> Models::GetModelSources()
       "SELECT model_id, author_repo_id, branch_name, "
       "path_to_model_yaml, model_alias, model_format, "
       "model_source, status, engine, metadata FROM models "
-      "WHERE model_source != \"\" AND (status = \"downloaded\" OR status = "
+      "WHERE model_source != \"\" AND model_source != \"imported\" AND "
+      "(status = \"downloaded\" OR status = "
      "\"downloadable\")");
   while (query.executeStep()) {
     ModelEntry entry;
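The extra model_source != "imported" predicate filters imported models out at the SQL level rather than post-filtering in C++. The same query shape against a throwaway in-memory database, using the same SQLiteCpp executeStep() API seen above:

    #include <SQLiteCpp/SQLiteCpp.h>
    #include <iostream>

    int main() {
      SQLite::Database db(":memory:", SQLite::OPEN_READWRITE | SQLite::OPEN_CREATE);
      db.exec("CREATE TABLE models (model_id TEXT, model_source TEXT, status TEXT)");
      db.exec("INSERT INTO models VALUES ('a', 'huggingface', 'downloaded'), "
              "('b', 'imported', 'downloaded'), ('c', 'huggingface', 'downloadable')");

      // Same shape as the updated filter: imported models never reach the caller.
      SQLite::Statement query(db,
          "SELECT model_id FROM models "
          "WHERE model_source != '' AND model_source != 'imported' AND "
          "(status = 'downloaded' OR status = 'downloadable')");
      while (query.executeStep()) {
        std::cout << query.getColumn(0).getString() << "\n";  // prints a, c
      }
      return 0;
    }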
"Content-Type: application/json"); curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); @@ -295,11 +294,6 @@ CurlResponse RemoteEngine::MakeChatCompletionRequest( return response; } std::string full_url = chat_url_; - - if (config.transform_req["chat_completions"]["url"]) { - full_url = - config.transform_req["chat_completions"]["url"].as(); - } CTL_DBG("full_url: " << full_url); struct curl_slist* headers = nullptr; @@ -341,7 +335,6 @@ bool RemoteEngine::LoadModelConfig(const std::string& model, ModelConfig model_config; model_config.model = model; - model_config.api_key = body["api_key"].asString(); // model_config.url = ; // Optional fields if (auto s = config["header_template"]; s && !s.as().empty()) { @@ -350,16 +343,6 @@ bool RemoteEngine::LoadModelConfig(const std::string& model, CTL_DBG("header: " << h); } } - if (config["transform_req"]) { - model_config.transform_req = config["transform_req"]; - } else { - LOG_WARN << "Missing transform_req in config for model " << model; - } - if (config["transform_resp"]) { - model_config.transform_resp = config["transform_resp"]; - } else { - LOG_WARN << "Missing transform_resp in config for model " << model; - } model_config.is_loaded = true; @@ -414,9 +397,10 @@ void RemoteEngine::LoadModel( std::shared_ptr json_body, std::function&& callback) { if (!json_body->isMember("model") || !json_body->isMember("model_path") || - !json_body->isMember("api_key")) { + !json_body->isMember("api_key") || !json_body->isMember("metadata")) { Json::Value error; - error["error"] = "Missing required fields: model or model_path"; + error["error"] = + "Missing required fields: model, model_path, api_key or metadata"; Json::Value status; status["is_done"] = true; status["has_error"] = true; @@ -428,43 +412,41 @@ void RemoteEngine::LoadModel( const std::string& model = (*json_body)["model"].asString(); const std::string& model_path = (*json_body)["model_path"].asString(); - const std::string& api_key = (*json_body)["api_key"].asString(); - - if (json_body->isMember("metadata")) { - metadata_ = (*json_body)["metadata"]; - if (!metadata_["transform_req"].isNull() && - !metadata_["transform_req"]["chat_completions"].isNull() && - !metadata_["transform_req"]["chat_completions"]["template"].isNull()) { - chat_req_template_ = - metadata_["transform_req"]["chat_completions"]["template"].asString(); - CTL_INF(chat_req_template_); - } - if (!metadata_["transform_resp"].isNull() && - !metadata_["transform_resp"]["chat_completions"].isNull() && - !metadata_["transform_resp"]["chat_completions"]["template"].isNull()) { - chat_res_template_ = - metadata_["transform_resp"]["chat_completions"]["template"] - .asString(); - CTL_INF(chat_res_template_); - } + metadata_ = (*json_body)["metadata"]; + if (!metadata_["transform_req"].isNull() && + !metadata_["transform_req"]["chat_completions"].isNull() && + !metadata_["transform_req"]["chat_completions"]["template"].isNull()) { + chat_req_template_ = + metadata_["transform_req"]["chat_completions"]["template"].asString(); + CTL_INF(chat_req_template_); + } else { + CTL_WRN("Required transform_req"); + } - if (!metadata_["transform_req"].isNull() && - !metadata_["transform_req"]["chat_completions"].isNull() && - !metadata_["transform_req"]["chat_completions"]["url"].isNull()) { - chat_url_ = - metadata_["transform_req"]["chat_completions"]["url"].asString(); - CTL_INF(chat_url_); - } + if (!metadata_["transform_resp"].isNull() && + !metadata_["transform_resp"]["chat_completions"].isNull() && + 
@@ -295,11 +294,6 @@ CurlResponse RemoteEngine::MakeChatCompletionRequest(
     return response;
   }
   std::string full_url = chat_url_;
-
-  if (config.transform_req["chat_completions"]["url"]) {
-    full_url =
-        config.transform_req["chat_completions"]["url"].as<std::string>();
-  }
   CTL_DBG("full_url: " << full_url);
 
   struct curl_slist* headers = nullptr;
@@ -341,7 +335,6 @@ bool RemoteEngine::LoadModelConfig(const std::string& model,
 
   ModelConfig model_config;
   model_config.model = model;
-  model_config.api_key = body["api_key"].asString();
   // model_config.url = ;
   // Optional fields
   if (auto s = config["header_template"]; s && !s.as<std::string>().empty()) {
       CTL_DBG("header: " << h);
     }
   }
-  if (config["transform_req"]) {
-    model_config.transform_req = config["transform_req"];
-  } else {
-    LOG_WARN << "Missing transform_req in config for model " << model;
-  }
-  if (config["transform_resp"]) {
-    model_config.transform_resp = config["transform_resp"];
-  } else {
-    LOG_WARN << "Missing transform_resp in config for model " << model;
-  }
 
   model_config.is_loaded = true;
@@ -414,9 +397,10 @@ void RemoteEngine::LoadModel(
     std::shared_ptr<Json::Value> json_body,
     std::function<void(Json::Value&&, Json::Value&&)>&& callback) {
   if (!json_body->isMember("model") || !json_body->isMember("model_path") ||
-      !json_body->isMember("api_key")) {
+      !json_body->isMember("api_key") || !json_body->isMember("metadata")) {
     Json::Value error;
-    error["error"] = "Missing required fields: model or model_path";
+    error["error"] =
+        "Missing required fields: model, model_path, api_key or metadata";
     Json::Value status;
     status["is_done"] = true;
     status["has_error"] = true;
@@ -428,43 +412,41 @@ void RemoteEngine::LoadModel(
 
   const std::string& model = (*json_body)["model"].asString();
   const std::string& model_path = (*json_body)["model_path"].asString();
-  const std::string& api_key = (*json_body)["api_key"].asString();
-
-  if (json_body->isMember("metadata")) {
-    metadata_ = (*json_body)["metadata"];
-    if (!metadata_["transform_req"].isNull() &&
-        !metadata_["transform_req"]["chat_completions"].isNull() &&
-        !metadata_["transform_req"]["chat_completions"]["template"].isNull()) {
-      chat_req_template_ =
-          metadata_["transform_req"]["chat_completions"]["template"].asString();
-      CTL_INF(chat_req_template_);
-    }
-    if (!metadata_["transform_resp"].isNull() &&
-        !metadata_["transform_resp"]["chat_completions"].isNull() &&
-        !metadata_["transform_resp"]["chat_completions"]["template"].isNull()) {
-      chat_res_template_ =
-          metadata_["transform_resp"]["chat_completions"]["template"]
-              .asString();
-      CTL_INF(chat_res_template_);
-    }
+  metadata_ = (*json_body)["metadata"];
+  if (!metadata_["transform_req"].isNull() &&
+      !metadata_["transform_req"]["chat_completions"].isNull() &&
+      !metadata_["transform_req"]["chat_completions"]["template"].isNull()) {
+    chat_req_template_ =
+        metadata_["transform_req"]["chat_completions"]["template"].asString();
+    CTL_INF(chat_req_template_);
+  } else {
+    CTL_WRN("Required transform_req");
+  }
 
-    if (!metadata_["transform_req"].isNull() &&
-        !metadata_["transform_req"]["chat_completions"].isNull() &&
-        !metadata_["transform_req"]["chat_completions"]["url"].isNull()) {
-      chat_url_ =
-          metadata_["transform_req"]["chat_completions"]["url"].asString();
-      CTL_INF(chat_url_);
-    }
+  if (!metadata_["transform_resp"].isNull() &&
+      !metadata_["transform_resp"]["chat_completions"].isNull() &&
+      !metadata_["transform_resp"]["chat_completions"]["template"].isNull()) {
+    chat_res_template_ =
+        metadata_["transform_resp"]["chat_completions"]["template"].asString();
+    CTL_INF(chat_res_template_);
+  } else {
+    CTL_WRN("Required transform_resp");
   }
 
-  if (json_body->isMember("metadata")) {
-    if (!metadata_["header_template"].isNull()) {
-      header_ = ReplaceHeaderPlaceholders(
-          metadata_["header_template"].asString(), *json_body);
-      for (auto const& h : header_) {
-        CTL_DBG("header: " << h);
-      }
+  if (!metadata_["transform_req"].isNull() &&
+      !metadata_["transform_req"]["chat_completions"].isNull() &&
+      !metadata_["transform_req"]["chat_completions"]["url"].isNull()) {
+    chat_url_ =
+        metadata_["transform_req"]["chat_completions"]["url"].asString();
+    CTL_INF(chat_url_);
+  }
+
+  if (!metadata_["header_template"].isNull()) {
+    header_ = ReplaceHeaderPlaceholders(metadata_["header_template"].asString(),
+                                        *json_body);
+    for (auto const& h : header_) {
+      CTL_DBG("header: " << h);
    }
  }
@@ -568,13 +550,8 @@ void RemoteEngine::HandleChatCompletion(
   if (!chat_req_template_.empty()) {
     CTL_DBG("Use engine transform request template: " << chat_req_template_);
     template_str = chat_req_template_;
-  }
-  if (model_config->transform_req["chat_completions"] &&
-      model_config->transform_req["chat_completions"]["template"]) {
-    // Model level overrides engine level
-    template_str = model_config->transform_req["chat_completions"]["template"]
-                       .as<std::string>();
-    CTL_DBG("Use model transform request template: " << template_str);
+  } else {
+    CTL_WRN("Required transform request template");
   }
 
   // Render with error handling
@@ -634,14 +611,8 @@ void RemoteEngine::HandleChatCompletion(
       CTL_DBG(
           "Use engine transform response template: " << chat_res_template_);
       template_str = chat_res_template_;
-    }
-    if (model_config->transform_resp["chat_completions"] &&
-        model_config->transform_resp["chat_completions"]["template"]) {
-      // Model level overrides engine level
-      template_str =
-          model_config->transform_resp["chat_completions"]["template"]
-              .as<std::string>();
-      CTL_DBG("Use model transform request template: " << template_str);
+    } else {
+      CTL_WRN("Required transform response template");
     }
 
     try {
@@ -731,25 +702,7 @@ Json::Value RemoteEngine::GetRemoteModels(const std::string& url,
                                           const std::string& api_key,
                                           const std::string& header_template) {
   if (url.empty()) {
-    if (engine_name_ == kAnthropicEngine) {
-      Json::Value json_resp;
-      Json::Value model_array(Json::arrayValue);
-      for (const auto& m : kAnthropicModels) {
-        Json::Value val;
-        val["id"] = std::string(m);
-        val["engine"] = "anthropic";
-        val["created"] = "_";
-        val["object"] = "model";
-        model_array.append(val);
-      }
-
-      json_resp["object"] = "list";
-      json_resp["data"] = model_array;
-      CTL_INF("Remote models responded");
-      return json_resp;
-    } else {
-      return Json::Value();
-    }
+    return Json::Value();
   } else {
     auto response = MakeGetModelsRequest(url, api_key, header_template);
     if (response.error) {
@@ -760,9 +713,23 @@ Json::Value RemoteEngine::GetRemoteModels(const std::string& url,
     }
     CTL_DBG(response.body);
     auto body_json = json_helper::ParseJsonString(response.body);
-    if (body_json.isMember("error")) {
+    if (body_json.isMember("error") && !body_json["error"].isNull()) {
       return body_json["error"];
     }
+
+    // hardcode for cohere
+    if (url.find("api.cohere.ai") != std::string::npos) {
+      if (body_json.isMember("models")) {
+        for (auto& model : body_json["models"]) {
+          if (model.isMember("name")) {
+            model["id"] = model["name"];
+            model.removeMember("name");
+          }
+        }
+        body_json["data"] = body_json["models"];
+        body_json.removeMember("models");
+      }
+    }
     return body_json;
   }
 }
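The Cohere special case above rewrites models[].name into the OpenAI-style data[].id layout. JsonCpp has no rename operation, so it is copy-then-removeMember; reduced to a standalone program:

    #include <json/json.h>
    #include <iostream>

    int main() {
      Json::Value body;
      Json::Value m;
      m["name"] = "command-r";
      body["models"].append(m);

      // Rename models[].name -> id, then models -> data (OpenAI-style listing).
      for (auto& model : body["models"]) {
        if (model.isMember("name")) {
          model["id"] = model["name"];
          model.removeMember("name");
        }
      }
      body["data"] = body["models"];
      body.removeMember("models");

      std::cout << body.toStyledString();
      return 0;
    }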
body_json["data"] = body_json["models"]; + body_json.removeMember("models"); + } + } return body_json; } } diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index 6f1b731c6..27d23e3aa 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -27,6 +27,7 @@ struct StreamContext { bool need_stop = true; std::string last_request; std::string chunks; + CURL* curl; }; struct CurlResponse { std::string body; @@ -40,10 +41,7 @@ class RemoteEngine : public RemoteEngineI { struct ModelConfig { std::string model; std::string version; - std::string api_key; std::string url; - YAML::Node transform_req; - YAML::Node transform_resp; bool is_loaded{false}; }; diff --git a/engine/install.bat b/engine/install.bat index fc2faa4b1..f61f05633 100644 --- a/engine/install.bat +++ b/engine/install.bat @@ -41,11 +41,11 @@ echo %VERSION% :: Get the release if "%VERSION%"=="latest" ( :: If the version is set to "latest", get the latest version number from the cortex-cpp GitHub repository - for /f "delims=" %%i in ('powershell -Command "& {$version = Invoke-RestMethod -Uri 'https://api.github.com/repos/janhq/cortex/releases/latest'; return $version.tag_name.TrimStart('v')}"') do set "VERSION=%%i" + for /f "delims=" %%i in ('powershell -Command "& {$version = Invoke-RestMethod -Uri 'https://api.github.com/repos/menloresearch/cortex/releases/latest'; return $version.tag_name.TrimStart('v')}"') do set "VERSION=%%i" ) :: Construct the download URL -set "URL=https://github.com/janhq/cortex/releases/download/v%VERSION%/cortex-cpp-%VERSION%-win-amd64%AVX%" +set "URL=https://github.com/menloresearch/cortex/releases/download/v%VERSION%/cortex-cpp-%VERSION%-win-amd64%AVX%" if "%GPU%"=="true" ( :: If --gpu option is provided, append -cuda to the URL set "URL=%URL%-cuda" diff --git a/engine/install.sh b/engine/install.sh index ee3499f31..f0356c774 100644 --- a/engine/install.sh +++ b/engine/install.sh @@ -139,7 +139,7 @@ fi # Construct GitHub API URL and get latest version if not specified if [ "$VERSION" == "latest" ]; then - API_URL="https://api.github.com/repos/janhq/cortex/releases/latest" + API_URL="https://api.github.com/repos/menloresearch/cortex/releases/latest" VERSION=$(curl -s $API_URL | jq -r ".tag_name" | sed 's/^v//') fi @@ -167,7 +167,7 @@ case $OS in ;; esac -DOWNLOAD_URL="https://github.com/janhq/cortex/releases/download/v${VERSION}/${FILE_NAME}" +DOWNLOAD_URL="https://github.com/menloresearch/cortex/releases/download/v${VERSION}/${FILE_NAME}" # Check AVX support if [ -z "$AVX" ] && [ "$OS" == "Linux" ]; then diff --git a/engine/main.cc b/engine/main.cc index 2f60916a6..d407726e0 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -17,6 +17,7 @@ #include "controllers/threads.h" #include "database/database.h" #include "migrations/migration_manager.h" +#include "openssl/ssl.h" #include "repositories/assistant_fs_repository.h" #include "repositories/file_fs_repository.h" #include "repositories/message_fs_repository.h" @@ -249,6 +250,54 @@ void RunServer(std::optional host, std::optional port, .setClientMaxBodySize(256 * 1024 * 1024) // Max 256MiB body size .setClientMaxMemoryBodySize(1024 * 1024); // 1MiB before writing to disk + auto validate_api_key = [config_service](const drogon::HttpRequestPtr& req) { + auto api_keys = config_service->GetApiServerConfiguration()->api_keys; + static const std::unordered_set public_endpoints = { + "/openapi.json", "/healthz", "/processManager/destroy"}; + + 
diff --git a/engine/install.bat b/engine/install.bat
index fc2faa4b1..f61f05633 100644
--- a/engine/install.bat
+++ b/engine/install.bat
@@ -41,11 +41,11 @@ echo %VERSION%
 :: Get the release
 if "%VERSION%"=="latest" (
     :: If the version is set to "latest", get the latest version number from the cortex-cpp GitHub repository
-    for /f "delims=" %%i in ('powershell -Command "& {$version = Invoke-RestMethod -Uri 'https://api.github.com/repos/janhq/cortex/releases/latest'; return $version.tag_name.TrimStart('v')}"') do set "VERSION=%%i"
+    for /f "delims=" %%i in ('powershell -Command "& {$version = Invoke-RestMethod -Uri 'https://api.github.com/repos/menloresearch/cortex/releases/latest'; return $version.tag_name.TrimStart('v')}"') do set "VERSION=%%i"
 )
 
 :: Construct the download URL
-set "URL=https://github.com/janhq/cortex/releases/download/v%VERSION%/cortex-cpp-%VERSION%-win-amd64%AVX%"
+set "URL=https://github.com/menloresearch/cortex/releases/download/v%VERSION%/cortex-cpp-%VERSION%-win-amd64%AVX%"
 if "%GPU%"=="true" (
     :: If --gpu option is provided, append -cuda to the URL
     set "URL=%URL%-cuda"
diff --git a/engine/install.sh b/engine/install.sh
index ee3499f31..f0356c774 100644
--- a/engine/install.sh
+++ b/engine/install.sh
@@ -139,7 +139,7 @@ fi
 
 # Construct GitHub API URL and get latest version if not specified
 if [ "$VERSION" == "latest" ]; then
-    API_URL="https://api.github.com/repos/janhq/cortex/releases/latest"
+    API_URL="https://api.github.com/repos/menloresearch/cortex/releases/latest"
     VERSION=$(curl -s $API_URL | jq -r ".tag_name" | sed 's/^v//')
 fi
 
@@ -167,7 +167,7 @@ case $OS in
         ;;
 esac
 
-DOWNLOAD_URL="https://github.com/janhq/cortex/releases/download/v${VERSION}/${FILE_NAME}"
+DOWNLOAD_URL="https://github.com/menloresearch/cortex/releases/download/v${VERSION}/${FILE_NAME}"
 
 # Check AVX support
 if [ -z "$AVX" ] && [ "$OS" == "Linux" ]; then
diff --git a/engine/main.cc b/engine/main.cc
index 2f60916a6..d407726e0 100644
--- a/engine/main.cc
+++ b/engine/main.cc
@@ -17,6 +17,7 @@
 #include "controllers/threads.h"
 #include "database/database.h"
 #include "migrations/migration_manager.h"
+#include "openssl/ssl.h"
 #include "repositories/assistant_fs_repository.h"
 #include "repositories/file_fs_repository.h"
 #include "repositories/message_fs_repository.h"
@@ -249,6 +250,54 @@ void RunServer(std::optional<std::string> host, std::optional<int> port,
       .setClientMaxBodySize(256 * 1024 * 1024)  // Max 256MiB body size
       .setClientMaxMemoryBodySize(1024 * 1024);  // 1MiB before writing to disk
 
+  auto validate_api_key = [config_service](const drogon::HttpRequestPtr& req) {
+    auto api_keys = config_service->GetApiServerConfiguration()->api_keys;
+    static const std::unordered_set<std::string> public_endpoints = {
+        "/openapi.json", "/healthz", "/processManager/destroy"};
+
+    // If API key is not set, skip validation
+    if (api_keys.empty()) {
+      return true;
+    }
+
+    // If path is public or is static file, skip validation
+    if (public_endpoints.find(req->path()) != public_endpoints.end() ||
+        req->path() == "/") {
+      return true;
+    }
+
+    // Check for API key in the header
+    auto auth_header = req->getHeader("Authorization");
+
+    std::string prefix = "Bearer ";
+    if (auth_header.substr(0, prefix.size()) == prefix) {
+      std::string received_api_key = auth_header.substr(prefix.size());
+      if (std::find(api_keys.begin(), api_keys.end(), received_api_key) !=
+          api_keys.end()) {
+        return true;  // API key is valid
+      }
+    }
+
+    CTL_WRN("Unauthorized: Invalid API Key\n");
+    return false;
+  };
+
+  drogon::app().registerPreRoutingAdvice(
+      [&validate_api_key](
+          const drogon::HttpRequestPtr& req,
+          std::function<void(const drogon::HttpResponsePtr&)>&& cb,
+          drogon::AdviceChainCallback&& ccb) {
+        if (!validate_api_key(req)) {
+          Json::Value ret;
+          ret["message"] = "Invalid API Key";
+          auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+          resp->setStatusCode(drogon::k401Unauthorized);
+          cb(resp);
+          return;
+        }
+        ccb();
+      });
+
   // CORS
   drogon::app().registerPostHandlingAdvice(
       [config_service](const drogon::HttpRequestPtr& req,
@@ -348,6 +397,7 @@ int main(int argc, char* argv[]) {
     return 1;
   }
 
+  SSL_library_init();
  curl_global_init(CURL_GLOBAL_DEFAULT);
 
   // avoid printing logs to terminal
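The validator boils down to a prefix check plus an exact match against the configured key list. Extracted into a free function with toy keys:

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    // Reduced form of validate_api_key: exact Bearer match against known keys.
    bool IsAuthorized(const std::string& auth_header,
                      const std::vector<std::string>& api_keys) {
      if (api_keys.empty()) return true;  // no keys configured -> open server
      const std::string prefix = "Bearer ";
      if (auth_header.compare(0, prefix.size(), prefix) != 0) return false;
      const std::string received = auth_header.substr(prefix.size());
      return std::find(api_keys.begin(), api_keys.end(), received) !=
             api_keys.end();
    }

    int main() {
      std::vector<std::string> keys = {"k1", "k2"};
      std::cout << IsAuthorized("Bearer k1", keys) << "\n";    // 1
      std::cout << IsAuthorized("Bearer nope", keys) << "\n";  // 0
      std::cout << IsAuthorized("", keys) << "\n";             // 0
      return 0;
    }

Running this before routing (registerPreRoutingAdvice) means unauthenticated requests are rejected with 401 before any controller code executes.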
diff --git a/engine/services/config_service.cc b/engine/services/config_service.cc
index ce5526090..ae90e93fb 100644
--- a/engine/services/config_service.cc
+++ b/engine/services/config_service.cc
@@ -6,10 +6,10 @@ cpp::result<ApiServerConfiguration, std::string>
 ConfigService::UpdateApiServerConfiguration(const Json::Value& json) {
   auto config = file_manager_utils::GetCortexConfig();
   ApiServerConfiguration api_server_config{
-      config.enableCors,         config.allowedOrigins,  config.verifyProxySsl,
-      config.verifyProxyHostSsl, config.proxyUrl,        config.proxyUsername,
-      config.proxyPassword,      config.noProxy,         config.verifyPeerSsl,
-      config.verifyHostSsl,      config.huggingFaceToken};
+      config.enableCors,         config.allowedOrigins,  config.verifyProxySsl,
+      config.verifyProxyHostSsl, config.proxyUrl,        config.proxyUsername,
+      config.proxyPassword,      config.noProxy,         config.verifyPeerSsl,
+      config.verifyHostSsl,      config.huggingFaceToken, config.apiKeys};
 
   std::vector<std::string> updated_fields;
   std::vector<std::string> invalid_fields;
 
@@ -36,6 +36,7 @@ ConfigService::UpdateApiServerConfiguration(const Json::Value& json) {
 
   config.verifyHostSsl = api_server_config.verify_host_ssl;
   config.huggingFaceToken = api_server_config.hf_token;
+  config.apiKeys = api_server_config.api_keys;
 
   auto result = file_manager_utils::UpdateCortexConfig(config);
   return api_server_config;
@@ -45,8 +46,8 @@ cpp::result<ApiServerConfiguration, std::string>
 ConfigService::GetApiServerConfiguration() {
   auto config = file_manager_utils::GetCortexConfig();
   return ApiServerConfiguration{
-      config.enableCors,         config.allowedOrigins,  config.verifyProxySsl,
-      config.verifyProxyHostSsl, config.proxyUrl,        config.proxyUsername,
-      config.proxyPassword,      config.noProxy,         config.verifyPeerSsl,
-      config.verifyHostSsl,      config.huggingFaceToken};
+      config.enableCors,         config.allowedOrigins,  config.verifyProxySsl,
+      config.verifyProxyHostSsl, config.proxyUrl,        config.proxyUsername,
+      config.proxyPassword,      config.noProxy,         config.verifyPeerSsl,
+      config.verifyHostSsl,      config.huggingFaceToken, config.apiKeys};
 }
diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc
index bdc647905..8fd070bea 100644
--- a/engine/services/engine_service.cc
+++ b/engine/services/engine_service.cc
@@ -443,7 +443,7 @@ std::string EngineService::GetMatchedVariant(
 cpp::result, std::string>
 EngineService::GetEngineReleases(const std::string& engine) const {
   auto ne = NormalizeEngine(engine);
-  return github_release_utils::GetReleases("janhq", ne);
+  return github_release_utils::GetReleases("menloresearch", ne);
 }
 
 cpp::result, std::string>
@@ -452,7 +452,7 @@ EngineService::GetEngineVariants(const std::string& engine,
                                  bool filter_compatible_only) const {
   auto ne = NormalizeEngine(engine);
   auto engine_release =
-      github_release_utils::GetReleaseByVersion("janhq", ne, version);
+      github_release_utils::GetReleaseByVersion("menloresearch", ne, version);
   if (engine_release.has_error()) {
     return cpp::fail("Failed to get engine release: " +
                      engine_release.error());
@@ -891,7 +891,7 @@ std::vector EngineService::GetLoadedEngines() {
 cpp::result EngineService::GetLatestEngineVersion(const std::string& engine) const {
   auto ne = NormalizeEngine(engine);
-  auto res = github_release_utils::GetReleaseByVersion("janhq", ne, "latest");
+  auto res = github_release_utils::GetReleaseByVersion("menloresearch", ne, "latest");
   if (res.has_error()) {
     return cpp::fail("Failed to fetch engine " + engine + " latest version!");
   }
diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc
index 972647b51..5ca27ddfb 100644
--- a/engine/services/hardware_service.cc
+++ b/engine/services/hardware_service.cc
@@ -304,7 +304,7 @@ void HardwareService::UpdateHardwareInfos() {
   };
   for (auto const& he : b.value()) {
     if (!exists(he.uuid)) {
-      db_service_->DeleteHardwareEntry(he.uuid);
+      (void)db_service_->DeleteHardwareEntry(he.uuid);
     }
   }
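The (void) cast in UpdateHardwareInfos records that the result of DeleteHardwareEntry is dropped on purpose (cleanup here is best-effort); with a [[nodiscard]] return type the compiler would otherwise warn. The mechanism in miniature:

    #include <iostream>
    #include <string>

    // Stand-in for a cpp::result-returning call whose value should not be ignored.
    [[nodiscard]] bool DeleteEntry(const std::string& id) {
      std::cout << "deleting " << id << "\n";
      return true;
    }

    int main() {
      // DeleteEntry("gpu-0");     // would warn: ignoring [[nodiscard]] value
      (void)DeleteEntry("gpu-0");  // explicit: failure is acceptable here
      return 0;
    }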
-       std::filesystem::directory_iterator(yaml_fp.parent_path())) {
-    if (entry.is_regular_file() && (entry.path().extension() == ".yml")) {
-      std::filesystem::remove(entry);
-      CTL_INF("Removed: " << entry.path().string());
+  if (engine_svc_->IsRemoteEngine(mc.engine)) {
+    std::filesystem::remove(yaml_fp);
+    CTL_INF("Removed: " << yaml_fp.string());
+  } else {
+    // Remove yaml files
+    for (const auto& entry :
+         std::filesystem::directory_iterator(yaml_fp.parent_path())) {
+      if (entry.is_regular_file() && (entry.path().extension() == ".yml")) {
+        std::filesystem::remove(entry);
+        CTL_INF("Removed: " << entry.path().string());
+      }
     }
   }
@@ -950,10 +952,20 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
       LOG_WARN << "model_path is empty";
       return StartModelResult{.success = false};
     }
+    if (!mc.mmproj.empty()) {
+#if defined(_WIN32)
+      json_data["mmproj"] = cortex::wc::WstringToUtf8(
+          fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).wstring());
+#else
+      json_data["mmproj"] =
+          fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string();
+#endif
+    }
     json_data["system_prompt"] = mc.system_template;
     json_data["user_prompt"] = mc.user_template;
     json_data["ai_prompt"] = mc.ai_template;
     json_data["ctx_len"] = std::min(kDefautlContextLength, mc.ctx_len);
+    json_data["max_tokens"] = std::min(kDefautlContextLength, mc.ctx_len);
     max_model_context_length = mc.ctx_len;
   } else {
     bypass_stop_check_set_.insert(model_handle);
@@ -978,6 +990,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
 
   if (ctx_len) {
     json_data["ctx_len"] = std::min(ctx_len.value(), max_model_context_length);
+    json_data["max_tokens"] =
+        std::min(ctx_len.value(), max_model_context_length);
   }
   CTL_INF(json_data.toStyledString());
   auto may_fallback_res = MayFallbackToCpu(json_data["model_path"].asString(),
@@ -996,16 +1010,18 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
     auto data = std::get<1>(ir);
 
     if (status == drogon::k200OK) {
-      // start model successfully, we store the metadata so we can use
+      // start model successfully, in case not vision model, we store the metadata so we can use
       // for each inference
-      auto metadata_res = GetModelMetadata(model_handle);
-      if (metadata_res.has_value()) {
-        loaded_model_metadata_map_.emplace(model_handle,
-                                           std::move(metadata_res.value()));
-        CTL_INF("Successfully stored metadata for model " << model_handle);
-      } else {
-        CTL_WRN("Failed to get metadata for model " << model_handle << ": "
-                                                    << metadata_res.error());
+      if (!json_data.isMember("mmproj") || json_data["mmproj"].isNull()) {
+        auto metadata_res = GetModelMetadata(model_handle);
+        if (metadata_res.has_value()) {
+          loaded_model_metadata_map_.emplace(model_handle,
+                                             std::move(metadata_res.value()));
+          CTL_INF("Successfully stored metadata for model " << model_handle);
+        } else {
+          CTL_WRN("Failed to get metadata for model " << model_handle << ": "
+                                                      << metadata_res.error());
+        }
       }
 
       return StartModelResult{.success = true,
@@ -1437,5 +1453,5 @@ void ModelService::ProcessBgrTasks() {
 
   auto clone = cb;
   task_queue_.RunInQueue(std::move(cb));
-  task_queue_.RunEvery(std::chrono::seconds(10), std::move(clone));
+  task_queue_.RunEvery(std::chrono::seconds(60), std::move(clone));
 }
\ No newline at end of file
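The mmproj branch above narrows a wide path to UTF-8 on Windows via cortex::wc::WstringToUtf8 (its implementation is not part of this diff). A standard-library-only sketch of the same narrowing under C++17, where path::u8string() still returns std::string (in C++20 it returns std::u8string, so this is version-sensitive):

    #include <filesystem>
    #include <iostream>
    #include <string>

    int main() {
      std::filesystem::path mmproj = "models/mmproj-model-f16.gguf";
    #if defined(_WIN32)
      // path::native() is wide on Windows; u8string() narrows to UTF-8 (C++17).
      std::string utf8 = mmproj.u8string();
    #else
      std::string utf8 = mmproj.string();
    #endif
      std::cout << utf8 << "\n";  // safe to embed in a JSON body
      return 0;
    }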
diff --git a/engine/services/model_source_service.cc b/engine/services/model_source_service.cc
index 3314fd53e..ea26718e2 100644
--- a/engine/services/model_source_service.cc
+++ b/engine/services/model_source_service.cc
@@ -432,9 +432,10 @@ cpp::result ModelSourceService::AddCortexsoRepo(
   }
 
   auto author = hub_author;
+  auto model_author = hu::GetModelAuthorCortexsoHub(model_name);
   if (auto model_author = hu::GetModelAuthorCortexsoHub(model_name);
-      model_author.has_value() && !model_author->empty()) {
-    author = *model_author;
+      model_author.has_value() && !model_author.value().empty()) {
+    author = model_author.value();
   }
 
   // Get models from db
@@ -443,6 +444,10 @@ cpp::result ModelSourceService::AddCortexsoRepo(
   std::unordered_set<std::string> updated_model_list;
   std::vector> tasks;
   for (auto const& [branch, _] : branches.value()) {
+    if (!model_author.has_error() && branch == "main") {
+      CTL_DBG("Skip main branch");
+      continue;
+    }
     CTL_DBG(branch);
     tasks.push_back(std::async(std::launch::async, [&, branch = branch] {
       return AddCortexsoRepoBranch(model_source, author, model_name, branch,
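Each branch sync above runs as its own std::async task whose future is joined later; the new guard returns before a task for main is ever created. The fan-out/join skeleton in isolation (fake branch work):

    #include <future>
    #include <iostream>
    #include <string>
    #include <vector>

    bool SyncBranch(const std::string& branch) {
      return branch != "main";  // pretend main is filtered upstream
    }

    int main() {
      std::vector<std::string> branches = {"1b", "3b", "7b"};
      std::vector<std::future<bool>> tasks;
      for (const auto& branch : branches) {
        // Capture by value so each task owns its branch name.
        tasks.push_back(std::async(std::launch::async,
                                   [branch] { return SyncBranch(branch); }));
      }
      for (auto& t : tasks) {
        std::cout << t.get() << "\n";  // join in submission order
      }
      return 0;
    }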
diff --git a/engine/templates/linux/install.sh b/engine/templates/linux/install.sh
index d5f1393e5..3b2474072 100644
--- a/engine/templates/linux/install.sh
+++ b/engine/templates/linux/install.sh
@@ -50,10 +50,10 @@ get_latest_version() {
     local tag_name
     case $channel in
         stable)
-            tag_name=$(curl -s "https://api.github.com/repos/janhq/cortex.cpp/releases/latest" | grep -oP '"tag_name": "\K(.*)(?=")')
+            tag_name=$(curl -s "https://api.github.com/repos/menloresearch/cortex.cpp/releases/latest" | grep -oP '"tag_name": "\K(.*)(?=")')
             ;;
         beta)
-            tag_name=$(curl -s "https://api.github.com/repos/janhq/cortex.cpp/releases" | jq -r '.[] | select(.prerelease) | .tag_name' | head -n 1)
+            tag_name=$(curl -s "https://api.github.com/repos/menloresearch/cortex.cpp/releases" | jq -r '.[] | select(.prerelease) | .tag_name' | head -n 1)
             ;;
         nightly)
             tag_name=$(curl -s "https://delta.jan.ai/cortex/latest/version.json" | jq -r '.tag_name')
@@ -153,14 +153,14 @@ install_cortex() {
 
     case $channel in
         stable)
-            url_binary="https://github.com/janhq/cortex.cpp/releases/download/v${version}/cortex-${version}-linux-${ARCH}.tar.gz"
-            url_deb_local="https://github.com/janhq/cortex.cpp/releases/download/v${version}/cortex-${version}-linux-${ARCH}-local-installer.deb"
-            url_deb_network="https://github.com/janhq/cortex.cpp/releases/download/v${version}/cortex-${version}-linux-${ARCH}-network-installer.deb"
+            url_binary="https://github.com/menloresearch/cortex.cpp/releases/download/v${version}/cortex-${version}-linux-${ARCH}.tar.gz"
+            url_deb_local="https://github.com/menloresearch/cortex.cpp/releases/download/v${version}/cortex-${version}-linux-${ARCH}-local-installer.deb"
+            url_deb_network="https://github.com/menloresearch/cortex.cpp/releases/download/v${version}/cortex-${version}-linux-${ARCH}-network-installer.deb"
             ;;
         beta)
-            url_binary="https://github.com/janhq/cortex.cpp/releases/download/v${version}/cortex-${version}-linux-${ARCH}.tar.gz"
-            url_deb_local="https://github.com/janhq/cortex.cpp/releases/download/v${version}/cortex-${version}-linux-${ARCH}-local-installer.deb"
-            url_deb_network="https://github.com/janhq/cortex.cpp/releases/download/v${version}/cortex-${version}-linux-${ARCH}-network-installer.deb"
+            url_binary="https://github.com/menloresearch/cortex.cpp/releases/download/v${version}/cortex-${version}-linux-${ARCH}.tar.gz"
+            url_deb_local="https://github.com/menloresearch/cortex.cpp/releases/download/v${version}/cortex-${version}-linux-${ARCH}-local-installer.deb"
+            url_deb_network="https://github.com/menloresearch/cortex.cpp/releases/download/v${version}/cortex-${version}-linux-${ARCH}-network-installer.deb"
             ;;
         nightly)
            url_binary="https://delta.jan.ai/cortex/v${version}/linux-${ARCH}/cortex-nightly.tar.gz"
diff --git a/engine/test/components/test_github_release_utils.cc b/engine/test/components/test_github_release_utils.cc
index 284aed868..ae1e2c7c2 100644
--- a/engine/test/components/test_github_release_utils.cc
+++ b/engine/test/components/test_github_release_utils.cc
@@ -6,14 +6,14 @@ class GitHubReleaseUtilsTest : public ::testing::Test {};
 TEST_F(GitHubReleaseUtilsTest, AbleToGetReleaseByVersion) {
   auto version{"v0.1.36"};
   auto result = github_release_utils::GetReleaseByVersion(
-      "janhq", "cortex.llamacpp", version);
+      "menloresearch", "cortex.llamacpp", version);
 
   ASSERT_TRUE(result.has_value());
   ASSERT_EQ(result->tag_name, version);
 }
 
 TEST_F(GitHubReleaseUtilsTest, AbleToGetReleaseList) {
-  auto result = github_release_utils::GetReleases("janhq", "cortex.llamacpp");
+  auto result = github_release_utils::GetReleases("menloresearch", "cortex.llamacpp");
 
   ASSERT_TRUE(result.has_value());
   ASSERT_TRUE(result->size() > 0);
diff --git a/engine/utils/config_yaml_utils.cc b/engine/utils/config_yaml_utils.cc
index b26d690c6..49b31acd0 100644
--- a/engine/utils/config_yaml_utils.cc
+++ b/engine/utils/config_yaml_utils.cc
@@ -51,6 +51,7 @@ cpp::result<void, std::string> CortexConfigMgr::DumpYamlConfig(
     node["sslKeyPath"] = config.sslKeyPath;
     node["supportedEngines"] = config.supportedEngines;
     node["checkedForSyncHubAt"] = config.checkedForSyncHubAt;
+    node["apiKeys"] = config.apiKeys;
 
     out_file << node;
     out_file.close();
@@ -87,7 +88,7 @@ CortexConfig CortexConfigMgr::FromYaml(const std::string& path,
        !node["verifyProxySsl"] || !node["verifyProxyHostSsl"] ||
        !node["supportedEngines"] || !node["sslCertPath"] ||
        !node["sslKeyPath"] || !node["noProxy"] ||
-       !node["checkedForSyncHubAt"]);
+       !node["checkedForSyncHubAt"] || !node["apiKeys"]);
 
   CortexConfig config = {
       .logFolderPath = node["logFolderPath"]
@@ -182,6 +183,11 @@ CortexConfig CortexConfigMgr::FromYaml(const std::string& path,
       .checkedForSyncHubAt = node["checkedForSyncHubAt"]
                                  ? node["checkedForSyncHubAt"].as<uint64_t>()
                                  : default_cfg.checkedForSyncHubAt,
+      .apiKeys =
+          node["apiKeys"]
+              ? node["apiKeys"].as<std::vector<std::string>>()
+              : default_cfg.apiKeys,
+
   };
 
   if (should_update_config) {
     l.unlock();
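apiKeys round-trips through cortex.yml as a YAML sequence, relying on yaml-cpp's built-in std::vector<std::string> conversion in both directions. In isolation:

    #include <yaml-cpp/yaml.h>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      YAML::Node node;
      node["apiKeys"] = std::vector<std::string>{"k1", "k2"};  // emitted as a sequence

      // Read back with the same guarded conversion the config loader uses.
      auto keys = node["apiKeys"]
                      ? node["apiKeys"].as<std::vector<std::string>>()
                      : std::vector<std::string>{};
      for (const auto& k : keys) std::cout << k << "\n";
      return 0;
    }

The extra !node["apiKeys"] term in FromYaml's staleness check means older config files missing the key get rewritten with the default on next load.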
node["apiKeys"].as>() + : default_cfg.apiKeys, + }; if (should_update_config) { l.unlock(); diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index 1749cd2d0..c94b8fe5f 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -68,6 +68,7 @@ struct CortexConfig { std::string sslKeyPath; std::vector supportedEngines; uint64_t checkedForSyncHubAt; + std::vector apiKeys; }; class CortexConfigMgr { diff --git a/engine/utils/curl_utils.cc b/engine/utils/curl_utils.cc index 2481658ad..859c629d1 100644 --- a/engine/utils/curl_utils.cc +++ b/engine/utils/curl_utils.cc @@ -147,6 +147,7 @@ cpp::result SimpleGet(const std::string& url, std::default_delete()); SetUpProxy(curl, url); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, CurlResponse::WriteCallback); curl_easy_setopt(curl, CURLOPT_WRITEDATA, response); diff --git a/engine/utils/engine_constants.h b/engine/utils/engine_constants.h index 35368c519..7bacf2249 100644 --- a/engine/utils/engine_constants.h +++ b/engine/utils/engine_constants.h @@ -3,10 +3,6 @@ constexpr const auto kLlamaEngine = "llama-cpp"; constexpr const auto kPythonEngine = "python-engine"; -constexpr const auto kOpenAiEngine = "openai"; -constexpr const auto kAnthropicEngine = "anthropic"; - - constexpr const auto kRemote = "remote"; constexpr const auto kLocal = "local"; diff --git a/engine/utils/file_manager_utils.cc b/engine/utils/file_manager_utils.cc index 743c6a641..79b4e421a 100644 --- a/engine/utils/file_manager_utils.cc +++ b/engine/utils/file_manager_utils.cc @@ -195,6 +195,7 @@ config_yaml_utils::CortexConfig GetDefaultConfig() { .sslKeyPath = "", .supportedEngines = config_yaml_utils::kDefaultSupportedEngines, .checkedForSyncHubAt = 0u, + .apiKeys = {}, }; } diff --git a/engine/utils/huggingface_utils.h b/engine/utils/huggingface_utils.h index fde5d11b2..2c4a3cb75 100644 --- a/engine/utils/huggingface_utils.h +++ b/engine/utils/huggingface_utils.h @@ -312,13 +312,13 @@ inline std::optional GetDefaultBranch( } } -inline std::optional GetModelAuthorCortexsoHub( +inline cpp::result GetModelAuthorCortexsoHub( const std::string& model_name) { try { auto remote_yml = curl_utils::ReadRemoteYaml(GetMetadataUrl(model_name)); if (remote_yml.has_error()) { - return std::nullopt; + return cpp::fail(remote_yml.error()); } auto metadata = remote_yml.value(); @@ -326,9 +326,9 @@ inline std::optional GetModelAuthorCortexsoHub( if (author.IsDefined()) { return author.as(); } - return std::nullopt; + return ""; } catch (const std::exception& e) { - return std::nullopt; + return ""; } } } // namespace huggingface_utils