diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index dc0719f95..356a2a9ca 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,52 +1,15 @@
name: CI
on:
- schedule:
- - cron: "0 20 * * *" # At 8 PM UTC, which is 3 AM UTC+7
push:
tags: ["v[0-9]+.[0-9]+.[0-9]+"]
paths:
[
- ".github/scripts/**",
- ".github/workflows/build.yml",
- "**/CMakeLists.txt",
- "**/Makefile",
- "**/*.h",
- "**/*.hpp",
- "**/*.c",
- "**/*.cpp",
- "**/*.cu",
- "**/*.cc",
- "**/*.cxx",
- "llama.cpp",
- "!docs/**",
- "!.gitignore",
- "!README.md",
- ]
- pull_request:
- types: [opened, synchronize, reopened]
- paths:
- [
- ".github/scripts/**",
- ".github/workflows/build.yml",
- "**/CMakeLists.txt",
- "**/Makefile",
- "**/*.h",
- "**/*.hpp",
- "**/*.c",
- "**/*.cpp",
- "**/*.cu",
- "**/*.cc",
- "**/*.cxx",
- "llama.cpp",
- "!docs/**",
- "!.gitignore",
- "!README.md",
+ "cortex-cpp/**",
]
workflow_dispatch:
env:
- BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf
@@ -76,70 +39,120 @@ jobs:
draft: true
prerelease: false
- # Get the latest version of the release
- set-nitro-version:
- runs-on: ubuntu-latest
- outputs:
- version: ${{ steps.version_update.outputs.new_version }}
- steps:
- - name: Get latest release
- id: version_update
- run: |
- ldd --version
- if [[ ${{ github.event_name }} == push && ${{ github.ref }} == refs/tags/* ]]; then
- echo "VERSION=${GITHUB_REF#refs/tags/}"
- NEW_VERSION="${VERSION#v}"
- echo "::set-output name=new_version::$NEW_VERSION"
- else
- # Function to get the latest release tag
- get_latest_tag() {
- local retries=0
- local max_retries=3
- local tag
- while [ $retries -lt $max_retries ]; do
- tag=$(curl -s https://api.github.com/repos/janhq/cortex/releases/latest | jq -r .tag_name)
- if [ -n "$tag" ] && [ "$tag" != "null" ]; then
- echo $tag
- return
- else
- let retries++
- sleep 2
- fi
- done
- echo "Failed to fetch latest tag after $max_retries attempts."
- exit 1
- }
- # Get the latest release tag from GitHub API
- LATEST_TAG=$(get_latest_tag)
-
- # Remove the 'v' and append the build number to the version
- NEW_VERSION="${LATEST_TAG#v}-${GITHUB_RUN_NUMBER}"
- echo "New version: $NEW_VERSION"
- echo "::set-output name=new_version::$NEW_VERSION"
- fi
- echo "Version: $NEW_VERSION"
-
- ubuntu-amd64-build:
- runs-on: ubuntu-18-04-cuda-11-7
- needs: [create-draft-release, set-nitro-version]
- if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
+ build-and-test:
+ runs-on: ${{ matrix.runs-on }}
+ needs: [create-draft-release]
timeout-minutes: 40
- permissions:
- contents: write
-
strategy:
+ fail-fast: false
matrix:
include:
- - build: "amd64-avx2"
- defines: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF"
- - build: "amd64-avx"
- defines: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
- - build: "amd64-avx512"
- defines: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
- - build: "amd64-vulkan"
- defines: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
- # - build: "arm64"
- # defines: "-A ARM64 -DLLAMA_NATIVE=OFF"
+ - os: "linux"
+ name: "amd64-avx2"
+ runs-on: "ubuntu-18-04"
+ cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF"
+ run-e2e: true
+
+ - os: "linux"
+ name: "amd64-avx"
+ runs-on: "ubuntu-18-04"
+ cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
+ run-e2e: false
+
+ - os: "linux"
+ name: "amd64-avx512"
+ runs-on: "ubuntu-18-04"
+ cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
+ run-e2e: false
+
+ - os: "linux"
+ name: "amd64-vulkan"
+ runs-on: "ubuntu-18-04-cuda-11-7"
+ cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
+ run-e2e: false
+
+ - os: "linux"
+ name: "amd64-cuda-11-7"
+ runs-on: "ubuntu-18-04-cuda-11-7"
+ cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
+ run-e2e: false
+
+ - os: "linux"
+ name: "amd64-cuda-12-0"
+ runs-on: "ubuntu-18-04-cuda-12-0"
+ cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
+ run-e2e: false
+
+ - os: "mac"
+ name: "amd64"
+ runs-on: "macos-13"
+ cmake-flags: ""
+ run-e2e: true
+
+ - os: "mac"
+ name: "arm64"
+ runs-on: "mac-silicon"
+ cmake-flags: "-DMAC_ARM64=ON"
+ run-e2e: true
+
+ - os: "windows"
+ name: "amd64-avx2"
+ runs-on: "windows-latest"
+ cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: true
+
+ - os: "windows"
+ name: "amd64-avx"
+ runs-on: "windows-latest"
+ cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-avx512"
+ runs-on: "windows-latest"
+ cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-vulkan"
+ runs-on: "windows-latest"
+ cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-avx2-cuda-12-0"
+ runs-on: "windows-cuda-12-0"
+ cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-avx-cuda-12-0"
+ runs-on: "windows-cuda-12-0"
+ cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-avx512-cuda-12-0"
+ runs-on: "windows-cuda-12-0"
+ cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-avx2-cuda-11-7"
+ runs-on: "windows-cuda-11-7"
+ cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-avx-cuda-11-7"
+ runs-on: "windows-cuda-11-7"
+ cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+ - os: "windows"
+ name: "amd64-avx512-cuda-11-7"
+ runs-on: "windows-cuda-11-7"
+ cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
steps:
- name: Clone
@@ -148,372 +161,32 @@ jobs:
with:
submodules: recursive
- - name: Prepare Vulkan SDK
- if: ${{ matrix.build == 'amd64-vulkan' }}
- uses: humbletim/setup-vulkan-sdk@v1.2.0
- with:
- vulkan-query-version: 1.3.275.0
- vulkan-components: Vulkan-Headers, Vulkan-Loader
- vulkan-use-cache: true
-
- - name: Build
- id: make_build
+ - name: Install choco on Windows
+ if: runner.os == 'Windows'
run: |
- ldd --version
- cd cortex-cpp
- ./install_deps.sh
- mkdir build && cd build
- cmake ${{ matrix.defines }} -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} ..
- make -j $(nproc)
- ls -la
-
- - name: Package
- shell: bash
- run: |
- mkdir -p cortex-cpp/nitro
- mkdir -p cortex-cpp/nitro/engines/cortex.llamacpp
- cp cortex-cpp/build/nitro cortex-cpp/nitro/
- cp cortex-cpp/build/engines/cortex.llamacpp/libengine.so cortex-cpp/nitro/engines/cortex.llamacpp/
- tar -czvf cortex-cpp/nitro.tar.gz cortex-cpp/nitro
-
- - name: Upload Artifact
- uses: actions/upload-artifact@v2
- if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
- with:
- name: nitro-linux-${{ matrix.build }}
- path: ./cortex-cpp/nitro
-
- - name: Run e2e testing - LLama.CPP
- shell: bash
- if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' && matrix.build != 'amd64-avx' }}
- run: |
- # run e2e testing
- cd cortex-cpp/nitro
- chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
- rm -rf uploads/
-
- - uses: actions/upload-release-asset@v1.0.1
- if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- with:
- upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- asset_path: ./cortex-cpp/nitro.tar.gz
- asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-linux-${{ matrix.build }}.tar.gz
- asset_content_type: application/gzip
-
- ubuntu-amd64-cuda-build:
- runs-on: ubuntu-18-04-cuda-${{ matrix.cuda }}
- needs: [create-draft-release, set-nitro-version]
- if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
- timeout-minutes: 40
- permissions:
- contents: write
- strategy:
- matrix:
- include:
- - cuda: "12-0"
- flags: "-DCUDA_12_0=ON"
- - cuda: "11-7"
- flags: "-DCUDA_11_7=ON"
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v3
- with:
- submodules: recursive
+ choco install make -y
- name: Build
- id: make_build
run: |
cd cortex-cpp
- ./install_deps.sh
- mkdir build && cd build
- cmake ${{matrix.flags}} -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} ..
- make -j $(nproc)
- ls -la
+ make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}"
- name: Package
- shell: bash
- run: |
- cd cortex-cpp
- mkdir -p nitro
- mkdir -p nitro/engines/cortex.llamacpp
- cp build/nitro nitro/
- cp build/engines/cortex.llamacpp/libengine.so nitro/engines/cortex.llamacpp/
- tar -czvf nitro.tar.gz nitro
-
- - name: Upload Artifact
- uses: actions/upload-artifact@v2
- if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
- with:
- name: nitro-linux-amd64-cuda-${{ matrix.cuda }}
- path: ./cortex-cpp/nitro
-
- - name: Run e2e testing - LLama.CPP
- shell: bash
- if: ${{ matrix.cuda != '12-0'}}
- run: |
- # run e2e testing
- cd cortex-cpp/nitro
- chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
- rm -rf uploads/
-
- - uses: actions/upload-release-asset@v1.0.1
- if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- with:
- upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- asset_path: ./cortex-cpp/nitro.tar.gz
- asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-linux-amd64-cuda-${{ matrix.cuda }}.tar.gz
- asset_content_type: application/gzip
-
- macOS-silicon-build:
- runs-on: mac-silicon
- needs: [create-draft-release, set-nitro-version]
- if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
- timeout-minutes: 40
- permissions:
- contents: write
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v3
- with:
- submodules: recursive
-
- - name: Build
- id: cmake_build
- run: |
- cd cortex-cpp
- ./install_deps.sh
- mkdir build && cd build
- cmake -DMAC_ARM64=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} ..
- CC=gcc-8 make -j $(sysctl -n hw.ncpu)
- ls -la
-
- - name: Package
- shell: bash
- run: |
- cd cortex-cpp
- mkdir -p nitro
- mkdir -p nitro/engines/cortex.llamacpp
- cp build/nitro nitro/
- cp build/engines/cortex.llamacpp/libengine.dylib nitro/engines/cortex.llamacpp/
-
- - name: Upload Artifact
- uses: actions/upload-artifact@v2
- with:
- name: nitro-mac-arm64
- path: ./cortex-cpp/nitro
-
- - name: Run e2e testing - LLama.CPP
- run: |
- # run e2e testing
- cd cortex-cpp/nitro/
- chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
- rm -rf uploads/
-
- macOS-amd64-build:
- runs-on: macos-13
- needs: [create-draft-release, set-nitro-version]
- if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
- timeout-minutes: 40
- permissions:
- contents: write
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v3
- with:
- submodules: recursive
-
- - name: Dependencies
- id: depends
- continue-on-error: true
- run: |
- brew update
- brew install sdl2
-
- - name: Build
- id: cmake_build
- run: |
- cd cortex-cpp
- ./install_deps.sh
- mkdir build && cd build
- cmake -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} ..
- CC=gcc-8 make -j $(sysctl -n hw.ncp)
- ls -la
-
- - name: Package
- shell: bash
- run: |
- cd cortex-cpp
- mkdir -p nitro
- mkdir -p nitro/engines/cortex.llamacpp
- cp build/nitro nitro/
- cp build/engines/cortex.llamacpp/libengine.dylib nitro/engines/cortex.llamacpp/
-
- - name: Upload Artifact
- uses: actions/upload-artifact@v2
- with:
- name: nitro-mac-amd64
- path: ./cortex-cpp/nitro
-
- - name: Run e2e testing - LLama.CPP
- shell: bash
- run: |
- # run e2e testing
- cd cortex-cpp
- cd nitro
- chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
- rm -rf uploads/
-
- universal-nitro-artifact-macos:
- runs-on: macos-latest
- needs: [create-draft-release, set-nitro-version, macOS-silicon-build, macOS-amd64-build]
- if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
- timeout-minutes: 40
- permissions:
- contents: write
- steps:
- - name: download artifact amd64
- uses: actions/download-artifact@v2
- with:
- name: nitro-mac-amd64
- path: ./cortex-cpp/nitro-mac-amd64
-
- - name: download artifact arm64
- uses: actions/download-artifact@v2
- with:
- name: nitro-mac-arm64
- path: ./cortex-cpp/nitro-mac-arm64
-
- - name: bundle universal binary
- run: |
- cd cortex-cpp
- mkdir -p nitro
- mkdir -p nitro/engines/cortex.llamacpp/
- ls ./nitro-mac-amd64
- lipo -create ./nitro-mac-amd64/nitro ./nitro-mac-arm64/nitro -output ./nitro/nitro
- lipo -create ./nitro-mac-amd64/engines/cortex.llamacpp/libengine.dylib ./nitro-mac-arm64/engines/cortex.llamacpp/libengine.dylib -output ./nitro/engines/cortex.llamacpp/libengine.dylib
- tar -czvf nitro.tar.gz nitro
-
- - name: Upload Artifact
- uses: actions/upload-artifact@v2
- with:
- name: nitro-mac-universal
- path: ./cortex-cpp/nitro
-
- - uses: actions/upload-release-asset@v1.0.1
- if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- with:
- upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- asset_path: ./cortex-cpp/nitro.tar.gz
- asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-universal.tar.gz
- asset_content_type: application/gzip
-
- windows-amd64-build:
- runs-on: windows-latest
- needs: [create-draft-release, set-nitro-version]
- if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
- timeout-minutes: 40
-
- strategy:
- matrix:
- include:
- - build: "amd64-avx2"
- defines: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
- - build: "amd64-avx"
- defines: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
- - build: "amd64-avx512"
- defines: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
- - build: "amd64-vulkan"
- defines: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
- # - build: "arm64"
- # defines: "-A ARM64 -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON"
-
- permissions:
- contents: write
-
- steps:
- - name: Clone
-
- id: checkout
- uses: actions/checkout@v3
- with:
- submodules: recursive
-
- - name: Setup VSWhere.exe
- uses: warrenbuckley/Setup-VSWhere@v1
- with:
- version: latest
- silent: true
- env:
- ACTIONS_ALLOW_UNSECURE_COMMANDS: true
-
- - name: Add msbuild to PATH
- uses: microsoft/setup-msbuild@v1
-
- - name: actions-setup-cmake
- uses: jwlawson/actions-setup-cmake@v1.14.1
-
- - name: Prepare Vulkan SDK
- uses: humbletim/setup-vulkan-sdk@v1.2.0
- if: ${{ matrix.build == 'amd64-vulkan' }}
- with:
- vulkan-query-version: 1.3.275.0
- vulkan-components: Vulkan-Headers, Vulkan-Loader
- vulkan-use-cache: true
-
- - name: Build
- id: cmake_build
- shell: cmd
run: |
cd cortex-cpp
- cmake -S ./nitro_deps -B ./build_deps/nitro_deps
- cmake --build ./build_deps/nitro_deps --config Release
- mkdir -p build
- cd build
- cmake .. ${{ matrix.defines }} -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
- cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
+ make package
- - name: Pack artifacts
- id: pack_artifacts
- shell: cmd
+ - name: Run e2e testing
+ if: ${{ matrix.run-e2e }}
run: |
cd cortex-cpp
- mkdir .\build\Release\engines\cortex.llamacpp\
- robocopy .\build\engines\cortex.llamacpp\ .\build\Release\engines\cortex.llamacpp\ engine.dll
- robocopy build_deps\_install\bin\ .\build\Release\ zlib.dll
- robocopy build\bin\Release\ .\build\Release\ llama.dll
- robocopy ..\.github\patches\windows\ .\build\Release\ msvcp140.dll
- robocopy ..\.github\patches\windows\ .\build\Release\ vcruntime140_1.dll
- robocopy ..\.github\patches\windows\ .\build\Release\ vcruntime140.dll
- dotnet tool install --global AzureSignTool
- azuresigntool.exe sign -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" -kvi "${{ secrets.AZURE_CLIENT_ID }}" -kvt "${{ secrets.AZURE_TENANT_ID }}" -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" -kvc ${{ secrets.AZURE_CERT_NAME }} -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build\Release\nitro.exe"
- 7z a -ttar temp.tar .\build\Release\*
- 7z a -tgzip nitro.tar.gz temp.tar
-
- - name: Run e2e testing - Llama.cpp
- shell: cmd
- if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
- run: |
- cd cortex-cpp
- cd build\Release
- ..\..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
- rmdir /S /Q .\build\Release\uploads
+ make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }}
- name: Upload Artifact
uses: actions/upload-artifact@v2
- if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
with:
- name: nitro-win-${{ matrix.build }}
- path: ./cortex-cpp/build/Release
+ name: cortex-llamacpp-engine-${{ matrix.os }}-${{ matrix.name }}
+ path: ./cortex-cpp/cortex
- uses: actions/upload-release-asset@v1.0.1
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
@@ -521,197 +194,6 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- asset_path: ./cortex-cpp/nitro.tar.gz
- asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-${{ matrix.build }}.tar.gz
- asset_content_type: application/gzip
-
- windows-amd64-cuda-build:
- runs-on: windows-cuda-${{ matrix.cuda }}
- needs: [create-draft-release, set-nitro-version]
- if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
- timeout-minutes: 40
- permissions:
- contents: write
-
- strategy:
- matrix:
- include:
- - cuda: "12-0"
- instructions: "amd64-avx2"
- inst-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF"
- cmake-flags: "-DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
- - cuda: "12-0"
- instructions: "amd64-avx"
- inst-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
- cmake-flags: "-DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
- - cuda: "12-0"
- instructions: "amd64-avx512"
- inst-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
- cmake-flags: "-DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
- - cuda: "11-7"
- instructions: "amd64-avx2"
- inst-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF"
- cmake-flags: "-DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
- - cuda: "11-7"
- instructions: "amd64-avx"
- inst-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
- cmake-flags: "-DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
- - cuda: "11-7"
- instructions: "amd64-avx512"
- inst-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
- cmake-flags: "-DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v3
- with:
- submodules: recursive
-
- - uses: actions/setup-dotnet@v3
- with:
- dotnet-version: "6.0.x"
-
- - name: Build
- id: cmake_build
- shell: cmd
- run: |
- cd cortex-cpp
- cmake -S ./nitro_deps -B ./build_deps/nitro_deps
- cmake --build ./build_deps/nitro_deps --config Release
- mkdir -p build
- cd build
- cmake .. ${{ matrix.inst-flags }} ${{ matrix.cmake-flags }} -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
- cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
-
- - name: Pack artifacts
- id: pack_artifacts
- shell: cmd
- run: |
- set PATH=%PATH%;C:\Program Files\7-Zip\
- cd cortex-cpp
- mkdir .\build\Release\engines\cortex.llamacpp\
- robocopy .\build\engines\cortex.llamacpp\ .\build\Release\engines\cortex.llamacpp\ engine.dll
- robocopy build_deps\_install\bin\ .\build\Release\ zlib.dll
- robocopy build\bin\Release\ .\build\Release\ llama.dll
- robocopy ..\.github\patches\windows\ .\build\Release\ msvcp140.dll
- robocopy ..\.github\patches\windows\ .\build\Release\ vcruntime140_1.dll
- robocopy ..\.github\patches\windows\ .\build\Release\ vcruntime140.dll
- dotnet tool install --global AzureSignTool
- %USERPROFILE%\.dotnet\tools\azuresigntool.exe sign -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" -kvi "${{ secrets.AZURE_CLIENT_ID }}" -kvt "${{ secrets.AZURE_TENANT_ID }}" -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" -kvc ${{ secrets.AZURE_CERT_NAME }} -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build\Release\nitro.exe"
- 7z a -ttar temp.tar .\build\Release\*
- 7z a -tgzip nitro.tar.gz temp.tar
-
- - name: Upload Artifact
- uses: actions/upload-artifact@v2
- if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
- with:
- name: nitro-win-${{ matrix.instructions }}-cuda-${{ matrix.cuda }}
- path: ./cortex-cpp/build/Release
-
- - uses: actions/upload-release-asset@v1.0.1
- if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- with:
- upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- asset_path: ./cortex-cpp/nitro.tar.gz
- asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-${{ matrix.instructions }}-cuda-${{ matrix.cuda }}.tar.gz
- asset_content_type: application/gzip
-
- update_release_draft:
- if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
- timeout-minutes: 40
- needs:
- [
- ubuntu-amd64-build,
- ubuntu-amd64-cuda-build,
- macOS-silicon-build,
- macOS-amd64-build,
- windows-amd64-build,
- windows-amd64-cuda-build,
- ]
- permissions:
- contents: write
- pull-requests: write
- runs-on: ubuntu-latest
- steps:
- - uses: release-drafter/release-drafter@v5
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
- noti-discord-nightly:
- timeout-minutes: 40
- if: github.event_name == 'schedule' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success'
- needs:
- [
- create-draft-release,
- ubuntu-amd64-build,
- ubuntu-amd64-cuda-build,
- macOS-silicon-build,
- macOS-amd64-build,
- windows-amd64-build,
- windows-amd64-cuda-build,
- ]
- runs-on: ubuntu-latest
- steps:
- - name: Checkout code
- uses: actions/checkout@v3
- with:
- fetch-depth: "0"
- token: ${{ secrets.PAT_SERVICE_ACCOUNT }}
- - name: Notify Discord
- uses: Ilshidur/action-discord@master
- with:
- args: "Nightly build artifact: https://github.com/janhq/nitro/actions/runs/{{ GITHUB_RUN_ID }}"
- env:
- DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}
- - name: Update README.md with artifact URL
- run: |
- sed -i "s|||" README.md
- git config --global user.email "service@jan.ai"
- git config --global user.name "Service Account"
- git add README.md
- git commit -m "${GITHUB_REPOSITORY}: Update README.md with nightly build artifact URL"
- git -c http.extraheader="AUTHORIZATION: bearer ${{ secrets.PAT_SERVICE_ACCOUNT }}" push origin HEAD:main
- env:
- GITHUB_RUN_ID: ${{ github.run_id }}
-
- noti-discord-manual:
- timeout-minutes: 40
- if: github.event_name == 'workflow_dispatch' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success'
- needs:
- [
- create-draft-release,
- ubuntu-amd64-build,
- ubuntu-amd64-cuda-build,
- macOS-silicon-build,
- macOS-amd64-build,
- windows-amd64-build,
- windows-amd64-cuda-build,
- ]
- runs-on: ubuntu-latest
- steps:
- - name: Checkout code
- uses: actions/checkout@v3
- with:
- fetch-depth: "0"
- token: ${{ secrets.PAT_SERVICE_ACCOUNT }}
- - name: Notify Discord
- uses: Ilshidur/action-discord@master
- with:
- args: "Manual build artifact: https://github.com/janhq/nitro/actions/runs/{{ GITHUB_RUN_ID }}"
- env:
- DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}
- # Update README.md with artifact URL if manual build from main branch
- - name: Update README.md with artifact URL
- if: github.ref == 'refs/heads/main'
- run: |
- sed -i "s|||" README.md
- git config --global user.email "service@jan.ai"
- git config --global user.name "Service Account"
- git add README.md
- git commit -m "${GITHUB_REPOSITORY}: Update README.md with nightly build artifact URL"
- git -c http.extraheader="AUTHORIZATION: bearer ${{ secrets.PAT_SERVICE_ACCOUNT }}" push origin HEAD:main
- env:
- GITHUB_RUN_ID: ${{ github.run_id }}
+ asset_path: ./cortex-cpp/cortex.tar.gz
+ asset_name: cortex-llamacpp-engine-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz
+ asset_content_type: application/gzip
\ No newline at end of file
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
deleted file mode 100644
index 75d46cb03..000000000
--- a/.github/workflows/docs.yml
+++ /dev/null
@@ -1,95 +0,0 @@
-name: Nitro Docs
-
-on:
- push:
- branches:
- - main
- paths:
- - 'docs/**'
- - '.github/workflows/docs.yml'
- pull_request:
- branches:
- - main
- paths:
- - 'docs/**'
- - '.github/workflows/docs.yml'
- # Review gh actions docs if you want to further define triggers, paths, etc
- # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on
-
-jobs:
- deploy:
- name: Deploy to GitHub Pages
- env:
- CLOUDFLARE_ACCOUNT_ID: 9707100ef42a1a25bd70e3ee2137bd0e
- CLOUDFLARE_PROJECT_NAME: nitro
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-node@v3
- with:
- node-version: 18
-
- - name: Install jq
- uses: dcarbone/install-jq-action@v2.0.1
-
- - name: Fill env vars
- run: |
- env_example_file=".env.example"
- touch .env
- while IFS= read -r line || [[ -n "$line" ]]; do
- if [[ "$line" == *"="* ]]; then
- var_name=$(echo $line | cut -d '=' -f 1)
- echo $var_name
- var_value="$(jq -r --arg key "$var_name" '.[$key]' <<< "$SECRETS")"
- echo "$var_name=$var_value" >> .env
- fi
- done < "$env_example_file"
- working-directory: docs
- env:
- SECRETS: '${{ toJson(secrets) }}'
-
- - name: Install dependencies
- run: yarn install
- working-directory: docs
- - name: Build website
- run: sed -i '/process.env.DEBUG = namespaces;/c\// process.env.DEBUG = namespaces;' ./node_modules/debug/src/node.js && yarn build
- working-directory: docs
-
- - name: Publish to Cloudflare Pages PR Preview and Staging
- if: (github.event_name == 'push' && github.ref == 'refs/heads/main') || (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main')
- uses: cloudflare/pages-action@v1
- with:
- apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
- accountId: ${{ env.CLOUDFLARE_ACCOUNT_ID }}
- projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }}
- directory: ./docs/build
- # Optional: Enable this if you want to have GitHub Deployments triggered
- gitHubToken: ${{ secrets.GITHUB_TOKEN }}
- id: deployCloudflarePages
-
- - uses: mshick/add-pr-comment@v2
- if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main'
- with:
- message: |
- Preview URL: ${{ steps.deployCloudflarePages.outputs.url }}
-
- - name: Add Custome Domain file
- if: github.event_name == 'push' && github.event.pull_request.head.repo.full_name != github.repository
- run: echo "${{ vars.DOCUSAURUS_DOMAIN }}" > ./docs/build/CNAME
-
- # Popular action to deploy to GitHub Pages:
- # Docs: https://github.com/peaceiris/actions-gh-pages#%EF%B8%8F-docusaurus
- - name: Deploy to GitHub Pages
- if: github.event_name == 'push' && github.event.pull_request.head.repo.full_name != github.repository
- uses: peaceiris/actions-gh-pages@v3
- with:
- github_token: ${{ secrets.GITHUB_TOKEN }}
- # Build output to publish to the `gh-pages` branch:
- publish_dir: ./docs/build
- # The following lines assign commit authorship to the official
- # GH-Actions bot for deploys to `gh-pages` branch:
- # https://github.com/actions/checkout/issues/13#issuecomment-724415212
- # The GH actions bot is used by default if you didn't specify the two fields.
- # You can swap them out with your own user credentials.
- user_name: github-actions[bot]
- user_email: 41898282+github-actions[bot]@users.noreply.github.com
\ No newline at end of file
diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/quality-gate.yml
new file mode 100644
index 000000000..82930e0ed
--- /dev/null
+++ b/.github/workflows/quality-gate.yml
@@ -0,0 +1,163 @@
+name: CI Quality Gate
+
+on:
+ pull_request:
+ types: [opened, synchronize, reopened]
+ paths:
+ [
+ "cortex-cpp/**",
+ ]
+ workflow_dispatch:
+
+env:
+ LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
+ EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf
+
+jobs:
+ build-and-test:
+ runs-on: ${{ matrix.runs-on }}
+ timeout-minutes: 40
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - os: "linux"
+ name: "amd64-avx2"
+ runs-on: "ubuntu-18-04"
+ cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF"
+ run-e2e: true
+
+ - os: "linux"
+ name: "amd64-avx"
+ runs-on: "ubuntu-18-04"
+ cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
+ run-e2e: false
+
+ - os: "linux"
+ name: "amd64-avx512"
+ runs-on: "ubuntu-18-04"
+ cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
+ run-e2e: false
+
+ - os: "linux"
+ name: "amd64-vulkan"
+ runs-on: "ubuntu-18-04-cuda-11-7"
+ cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
+ run-e2e: false
+
+ - os: "linux"
+ name: "amd64-cuda-11-7"
+ runs-on: "ubuntu-18-04-cuda-11-7"
+ cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
+ run-e2e: false
+
+ - os: "linux"
+ name: "amd64-cuda-12-0"
+ runs-on: "ubuntu-18-04-cuda-12-0"
+ cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
+ run-e2e: false
+
+ - os: "mac"
+ name: "amd64"
+ runs-on: "macos-13"
+ cmake-flags: ""
+ run-e2e: true
+
+ - os: "mac"
+ name: "arm64"
+ runs-on: "mac-silicon"
+ cmake-flags: "-DMAC_ARM64=ON"
+ run-e2e: true
+
+ - os: "windows"
+ name: "amd64-avx2"
+ runs-on: "windows-latest"
+ cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: true
+
+ - os: "windows"
+ name: "amd64-avx"
+ runs-on: "windows-latest"
+ cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-avx512"
+ runs-on: "windows-latest"
+ cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-vulkan"
+ runs-on: "windows-latest"
+ cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-avx2-cuda-12-0"
+ runs-on: "windows-cuda-12-0"
+ cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-avx-cuda-12-0"
+ runs-on: "windows-cuda-12-0"
+ cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-avx512-cuda-12-0"
+ runs-on: "windows-cuda-12-0"
+ cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-avx2-cuda-11-7"
+ runs-on: "windows-cuda-11-7"
+ cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+ - os: "windows"
+ name: "amd64-avx-cuda-11-7"
+ runs-on: "windows-cuda-11-7"
+ cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+ - os: "windows"
+ name: "amd64-avx512-cuda-11-7"
+ runs-on: "windows-cuda-11-7"
+ cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+ run-e2e: false
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - name: Install choco on Windows
+        if: runner.os == 'Windows'
+        run: |
+          choco install make -y
+
+      - name: Build
+        run: |
+          cd cortex-cpp
+          make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}"
+
+      - name: Package
+        run: |
+          cd cortex-cpp
+          make package
+
+      - name: Run e2e testing
+        if: ${{ matrix.run-e2e }}
+        run: |
+          cd cortex-cpp
+          make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }}
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: cortex-llamacpp-engine-${{ matrix.os }}-${{ matrix.name }}
+          path: ./cortex-cpp/cortex
\ No newline at end of file
diff --git a/cortex-cpp/.gitignore b/cortex-cpp/.gitignore
index be1237faa..69c167305 100644
--- a/cortex-cpp/.gitignore
+++ b/cortex-cpp/.gitignore
@@ -85,7 +85,6 @@ CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
-Makefile
!nitro-node/Makefile
cmake_install.cmake
install_manifest.txt
diff --git a/cortex-cpp/Makefile b/cortex-cpp/Makefile
new file mode 100644
index 000000000..960bb198a
--- /dev/null
+++ b/cortex-cpp/Makefile
@@ -0,0 +1,68 @@
+# Makefile for the Cortex llamacpp engine - build, package, and e2e-test targets
+.PHONY: all build package run-e2e-test
+
+
+CMAKE_EXTRA_FLAGS ?= ""
+RUN_TESTS ?= false
+LLM_MODEL_URL ?= "https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
+EMBEDDING_MODEL_URL ?= "https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf"
+
+# Default target, does nothing
+all:
+ @echo "Specify a target to run"
+
+# Build the Cortex engine
+build:
+ifeq ($(OS),Windows_NT)
+ @powershell -Command "cmake -S ./nitro_deps -B ./build_deps/nitro_deps;"
+ @powershell -Command "cmake --build ./build_deps/nitro_deps --config Release -j4;"
+ @powershell -Command "mkdir -p build; cd build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release -j4;"
+else ifeq ($(shell uname -s),Linux)
+ @./install_deps.sh;
+ @mkdir -p build && cd build; \
+ cmake .. $(CMAKE_EXTRA_FLAGS); \
+ make -j4;
+else
+ @./install_deps.sh;
+ @mkdir -p build && cd build; \
+ cmake .. $(CMAKE_EXTRA_FLAGS); \
+ make -j4;
+endif
+
+package:
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "mkdir -p cortex\engines\cortex.llamacpp\; cp build\engines\cortex.llamacpp\engine.dll cortex\engines\cortex.llamacpp\;"
+	@powershell -Command "cp build\Release\nitro.exe .\cortex\;"
+	@powershell -Command "cp build_deps\_install\bin\zlib.dll .\cortex\;"
+	@powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex\;"
+	@powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex\;"
+	@powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex\;"
+	@powershell -Command "7z a -ttar temp.tar cortex\\*; 7z a -tgzip cortex.tar.gz temp.tar;"
+else ifeq ($(shell uname -s),Linux)
+	@mkdir -p cortex/engines/cortex.llamacpp; \
+	cp build/engines/cortex.llamacpp/libengine.so cortex/engines/cortex.llamacpp/; \
+	cp build/nitro cortex/; \
+	tar -czvf cortex.tar.gz cortex;
+else
+	@mkdir -p cortex/engines/cortex.llamacpp; \
+	cp build/engines/cortex.llamacpp/libengine.dylib cortex/engines/cortex.llamacpp/; \
+	cp build/nitro cortex/; \
+	tar -czvf cortex.tar.gz cortex;
+endif
+
+run-e2e-test:
+# NOTE: each recipe line runs in its own shell, so a plain `@exit 0` cannot
+# abort the recipe; skip the tests via a Make-level else-chain instead.
+ifeq ($(RUN_TESTS),false)
+	@echo "Skipping tests"
+else ifeq ($(OS),Windows_NT)
+	@powershell -Command "cd cortex; ..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);"
+else ifeq ($(shell uname -s),Linux)
+	@cd cortex; \
+	chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \
+	rm -rf uploads/;
+else
+	@cd cortex; \
+	chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \
+	rm -rf uploads/;
+endif
\ No newline at end of file