Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions .github/scripts/e2e-test-llama-linux-and-mac.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ range=$((max - min + 1))
PORT=$((RANDOM % range + min))

# Start the binary file
"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log 2>&1 &
"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log &

# Get the process id of the binary file
pid=$!
Expand All @@ -37,21 +37,27 @@ sleep 5

# Check if /tmp/testllm exists, if not, download it
if [[ ! -f "/tmp/testllm" ]]; then
wget $DOWNLOAD_URL -O /tmp/testllm
curl --connect-timeout 300 $DOWNLOAD_URL --output /tmp/testllm
fi

# Run the curl commands
response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/testllm",
"ctx_len": 50,
"ngl": 32,
"embedding": false
}' 2>&1)
}')

if ! ps -p $pid >/dev/null; then
echo "nitro failed to load model. Logs:"
cat /tmp/nitro.log
exit 1
fi

response2=$(
curl -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
--header 'Content-Type: application/json' \
--header 'Accept: text/event-stream' \
--header 'Access-Control-Allow-Origin: *' \
Expand All @@ -67,7 +73,7 @@ response2=$(
"frequency_penalty": 0,
"presence_penalty": 0,
"temperature": 0.1
}' 2>&1
}'
)

error_occurred=0
Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/e2e-test-llama-windows.bat
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ rem Wait for a few seconds to let the server start

rem Check if %TEMP%\testmodel exists, if not, download it
if not exist "%MODEL_PATH%" (
bitsadmin.exe /transfer "DownloadTestModel" %DOWNLOAD_URL% "%MODEL_PATH%"
curl.exe --connect-timeout 300 %DOWNLOAD_URL% --output "%MODEL_PATH%"
)

rem Define JSON strings for curl data
Expand Down
12 changes: 6 additions & 6 deletions .github/scripts/e2e-test-whisper-linux-and-mac.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ range=$((max - min + 1))
PORT=$((RANDOM % range + min))

# Start the binary file
"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log 2>&1 &
"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log &

# Get the process id of the binary file
pid=$!
Expand All @@ -37,25 +37,25 @@ sleep 5

# Check if /tmp/testwhisper exists, if not, download it
if [[ ! -f "/tmp/testwhisper" ]]; then
wget $DOWNLOAD_URL -O /tmp/testwhisper
curl --connect-timeout 300 $DOWNLOAD_URL --output /tmp/testwhisper
fi

# Run the curl commands
response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/audio/load_model" \
response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/audio/load_model" \
--header 'Content-Type: application/json' \
--data '{
"model_path": "/tmp/testwhisper",
"model_id": "whisper.cpp"
}' 2>&1)
}')

response2=$(
curl -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/audio/transcriptions" \
curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/audio/transcriptions" \
--header 'Access-Control-Allow-Origin: *' \
--form 'file=@"../whisper.cpp/samples/jfk.wav"' \
--form 'model_id="whisper.cpp"' \
--form 'temperature="0.0"' \
--form 'prompt="The transcript is about OpenAI which makes technology like DALL·E, GPT-3, and ChatGPT with the hope of one day building an AGI system that benefits all of humanity. The president is trying to raly people to support the cause."' \
2>&1

)

error_occurred=0
Expand Down
6 changes: 3 additions & 3 deletions .github/scripts/e2e-test-whisper-windows.bat
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,17 @@ rem Wait for a few seconds to let the server start

rem Check if %TEMP%\testwhisper exists, if not, download it
if not exist "%MODEL_PATH%" (
bitsadmin.exe /transfer "DownloadTestModel" %DOWNLOAD_URL% "%MODEL_PATH%"
curl.exe --connect-timeout 300 %DOWNLOAD_URL% --output "%MODEL_PATH%"
)

rem Define JSON strings for curl data
call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
set "curl_data1={\"model_path\":\"%MODEL_PATH_STRING%\",\"model_id\":\"whisper\"}"

rem Run the curl commands and capture the status code
curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1
curl.exe --connect-timeout 60 -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1

curl -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://localhost:%PORT%/v1/audio/transcriptions" ^
curl --connect-timeout 60 -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://localhost:%PORT%/v1/audio/transcriptions" ^
--form "file=@../..//whisper.cpp/samples/jfk.wav" ^
--form "model_id=whisper" > %TEMP%\response2_code.log 2>&1

Expand Down
142 changes: 29 additions & 113 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ on:

env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
LLM_MODEL_URL: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
WHISPER_MODEL_URL: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin
LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
WHISPER_MODEL_URL: https://delta.jan.ai/ggml-tiny-q5_1.bin

jobs:
create-draft-release:
Expand Down Expand Up @@ -125,6 +125,7 @@ jobs:
runs-on: ubuntu-18-04-cuda-11-7
needs: [create-draft-release, set-nitro-version]
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
timeout-minutes: 20
permissions:
contents: write
steps:
Expand Down Expand Up @@ -188,6 +189,7 @@ jobs:
runs-on: ubuntu-18-04-cuda-11-7
needs: [create-draft-release, set-nitro-version]
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
timeout-minutes: 20
permissions:
contents: write
steps:
Expand Down Expand Up @@ -231,22 +233,6 @@ jobs:
name: nitro-linux-amd64-vulkan
path: ./nitro

# - name: Run e2e testing - LLama.CPP
# shell: bash
# run: |
# # run e2e testing
# cd nitro
# chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
# rm -rf uploads/

# - name: Run e2e testing - Whisper.CPP
# shell: bash
# run: |
# # run e2e testing
# cd nitro
# chmod +x ../.github/scripts/e2e-test-whisper-linux-and-mac.sh && ../.github/scripts/e2e-test-whisper-linux-and-mac.sh ./nitro ${{ env.WHISPER_MODEL_URL }}
# rm -rf uploads/

- uses: actions/upload-release-asset@v1.0.1
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
Expand All @@ -261,6 +247,7 @@ jobs:
runs-on: ubuntu-18-04-cuda-${{ matrix.cuda }}
needs: [create-draft-release, set-nitro-version]
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
timeout-minutes: 20
permissions:
contents: write
strategy:
Expand Down Expand Up @@ -297,21 +284,21 @@ jobs:
name: nitro-linux-amd64-cuda-${{ matrix.cuda }}
path: ./nitro

- name: Run e2e testing - LLama.CPP
shell: bash
run: |
# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
rm -rf uploads/
# - name: Run e2e testing - LLama.CPP
# shell: bash
# run: |
# # run e2e testing
# cd nitro
# chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
# rm -rf uploads/

- name: Run e2e testing - Whisper.CPP
shell: bash
run: |
# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-whisper-linux-and-mac.sh && ../.github/scripts/e2e-test-whisper-linux-and-mac.sh ./nitro ${{ env.WHISPER_MODEL_URL }}
rm -rf uploads/
# - name: Run e2e testing - Whisper.CPP
# shell: bash
# run: |
# # run e2e testing
# cd nitro
# chmod +x ../.github/scripts/e2e-test-whisper-linux-and-mac.sh && ../.github/scripts/e2e-test-whisper-linux-and-mac.sh ./nitro ${{ env.WHISPER_MODEL_URL }}
# rm -rf uploads/

- uses: actions/upload-release-asset@v1.0.1
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
Expand All @@ -327,6 +314,7 @@ jobs:
runs-on: mac-silicon
needs: [create-draft-release, set-nitro-version]
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
timeout-minutes: 20
permissions:
contents: write
steps:
Expand Down Expand Up @@ -405,6 +393,7 @@ jobs:
runs-on: macos-latest
needs: [create-draft-release, set-nitro-version]
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
timeout-minutes: 20
permissions:
contents: write
steps:
Expand Down Expand Up @@ -470,86 +459,11 @@ jobs:
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-amd64.tar.gz
asset_content_type: application/gzip

# macOS-amd64-vulkan-build:
# runs-on: macos-latest
# needs: [create-draft-release, set-nitro-version]
# if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
# permissions:
# contents: write
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v3
# with:
# submodules: recursive

# - name: Dependencies
# id: depends
# continue-on-error: true
# run: |
# brew update
# brew install sdl2

# - name: Prepare Vulkan SDK
# uses: humbletim/setup-vulkan-sdk@v1.2.0
# with:
# vulkan-query-version: 1.3.204.0
# vulkan-components: Vulkan-Headers, Vulkan-Loader
# vulkan-use-cache: true

# - name: Build
# id: cmake_build
# run: |
# ./install_deps.sh
# mkdir build && cd build
# cmake -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} -DLLAMA_VULKAN=ON -DLLAMA_METAL=OFF ..
# CC=gcc-8 make -j $(sysctl -n hw.ncp)
# ls -la

# - name: Package
# shell: bash
# run: |
# mkdir -p nitro
# cp build/nitro nitro/
# tar -czvf nitro.tar.gz nitro

# - name: Upload Artifact
# uses: actions/upload-artifact@v2
# if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
# with:
# name: nitro-mac-amd64
# path: ./nitro

# - name: Run e2e testing - LLama.CPP
# shell: bash
# run: |
# # run e2e testing
# cd nitro
# chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
# rm -rf uploads/

# - name: Run e2e testing - Whisper.CPP
# shell: bash
# run: |
# # run e2e testing
# cd nitro
# chmod +x ../.github/scripts/e2e-test-whisper-linux-and-mac.sh && ../.github/scripts/e2e-test-whisper-linux-and-mac.sh ./nitro ${{ env.WHISPER_MODEL_URL }}
# rm -rf uploads/

# - uses: actions/upload-release-asset@v1.0.1
# if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# with:
# upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
# asset_path: ./nitro.tar.gz
# asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-amd64-vulkan.tar.gz
# asset_content_type: application/gzip

windows-amd64-build:
runs-on: windows-latest
needs: [create-draft-release, set-nitro-version]
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
timeout-minutes: 20
permissions:
contents: write

Expand Down Expand Up @@ -643,6 +557,7 @@ jobs:
runs-on: windows-latest
needs: [create-draft-release, set-nitro-version]
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
timeout-minutes: 20
permissions:
contents: write

Expand Down Expand Up @@ -729,6 +644,7 @@ jobs:
runs-on: windows-cuda-${{ matrix.cuda }}
needs: [create-draft-release, set-nitro-version]
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
timeout-minutes: 20
permissions:
contents: write

Expand Down Expand Up @@ -822,6 +738,7 @@ jobs:

update_release_draft:
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
timeout-minutes: 20
needs:
[
ubuntu-amd64-build,
Expand All @@ -830,7 +747,6 @@ jobs:
macOS-amd64-build,
windows-amd64-build,
windows-amd64-cuda-build,
# macOS-amd64-vulkan-build,
ubuntu-amd64-vulkan-build,
windows-amd64-vulkan-build,
]
Expand All @@ -844,7 +760,8 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

noti-discord-nightly:
if: always() && github.event_name == 'schedule' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success'
timeout-minutes: 20
if: github.event_name == 'schedule' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success'
needs:
[
create-draft-release,
Expand All @@ -854,7 +771,6 @@ jobs:
macOS-amd64-build,
windows-amd64-build,
windows-amd64-cuda-build,
# macOS-amd64-vulkan-build,
ubuntu-amd64-vulkan-build,
windows-amd64-vulkan-build,
]
Expand Down Expand Up @@ -883,7 +799,8 @@ jobs:
GITHUB_RUN_ID: ${{ github.run_id }}

noti-discord-manual:
if: always() && github.event_name == 'workflow_dispatch' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success'
timeout-minutes: 20
if: github.event_name == 'workflow_dispatch' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success'
needs:
[
create-draft-release,
Expand All @@ -893,7 +810,6 @@ jobs:
macOS-amd64-build,
windows-amd64-build,
windows-amd64-cuda-build,
# macOS-amd64-vulkan-build,
ubuntu-amd64-vulkan-build,
windows-amd64-vulkan-build,
]
Expand Down