Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions .github/scripts/e2e-test-linux-and-mac.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/bin/bash
#
# End-to-end smoke test for the nitro server binary (Linux and macOS).
#
# Starts the given binary in the background, makes sure a test model is
# available at /tmp/testmodel (downloading it if needed), then calls the
# loadModel and chat_completion endpoints on localhost:3928 and checks that
# both answer with HTTP 200.
#
# Usage:   e2e-test-linux-and-mac.sh <path_to_binary> <url_to_download>
# Exit:    0 when both requests return 200, 1 otherwise.
#
## Example run command
# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf

# Fail on unset variables. 'set -e' is deliberately not used: a failing curl
# must not abort the script before the diagnostics below are printed.
set -u

# Check for required arguments
if [[ $# -ne 2 ]]; then
  echo "Usage: $0 <path_to_binary> <url_to_download>"
  exit 1
fi

readonly BINARY_PATH=$1
readonly DOWNLOAD_URL=$2
readonly MODEL_PATH=/tmp/testmodel
readonly NITRO_LOG=/tmp/nitro.log
readonly RESPONSE1_LOG=/tmp/response1.log
readonly RESPONSE2_LOG=/tmp/response2.log
readonly BASE_URL='http://localhost:3928/inferences/llamacpp'

# Print a file if it exists. curl does not create its -o file when the
# connection itself fails, so a plain cat would add a confusing error.
print_log() {
  if [[ -f "$1" ]]; then
    cat -- "$1"
  fi
}

# Stop the background server. Registered as an EXIT trap so the process is
# cleaned up on every exit path, not only the explicit ones.
pid=''
stop_server() {
  if [[ -n "$pid" ]]; then
    kill "$pid" 2>/dev/null
  fi
}
trap stop_server EXIT

# Remove logs from a previous run; -f keeps this quiet on a fresh machine.
rm -f -- "$RESPONSE1_LOG" "$RESPONSE2_LOG" "$NITRO_LOG"

# Start the binary file and remember its process id
"$BINARY_PATH" > "$NITRO_LOG" 2>&1 &
pid=$!

# Wait for a few seconds to let the server start
sleep 5

# Check liveness only after the wait: right after the launch the process
# practically always still exists, even if it is about to crash.
if ! kill -0 "$pid" 2>/dev/null; then
  echo "nitro failed to start. Logs:"
  print_log "$NITRO_LOG"
  exit 1
fi

# Check if the test model exists, if not, download it. The download goes to
# a temporary name first so that an interrupted transfer is never mistaken
# for a complete, cached model on the next run.
if [[ ! -f "$MODEL_PATH" ]]; then
  if ! wget "$DOWNLOAD_URL" -O "${MODEL_PATH}.part"; then
    echo "Failed to download test model from: $DOWNLOAD_URL"
    rm -f -- "${MODEL_PATH}.part"
    exit 1
  fi
  mv -- "${MODEL_PATH}.part" "$MODEL_PATH"
fi

# Run the curl commands. Each response body goes to its log file and only the
# HTTP status code (from -w) is captured; -S lets curl report transport
# errors on stderr (visible in the CI log) without polluting the status.
response1=$(curl -o "$RESPONSE1_LOG" -sS -w "%{http_code}" --location "${BASE_URL}/loadModel" \
  --header 'Content-Type: application/json' \
  --data '{
    "llama_model_path": "/tmp/testmodel",
    "ctx_len": 2048,
    "ngl": 32,
    "embedding": false
  }')

response2=$(curl -o "$RESPONSE2_LOG" -sS -w "%{http_code}" --location "${BASE_URL}/chat_completion" \
  --header 'Content-Type: application/json' \
  --header 'Accept: text/event-stream' \
  --header 'Access-Control-Allow-Origin: *' \
  --data '{
    "messages": [
      {"content": "Hello there", "role": "assistant"},
      {"content": "Write a long and sad story for me", "role": "user"}
    ],
    "stream": true,
    "model": "gpt-3.5-turbo",
    "max_tokens": 2048,
    "stop": ["hello"],
    "frequency_penalty": 0,
    "presence_penalty": 0,
    "temperature": 0.7
  }')

# Compare status codes as strings so that an unexpected, non-numeric value
# is reported as a failure instead of raising an arithmetic error.
error_occurred=0
if [[ "$response1" != "200" ]]; then
  echo "The first curl command failed with status code: $response1"
  print_log "$RESPONSE1_LOG"
  error_occurred=1
fi

if [[ "$response2" != "200" ]]; then
  echo "The second curl command failed with status code: $response2"
  print_log "$RESPONSE2_LOG"
  error_occurred=1
fi

if [[ "$error_occurred" -eq 1 ]]; then
  echo "Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!"
  echo "Nitro Error Logs:"
  print_log "$NITRO_LOG"
  # The EXIT trap kills the server process.
  exit 1
fi

echo "----------------------"
echo "Log load model:"
print_log "$RESPONSE1_LOG"

echo "----------------------"
echo "Log run test:"
print_log "$RESPONSE2_LOG"


echo "Nitro test run successfully!"

# The server process is killed by the EXIT trap.
exit 0
104 changes: 104 additions & 0 deletions .github/scripts/e2e-test-windows.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
@echo off

rem End-to-end smoke test for the nitro server binary on Windows.
rem
rem Starts the given binary in the background, makes sure a test model exists
rem in the temp directory (downloading it if needed), then calls the loadModel
rem and chat_completion endpoints on localhost:3928 and checks that both
rem answer with HTTP 200.
rem
rem Usage: e2e-test-windows.bat <path_to_binary> <url_to_download>
rem Exit:  0 when both requests return 200, 1 otherwise.

rem NOTE(review): TEMP is overridden with an explicit per-user path;
rem presumably to get the long-form path instead of an 8.3 short name - confirm.
set "TEMP=C:\Users\%UserName%\AppData\Local\Temp"
set "MODEL_PATH=%TEMP%\testmodel"

rem Check for required arguments
if "%~2"=="" (
    echo Usage: %~0 ^<path_to_binary^> ^<url_to_download^>
    exit /b 1
)

set "BINARY_PATH=%~1"
set "DOWNLOAD_URL=%~2"

rem Extract the file name (with extension) of the binary; it is used below to
rem look the process up in the task list.
for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"

echo BINARY_NAME=%BINARY_NAME%

rem Remove logs from a previous run (errors for missing files are discarded)
del "%TEMP%\response1.log" 2>nul
del "%TEMP%\response2.log" 2>nul
del "%TEMP%\nitro.log" 2>nul

rem Clear values that are only set conditionally further down, so nothing
rem inherited from the calling environment can leak into the checks.
set "pid="
set "response1="
set "response2="

rem Start the binary file
start /B "" "%BINARY_PATH%" > "%TEMP%\nitro.log" 2>&1

rem Wait for a few seconds to let the server start (ping is used as a delay)
ping -n 6 127.0.0.1 > nul

rem Capture the PID of the running process whose image name matches the binary.
rem If several processes match, the last one listed wins.
for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
    set "pid=%%a"
)

echo pid=%pid%

if not defined pid (
    echo nitro failed to start. Logs:
    type "%TEMP%\nitro.log"
    exit /b 1
)

rem Check if %TEMP%\testmodel exists, if not, download it
if not exist "%MODEL_PATH%" (
    bitsadmin.exe /transfer "DownloadTestModel" "%DOWNLOAD_URL%" "%MODEL_PATH%"
)

rem Fail early with a clear message when the download did not produce the model,
rem instead of letting the loadModel request fail later for a non-obvious reason.
if not exist "%MODEL_PATH%" (
    echo Failed to download the test model to "%MODEL_PATH%"
    taskkill /f /pid %pid%
    exit /b 1
)

rem Define JSON strings for curl data
rem Backslashes in the model path are doubled so the path is valid inside JSON.
call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}"
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":2048,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.7}"

rem Print the values of curl_data1 and curl_data2 for debugging
echo curl_data1=%curl_data1%
echo curl_data2=%curl_data2%

rem Run the curl commands and capture the status code
rem The response body goes to responseN.log (-o); the HTTP status code written
rem by -w goes to stdout, which is redirected to responseN_code.log.
curl.exe -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > "%TEMP%\response1_code.log" 2>&1

curl.exe -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/chat_completion" ^
--header "Content-Type: application/json" ^
--header "Accept: text/event-stream" ^
--header "Access-Control-Allow-Origin: *" ^
--data "%curl_data2%" > "%TEMP%\response2_code.log" 2>&1

set "error_occurred=0"

rem Read the status codes from the log files
rem (usebackq is required so the quoted argument is treated as a file name)
for /f "usebackq" %%a in ("%TEMP%\response1_code.log") do set "response1=%%a"
for /f "usebackq" %%a in ("%TEMP%\response2_code.log") do set "response2=%%a"

if "%response1%" neq "200" (
    echo The first curl command failed with status code: %response1%
    type "%TEMP%\response1.log"
    set "error_occurred=1"
)

if "%response2%" neq "200" (
    echo The second curl command failed with status code: %response2%
    type "%TEMP%\response2.log"
    set "error_occurred=1"
)

if "%error_occurred%"=="1" (
    echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!
    echo Nitro Error Logs:
    type "%TEMP%\nitro.log"
    taskkill /f /pid %pid%
    exit /b 1
)


echo ----------------------
echo Log load model:
type "%TEMP%\response1.log"

echo ----------------------
echo Log run test:
type "%TEMP%\response2.log"

echo Nitro test run successfully!

rem Kill the server process
taskkill /f /pid %pid%
43 changes: 39 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ on:
branches:
- main
tags: ['v*.*.*']
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
paths: ['.github/scripts/**','.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
pull_request:
types: [opened, synchronize, reopened]
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
paths: ['.github/scripts/**','.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']

env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
MODEL_URL: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf

jobs:
create-draft-release:
Expand Down Expand Up @@ -72,6 +73,12 @@ jobs:
run: |
mkdir -p nitro
cp build/nitro nitro/

# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
cd ..

zip -r nitro.zip nitro

- uses: actions/upload-release-asset@v1.0.1
Expand Down Expand Up @@ -117,6 +124,12 @@ jobs:
run: |
mkdir -p nitro
cp build/nitro nitro/

# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
cd ..

zip -r nitro.zip nitro

- uses: actions/upload-release-asset@v1.0.1
Expand Down Expand Up @@ -164,6 +177,12 @@ jobs:
mkdir -p nitro
cp llama.cpp/ggml-metal.metal nitro/
cp build/nitro nitro/

# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
cd ..

zip -r nitro.zip nitro

- uses: actions/upload-release-asset@v1.0.1
Expand Down Expand Up @@ -209,6 +228,12 @@ jobs:
run: |
mkdir -p nitro
cp build/nitro nitro/

# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
cd ..

zip -r nitro.zip nitro

- uses: actions/upload-release-asset@v1.0.1
Expand Down Expand Up @@ -278,6 +303,11 @@ jobs:
robocopy build\bin\Release .\build\Release llama.dll
robocopy ext_libs .\build\Release libcrypto-3-x64.dll
robocopy ext_libs .\build\Release libssl-3-x64.dll

cd .\build\Release
..\..\.github\scripts\e2e-test-windows.bat .\nitro.exe ${{ env.MODEL_URL }}
cd ..\..

7z a nitro.zip .\build\Release\*

- uses: actions/upload-release-asset@v1.0.1
Expand All @@ -287,7 +317,7 @@ jobs:
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-${{ matrix.build }}.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64.zip
asset_content_type: application/zip

windows-amd64-cuda-build:
Expand Down Expand Up @@ -338,6 +368,11 @@ jobs:
robocopy build\bin\Release .\build\Release llama.dll
robocopy ext_libs .\build\Release libcrypto-3-x64.dll
robocopy ext_libs .\build\Release libssl-3-x64.dll

cd .\build\Release
..\..\.github\scripts\e2e-test-windows.bat .\nitro.exe ${{ env.MODEL_URL }}
cd ..\..

7z a nitro.zip .\build\Release\*

- uses: actions/upload-release-asset@v1.0.1
Expand All @@ -347,7 +382,7 @@ jobs:
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-${{ matrix.build }}-cu${{ matrix.cuda }}.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-cuda.zip
asset_content_type: application/zip

update_release_draft:
Expand Down