Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 80 additions & 13 deletions .github/scripts/e2e-test-llama-linux-and-mac.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@
# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf

# Check for required arguments
if [[ $# -ne 2 ]]; then
echo "Usage: $0 <path_to_binary> <url_to_download>"
if [[ $# -ne 3 ]]; then
echo "Usage: $0 <path_to_binary> <url_to_download_llm> <url_to_download_embedding>"
exit 1
fi

rm /tmp/response1.log /tmp/response2.log /tmp/nitro.log
# Remove response/server logs left over from a previous run.
# -f keeps the very first run (when none of these files exist yet) quiet and
# returns success instead of printing "No such file or directory" errors.
rm -f /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/nitro.log

BINARY_PATH=$1
DOWNLOAD_URL=$2
DOWNLOAD_LLM_URL=$2
DOWNLOAD_EMBEDDING_URL=$3

# Random port to ensure it's not used
min=10000
Expand All @@ -37,11 +38,16 @@ sleep 5

# Check if /tmp/testllm exists, if not, download it
if [[ ! -f "/tmp/testllm" ]]; then
curl --connect-timeout 300 $DOWNLOAD_URL --output /tmp/testllm
curl --connect-timeout 300 $DOWNLOAD_LLM_URL --output /tmp/testllm
fi

# Download the embedding model to /tmp/test-embedding unless a copy is already
# there from an earlier run. The URL is quoted so characters such as '?', '*'
# or '[' in it are passed to curl verbatim instead of being globbed or split.
if [[ ! -f "/tmp/test-embedding" ]]; then
    curl --connect-timeout 300 "$DOWNLOAD_EMBEDDING_URL" --output /tmp/test-embedding
fi

# Run the curl commands
response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/testllm",
Expand All @@ -57,7 +63,7 @@ if ! ps -p $pid >/dev/null; then
fi

response2=$(
curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
curl --connect-timeout 60 -o /tmp/completion-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
--header 'Content-Type: application/json' \
--header 'Accept: text/event-stream' \
--header 'Access-Control-Allow-Origin: *' \
Expand All @@ -76,16 +82,65 @@ response2=$(
}'
)

# unload model
response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/unloadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/testllm"
}')

# load embedding model
response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/test-embedding",
"ctx_len": 50,
"ngl": 32,
"embedding": true,
"model_type": "embedding"
}')

# request embedding
response5=$(
curl --connect-timeout 60 -o /tmp/embedding-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/embeddings" \
--header 'Content-Type: application/json' \
--header 'Accept: text/event-stream' \
--header 'Access-Control-Allow-Origin: *' \
--data '{
"input": "Hello",
"model": "test-embedding",
"encoding_format": "float"
}'
)

error_occurred=0
if [[ "$response1" -ne 200 ]]; then
echo "The first curl command failed with status code: $response1"
cat /tmp/response1.log
echo "The load llm model curl command failed with status code: $response1"
cat /tmp/load-llm-model-res.log
error_occurred=1
fi

if [[ "$response2" -ne 200 ]]; then
echo "The second curl command failed with status code: $response2"
cat /tmp/response2.log
echo "The completion curl command failed with status code: $response2"
cat /tmp/completion-res.log
error_occurred=1
fi

if [[ "$response3" -ne 200 ]]; then
echo "The unload model curl command failed with status code: $response3"
cat /tmp/unload-model-res.log
error_occurred=1
fi

if [[ "$response4" -ne 200 ]]; then
echo "The load embedding model curl command failed with status code: $response4"
cat /tmp/load-embedding-model-res.log
error_occurred=1
fi

if [[ "$response5" -ne 200 ]]; then
echo "The embedding curl command failed with status code: $response5"
cat /tmp/embedding-res.log
error_occurred=1
fi

Expand All @@ -99,11 +154,23 @@ fi

echo "----------------------"
echo "Log load model:"
cat /tmp/response1.log
cat /tmp/load-llm-model-res.log

echo "----------------------"
echo "Log run test:"
cat /tmp/completion-res.log

# Print the response body returned by the unloadModel request.
echo "----------------------"
echo "Log unload model:"
cat /tmp/unload-model-res.log

# Print the response body returned when loading the embedding model.
echo "----------------------"
echo "Log load embedding model:"
cat /tmp/load-embedding-model-res.log

echo "----------------------"
echo "Log run test:"
cat /tmp/response2.log
cat /tmp/embedding-res.log

echo "Nitro test run successfully!"

Expand Down
85 changes: 70 additions & 15 deletions .github/scripts/e2e-test-llama-windows.bat
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
@echo off

set "TEMP=C:\Users\%UserName%\AppData\Local\Temp"
set "MODEL_PATH=%TEMP%\testllm"
set "MODEL_LLM_PATH=%TEMP%\testllm"
set "MODEL_EMBEDDING_PATH=%TEMP%\test-embedding"

rem Check for required arguments
if "%~2"=="" (
echo Usage: %~0 ^<path_to_binary^> ^<url_to_download^>
if "%~3"=="" (
echo Usage: %~0 ^<path_to_binary^> ^<url_to_download_llm^> ^<url_to_download_embedding^>
exit /b 1
)

set "BINARY_PATH=%~1"
set "DOWNLOAD_URL=%~2"
set "DOWNLOAD_LLM_URL=%~2"
set "DOWNLOAD_EMBEDDING_URL=%~3"

for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"

echo BINARY_NAME=%BINARY_NAME%

del %TEMP%\response1.log 2>nul
del %TEMP%\response2.log 2>nul
del %TEMP%\response3.log 2>nul
del %TEMP%\response4.log 2>nul
del %TEMP%\response5.log 2>nul
del %TEMP%\nitro.log 2>nul

set /a min=9999
Expand Down Expand Up @@ -46,33 +51,53 @@ if not defined pid (
rem Wait for a few seconds to let the server start

rem Check if %TEMP%\testllm exists, if not, download it
if not exist "%MODEL_PATH%" (
curl.exe --connect-timeout 300 %DOWNLOAD_URL% --output "%MODEL_PATH%"
if not exist "%MODEL_LLM_PATH%" (
curl.exe --connect-timeout 300 %DOWNLOAD_LLM_URL% --output "%MODEL_LLM_PATH%"
)

rem Define JSON strings for curl data
call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}"
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
if not exist "%MODEL_EMBEDDING_PATH%" (
curl.exe --connect-timeout 300 %DOWNLOAD_EMBEDDING_URL% --output "%MODEL_EMBEDDING_PATH%"
)

rem Print the values of curl_data1 and curl_data2 for debugging
rem Define JSON strings for curl data
call set "MODEL_LLM_PATH_STRING=%%MODEL_LLM_PATH:\=\\%%"
call set "MODEL_EMBEDDING_PATH_STRING=%%MODEL_EMBEDDING_PATH:\=\\%%"
set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":false,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
set "curl_data3={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
set "curl_data4={\"llama_model_path\":\"%MODEL_EMBEDDING_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}"
set "curl_data5={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}"

rem Print the values of curl_data for debugging
echo curl_data1=%curl_data1%
echo curl_data2=%curl_data2%
echo curl_data3=%curl_data3%
echo curl_data4=%curl_data4%
echo curl_data5=%curl_data5%

rem Run the curl commands and capture the status code
curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^
--header "Content-Type: application/json" ^
--header "Accept: text/event-stream" ^
--header "Access-Control-Allow-Origin: *" ^
--data "%curl_data2%" > %TEMP%\response2.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^
--header "Content-Type: application/json" ^
--data "%curl_data5%" > %TEMP%\response5.log 2>&1

set "error_occurred=0"

rem Read the status codes from the log files
for /f %%a in (%TEMP%\response1.log) do set "response1=%%a"
for /f %%a in (%TEMP%\response2.log) do set "response2=%%a"
for /f %%a in (%TEMP%\response3.log) do set "response3=%%a"
for /f %%a in (%TEMP%\response4.log) do set "response4=%%a"
for /f %%a in (%TEMP%\response5.log) do set "response5=%%a"

if "%response1%" neq "200" (
echo The first curl command failed with status code: %response1%
Expand All @@ -86,6 +111,24 @@ if "%response2%" neq "200" (
set "error_occurred=1"
)

if "%response3%" neq "200" (
echo The third curl command failed with status code: %response3%
type %TEMP%\response3.log
set "error_occurred=1"
)

if "%response4%" neq "200" (
echo The fourth curl command failed with status code: %response4%
type %TEMP%\response4.log
set "error_occurred=1"
)

if "%response5%" neq "200" (
echo The fifth curl command failed with status code: %response5%
type %TEMP%\response5.log
set "error_occurred=1"
)

if "%error_occurred%"=="1" (
echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!
echo Nitro Error Logs:
Expand All @@ -96,13 +139,25 @@ if "%error_occurred%"=="1" (


echo ----------------------
echo Log load model:
echo Log load llm model:
type %TEMP%\response1.log

echo ----------------------
echo "Log run test:"
echo Log run test:
type %TEMP%\response2.log

echo ----------------------
echo Log unload model:
type %TEMP%\response3.log

rem Print the response of the embedding-model loadModel request, which is
rem written to response4.log (response3.log holds the unloadModel response).
echo ----------------------
echo Log load embedding model:
type %TEMP%\response4.log

echo ----------------------
echo Log run embedding test:
type %TEMP%\response5.log

echo Nitro test run successfully!

rem Kill the server process
Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
WHISPER_MODEL_URL: https://delta.jan.ai/ggml-tiny-q5_1.bin
EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf

jobs:
create-draft-release:
Expand Down Expand Up @@ -186,7 +187,7 @@ jobs:
run: |
# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
rm -rf uploads/

- name: Run e2e testing - Whisper.CPP
Expand Down Expand Up @@ -307,7 +308,7 @@ jobs:
run: |
# run e2e testing
cd nitro/
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
rm -rf uploads/

- name: Run e2e testing - Whisper.CPP
Expand Down Expand Up @@ -373,7 +374,7 @@ jobs:
run: |
# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
rm -rf uploads/

- name: Run e2e testing - Whisper.CPP
Expand Down Expand Up @@ -519,7 +520,7 @@ jobs:
if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
run: |
cd build\Release
..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }}
..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
rmdir /S /Q .\build\Release\uploads

- name: Run e2e testing - Whisper.cpp
Expand Down