From 30687eee3fe202c8e148466db498fe9e4c0e90ab Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 16 Apr 2024 09:47:52 +0700 Subject: [PATCH 1/6] feat: e2e embedding endpoint testing for linux and mac --- .../e2e-test-embedding-linux-and-mac.sh | 104 ++++++++++++++++++ .github/workflows/build.yml | 26 +++++ 2 files changed, 130 insertions(+) create mode 100644 .github/scripts/e2e-test-embedding-linux-and-mac.sh diff --git a/.github/scripts/e2e-test-embedding-linux-and-mac.sh b/.github/scripts/e2e-test-embedding-linux-and-mac.sh new file mode 100644 index 000000000..f62cfca39 --- /dev/null +++ b/.github/scripts/e2e-test-embedding-linux-and-mac.sh @@ -0,0 +1,104 @@ +#!/bin/bash + +## Example run command +# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf + +# Check for required arguments +if [[ $# -ne 2 ]]; then + echo "Usage: $0 " + exit 1 +fi + +rm /tmp/response1.log /tmp/response2.log /tmp/nitro.log + +BINARY_PATH=$1 +DOWNLOAD_URL=$2 + +# Random port to ensure it's not used +min=10000 +max=11000 +range=$((max - min + 1)) +PORT=$((RANDOM % range + min)) + +# Start the binary file +"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log & + +# Get the process id of the binary file +pid=$! + +if ! ps -p $pid >/dev/null; then + echo "nitro failed to start. Logs:" + cat /tmp/nitro.log + exit 1 +fi + +# Wait for a few seconds to let the server start +sleep 5 + +# Check if /tmp/test-embedding exists, if not, download it +if [[ ! 
-f "/tmp/test-embedding" ]]; then + curl --connect-timeout 300 $DOWNLOAD_URL --output /tmp/test-embedding +fi + +# Run the curl commands +response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \ + --header 'Content-Type: application/json' \ + --data '{ + "llama_model_path": "/tmp/test-embedding", + "ctx_len": 50, + "ngl": 32, + "embedding": true, + "model_type": "embedding" +}') + +if ! ps -p $pid >/dev/null; then + echo "nitro failed to load model. Logs:" + cat /tmp/nitro.log + exit 1 +fi + +response2=$( + curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/embeddings" \ + --header 'Content-Type: application/json' \ + --header 'Accept: text/event-stream' \ + --header 'Access-Control-Allow-Origin: *' \ + --data '{ + "input": "Hello", + "model": "test-embedding", + "encoding_format": "float" + }' +) + +error_occurred=0 +if [[ "$response1" -ne 200 ]]; then + echo "The first curl command failed with status code: $response1" + cat /tmp/response1.log + error_occurred=1 +fi + +if [[ "$response2" -ne 200 ]]; then + echo "The second curl command failed with status code: $response2" + cat /tmp/response2.log + error_occurred=1 +fi + +if [[ "$error_occurred" -eq 1 ]]; then + echo "Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!" + echo "Nitro Error Logs:" + cat /tmp/nitro.log + kill $pid + exit 1 +fi + +echo "----------------------" +echo "Log load model:" +cat /tmp/response1.log + +echo "----------------------" +echo "Log run test:" +cat /tmp/response2.log + +echo "Nitro test run successfully!" 
+ +# Kill the server process +kill $pid diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5d6f44115..c342c1a58 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -49,6 +49,7 @@ env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf WHISPER_MODEL_URL: https://delta.jan.ai/ggml-tiny-q5_1.bin + EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf jobs: create-draft-release: @@ -188,6 +189,15 @@ jobs: cd nitro chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} rm -rf uploads/ + + - name: Run e2e testing - Embedding + shell: bash + if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }} + run: | + # run e2e testing + cd nitro + chmod +x ../.github/scripts/e2e-test-embedding-linux-and-mac.sh && ../.github/scripts/e2e-test-embedding-linux-and-mac.sh ./nitro ${{ env.EMBEDDING_MODEL_URL }} + rm -rf uploads/ - name: Run e2e testing - Whisper.CPP shell: bash @@ -309,6 +319,14 @@ jobs: cd nitro/ chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} rm -rf uploads/ + + - name: Run e2e testing - Embedding + shell: bash + run: | + # run e2e testing + cd nitro/ + chmod +x ../.github/scripts/e2e-test-embedding-linux-and-mac.sh && ../.github/scripts/e2e-test-embedding-linux-and-mac.sh ./nitro ${{ env.EMBEDDING_MODEL_URL }} + rm -rf uploads/ - name: Run e2e testing - Whisper.CPP shell: bash @@ -375,6 +393,14 @@ jobs: cd nitro chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} rm -rf uploads/ + + - name: Run e2e testing - Embedding + shell: bash + run: | + # run e2e testing + cd nitro + 
chmod +x ../.github/scripts/e2e-test-embedding-linux-and-mac.sh && ../.github/scripts/e2e-test-embedding-linux-and-mac.sh ./nitro ${{ env.EMBEDDING_MODEL_URL }} + rm -rf uploads/ - name: Run e2e testing - Whisper.CPP shell: bash From 1307204b432d02310ee6fe5d0f5a1e17e0e5b436 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 16 Apr 2024 10:24:05 +0700 Subject: [PATCH 2/6] feat: e2e embedding endpoint testing for windows --- .../scripts/e2e-test-embedding-windows.bat | 110 ++++++++++++++++++ .github/workflows/build.yml | 8 ++ 2 files changed, 118 insertions(+) create mode 100644 .github/scripts/e2e-test-embedding-windows.bat diff --git a/.github/scripts/e2e-test-embedding-windows.bat b/.github/scripts/e2e-test-embedding-windows.bat new file mode 100644 index 000000000..9358230b1 --- /dev/null +++ b/.github/scripts/e2e-test-embedding-windows.bat @@ -0,0 +1,110 @@ +@echo off + +set "TEMP=C:\Users\%UserName%\AppData\Local\Temp" +set "MODEL_PATH=%TEMP%\test-embedding" + +rem Check for required arguments +if "%~2"=="" ( + echo Usage: %~0 ^ ^ + exit /b 1 +) + +set "BINARY_PATH=%~1" +set "DOWNLOAD_URL=%~2" + +for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi" + +echo BINARY_NAME=%BINARY_NAME% + +del %TEMP%\response1.log 2>nul +del %TEMP%\response2.log 2>nul +del %TEMP%\nitro.log 2>nul + +set /a min=9999 +set /a max=11000 +set /a range=max-min+1 +set /a PORT=%min% + %RANDOM% %% %range% + +rem Start the binary file +start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1 + +ping -n 6 127.0.0.1 %PORT% > nul + +rem Capture the PID of the started process with "nitro" in its name +for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do ( + set "pid=%%a" +) + +echo pid=%pid% + +if not defined pid ( + echo nitro failed to start. 
Logs: + type %TEMP%\nitro.log + exit /b 1 +) + +rem Wait for a few seconds to let the server start + +rem Check if %TEMP%\testmodel exists, if not, download it +if not exist "%MODEL_PATH%" ( + curl.exe --connect-timeout 300 %DOWNLOAD_URL% --output "%MODEL_PATH%" +) + +rem Define JSON strings for curl data +call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%" +set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}" +set "curl_data2={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}" + +rem Print the values of curl_data1 and curl_data2 for debugging +echo curl_data1=%curl_data1% +echo curl_data2=%curl_data2% + +rem Run the curl commands and capture the status code +curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 + +curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^ +--header "Content-Type: application/json" ^ +--header "Accept: text/event-stream" ^ +--header "Access-Control-Allow-Origin: *" ^ +--data "%curl_data2%" > %TEMP%\response2.log 2>&1 + +set "error_occurred=0" + +rem Read the status codes from the log files +for /f %%a in (%TEMP%\response1.log) do set "response1=%%a" +for /f %%a in (%TEMP%\response2.log) do set "response2=%%a" + +if "%response1%" neq "200" ( + echo The first curl command failed with status code: %response1% + type %TEMP%\response1.log + set "error_occurred=1" +) + +if "%response2%" neq "200" ( + echo The second curl command failed with status code: %response2% + type %TEMP%\response2.log + set "error_occurred=1" +) + +if "%error_occurred%"=="1" ( + echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!! 
+ echo Nitro Error Logs: + type %TEMP%\nitro.log + taskkill /f /pid %pid% + exit /b 1 +) + + +echo ---------------------- +echo Log load model: +type %TEMP%\response1.log + +echo ---------------------- +echo "Log run test:" +type %TEMP%\response2.log + +echo Nitro test run successfully! + +rem Kill the server process +@REM taskkill /f /pid %pid% +taskkill /f /im nitro.exe 2>nul || exit /B 0 \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c342c1a58..bcb6095ac 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -548,6 +548,14 @@ jobs: ..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }} rmdir /S /Q .\build\Release\uploads + - name: Run e2e testing - Embedding + shell: cmd + if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }} + run: | + cd build\Release + ..\..\.github\scripts\e2e-test-embedding-windows.bat nitro.exe ${{ env.EMBEDDING_MODEL_URL }} + rmdir /S /Q .\build\Release\uploads + - name: Run e2e testing - Whisper.cpp shell: cmd if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }} From 5e216339c4867eca088ffcb6a7e558dc6b02789d Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 16 Apr 2024 11:43:10 +0700 Subject: [PATCH 3/6] fix: move e2e embedding linux and mac to llama --- .../e2e-test-embedding-linux-and-mac.sh | 104 ------------------ .../scripts/e2e-test-llama-linux-and-mac.sh | 93 +++++++++++++--- .github/workflows/build.yml | 31 +----- 3 files changed, 83 insertions(+), 145 deletions(-) delete mode 100644 .github/scripts/e2e-test-embedding-linux-and-mac.sh diff --git a/.github/scripts/e2e-test-embedding-linux-and-mac.sh b/.github/scripts/e2e-test-embedding-linux-and-mac.sh deleted file mode 100644 index f62cfca39..000000000 --- a/.github/scripts/e2e-test-embedding-linux-and-mac.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/bin/bash - -## Example run 
command -# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf - -# Check for required arguments -if [[ $# -ne 2 ]]; then - echo "Usage: $0 " - exit 1 -fi - -rm /tmp/response1.log /tmp/response2.log /tmp/nitro.log - -BINARY_PATH=$1 -DOWNLOAD_URL=$2 - -# Random port to ensure it's not used -min=10000 -max=11000 -range=$((max - min + 1)) -PORT=$((RANDOM % range + min)) - -# Start the binary file -"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log & - -# Get the process id of the binary file -pid=$! - -if ! ps -p $pid >/dev/null; then - echo "nitro failed to start. Logs:" - cat /tmp/nitro.log - exit 1 -fi - -# Wait for a few seconds to let the server start -sleep 5 - -# Check if /tmp/test-embedding exists, if not, download it -if [[ ! -f "/tmp/test-embedding" ]]; then - curl --connect-timeout 300 $DOWNLOAD_URL --output /tmp/test-embedding -fi - -# Run the curl commands -response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \ - --header 'Content-Type: application/json' \ - --data '{ - "llama_model_path": "/tmp/test-embedding", - "ctx_len": 50, - "ngl": 32, - "embedding": true, - "model_type": "embedding" -}') - -if ! ps -p $pid >/dev/null; then - echo "nitro failed to load model. 
Logs:" - cat /tmp/nitro.log - exit 1 -fi - -response2=$( - curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/embeddings" \ - --header 'Content-Type: application/json' \ - --header 'Accept: text/event-stream' \ - --header 'Access-Control-Allow-Origin: *' \ - --data '{ - "input": "Hello", - "model": "test-embedding", - "encoding_format": "float" - }' -) - -error_occurred=0 -if [[ "$response1" -ne 200 ]]; then - echo "The first curl command failed with status code: $response1" - cat /tmp/response1.log - error_occurred=1 -fi - -if [[ "$response2" -ne 200 ]]; then - echo "The second curl command failed with status code: $response2" - cat /tmp/response2.log - error_occurred=1 -fi - -if [[ "$error_occurred" -eq 1 ]]; then - echo "Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!" - echo "Nitro Error Logs:" - cat /tmp/nitro.log - kill $pid - exit 1 -fi - -echo "----------------------" -echo "Log load model:" -cat /tmp/response1.log - -echo "----------------------" -echo "Log run test:" -cat /tmp/response2.log - -echo "Nitro test run successfully!" 
- -# Kill the server process -kill $pid diff --git a/.github/scripts/e2e-test-llama-linux-and-mac.sh b/.github/scripts/e2e-test-llama-linux-and-mac.sh index e97c51f63..5b7b9771d 100644 --- a/.github/scripts/e2e-test-llama-linux-and-mac.sh +++ b/.github/scripts/e2e-test-llama-linux-and-mac.sh @@ -4,15 +4,16 @@ # ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf # Check for required arguments -if [[ $# -ne 2 ]]; then - echo "Usage: $0 " +if [[ $# -ne 3 ]]; then + echo "Usage: $0 " exit 1 fi -rm /tmp/response1.log /tmp/response2.log /tmp/nitro.log +rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/nitro.log BINARY_PATH=$1 -DOWNLOAD_URL=$2 +DOWNLOAD_LLM_URL=$2 +DOWNLOAD_EMBEDDING_URL=$3 # Random port to ensure it's not used min=10000 @@ -37,11 +38,16 @@ sleep 5 # Check if /tmp/testllm exists, if not, download it if [[ ! -f "/tmp/testllm" ]]; then - curl --connect-timeout 300 $DOWNLOAD_URL --output /tmp/testllm + curl --connect-timeout 300 $DOWNLOAD_LLM_URL --output /tmp/testllm +fi + +# Check if /tmp/test-embedding exists, if not, download it +if [[ ! -f "/tmp/test-embedding" ]]; then + curl --connect-timeout 300 $DOWNLOAD_EMBEDDING_URL --output /tmp/test-embedding fi # Run the curl commands -response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \ +response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \ --header 'Content-Type: application/json' \ --data '{ "llama_model_path": "/tmp/testllm", @@ -57,7 +63,7 @@ if ! 
ps -p $pid >/dev/null; then fi response2=$( - curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \ + curl --connect-timeout 60 -o /tmp/completion-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \ --header 'Content-Type: application/json' \ --header 'Accept: text/event-stream' \ --header 'Access-Control-Allow-Origin: *' \ @@ -76,16 +82,65 @@ response2=$( }' ) +# unload model +response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/unloadModel" \ + --header 'Content-Type: application/json' \ + --data '{ + "llama_model_path": "/tmp/testllm" +}') + +# load embedding model +response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \ + --header 'Content-Type: application/json' \ + --data '{ + "llama_model_path": "/tmp/test-embedding", + "ctx_len": 50, + "ngl": 32, + "embedding": true, + "model_type": "embedding" +}') + +# request embedding +response5=$( + curl --connect-timeout 60 -o /tmp/embedding-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/embeddings" \ + --header 'Content-Type: application/json' \ + --header 'Accept: text/event-stream' \ + --header 'Access-Control-Allow-Origin: *' \ + --data '{ + "input": "Hello", + "model": "test-embedding", + "encoding_format": "float" + }' +) + error_occurred=0 if [[ "$response1" -ne 200 ]]; then - echo "The first curl command failed with status code: $response1" - cat /tmp/response1.log + echo "The load llm model curl command failed with status code: $response1" + cat /tmp/load-llm-model-res.log error_occurred=1 fi if [[ "$response2" -ne 200 ]]; then - echo "The second curl command failed with status code: $response2" - cat /tmp/response2.log + echo "The completion curl command failed with status 
code: $response2" + cat /tmp/completion-res.log + error_occurred=1 +fi + +if [[ "$response3" -ne 200 ]]; then + echo "The unload model curl command failed with status code: $response3" + cat /tmp/unload-model-res.log + error_occurred=1 +fi + +if [[ "$response4" -ne 200 ]]; then + echo "The load embedding model curl command failed with status code: $response4" + cat /tmp/load-embedding-model-res.log + error_occurred=1 +fi + +if [[ "$response5" -ne 200 ]]; then + echo "The embedding curl command failed with status code: $response5" + cat /tmp/embedding-res.log error_occurred=1 fi @@ -99,11 +154,23 @@ fi echo "----------------------" echo "Log load model:" -cat /tmp/response1.log +cat /tmp/load-llm-model-res.log + +echo "----------------------" +echo "Log run test:" +cat /tmp/completion-res.log + +echo "----------------------" +echo "Log run test:" +cat /tmp/unload-model-res.log + +echo "----------------------" +echo "Log run test:" +cat /tmp/load-embedding-model-res.log echo "----------------------" echo "Log run test:" -cat /tmp/response2.log +cat /tmp/embedding-res.log echo "Nitro test run successfully!" 
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bcb6095ac..e1b57b4c1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -187,16 +187,7 @@ jobs: run: | # run e2e testing cd nitro - chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} - rm -rf uploads/ - - - name: Run e2e testing - Embedding - shell: bash - if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }} - run: | - # run e2e testing - cd nitro - chmod +x ../.github/scripts/e2e-test-embedding-linux-and-mac.sh && ../.github/scripts/e2e-test-embedding-linux-and-mac.sh ./nitro ${{ env.EMBEDDING_MODEL_URL }} + chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }} rm -rf uploads/ - name: Run e2e testing - Whisper.CPP @@ -317,15 +308,7 @@ jobs: run: | # run e2e testing cd nitro/ - chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} - rm -rf uploads/ - - - name: Run e2e testing - Embedding - shell: bash - run: | - # run e2e testing - cd nitro/ - chmod +x ../.github/scripts/e2e-test-embedding-linux-and-mac.sh && ../.github/scripts/e2e-test-embedding-linux-and-mac.sh ./nitro ${{ env.EMBEDDING_MODEL_URL }} + chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }} rm -rf uploads/ - name: Run e2e testing - Whisper.CPP @@ -391,15 +374,7 @@ jobs: run: | # run e2e testing cd nitro - chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} - rm -rf uploads/ - - - name: Run e2e testing - Embedding - shell: bash 
- run: | - # run e2e testing - cd nitro - chmod +x ../.github/scripts/e2e-test-embedding-linux-and-mac.sh && ../.github/scripts/e2e-test-embedding-linux-and-mac.sh ./nitro ${{ env.EMBEDDING_MODEL_URL }} + chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }} rm -rf uploads/ - name: Run e2e testing - Whisper.CPP From af83b1461fc68df5cb457866cb88d536b3ef50b6 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 16 Apr 2024 13:40:43 +0700 Subject: [PATCH 4/6] fix: move e2e embedding windows to llama --- .github/scripts/e2e-test-llama-windows.bat | 84 ++++++++++++++++++---- 1 file changed, 71 insertions(+), 13 deletions(-) diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat index a6526f358..9ceb9e5c5 100644 --- a/.github/scripts/e2e-test-llama-windows.bat +++ b/.github/scripts/e2e-test-llama-windows.bat @@ -1,16 +1,18 @@ @echo off set "TEMP=C:\Users\%UserName%\AppData\Local\Temp" -set "MODEL_PATH=%TEMP%\testllm" +set "MODEL_LLM_PATH=%TEMP%\testllm" +set "MODEL_EMBEDDING_PATH=%TEMP%\test-embedding" rem Check for required arguments -if "%~2"=="" ( - echo Usage: %~0 ^ ^ +if "%~3"=="" ( + echo Usage: %~0 ^ ^ ^ exit /b 1 ) set "BINARY_PATH=%~1" -set "DOWNLOAD_URL=%~2" +set "DOWNLOAD_LLM_URL=%~2" +set "DOWNLOAD_EMBEDDING_URL=%~3" for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi" @@ -18,6 +20,9 @@ echo BINARY_NAME=%BINARY_NAME% del %TEMP%\response1.log 2>nul del %TEMP%\response2.log 2>nul +del %TEMP%\response3.log 2>nul +del %TEMP%\response4.log 2>nul +del %TEMP%\response5.log 2>nul del %TEMP%\nitro.log 2>nul set /a min=9999 @@ -46,33 +51,56 @@ if not defined pid ( rem Wait for a few seconds to let the server start rem Check if %TEMP%\testmodel exists, if not, download it -if not exist "%MODEL_PATH%" ( - curl.exe --connect-timeout 300 %DOWNLOAD_URL% --output "%MODEL_PATH%" +if not exist 
"%MODEL_LLM_PATH%" ( + curl.exe --connect-timeout 300 %DOWNLOAD_LLM_URL% --output "%MODEL_LLM_PATH%" +) + +if not exist "%MODEL_EMBEDDING_PATH%" ( + curl.exe --connect-timeout 300 %DOWNLOAD_EMBEDDING_URL% --output "%MODEL_EMBEDDING_PATH%" ) rem Define JSON strings for curl data -call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%" -set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}" +call set "MODEL_LLM_PATH_STRING=%%MODEL_LLM_PATH:\=\\%%" +call set "MODEL_EMBEDDING_PATH_STRING=%%MODEL_EMBEDDING_PATH:\=\\%%" +set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}" set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}" +set "curl_data3={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}" +set "curl_data4={\"llama_model_path\":\"%MODEL_EMBEDDING_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}" +set "curl_data5={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}" -rem Print the values of curl_data1 and curl_data2 for debugging +rem Print the values of curl_data for debugging echo curl_data1=%curl_data1% echo curl_data2=%curl_data2% +echo curl_data3=%curl_data3% +echo curl_data4=%curl_data4% +echo curl_data5=%curl_data5% rem Run the curl commands and capture the status code curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^ --header "Content-Type: application/json" ^ ---header "Accept: text/event-stream" ^ ---header 
"Access-Control-Allow-Origin: *" ^ --data "%curl_data2%" > %TEMP%\response2.log 2>&1 +rem give it some time to receive full response +timeout /t 5 + +curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1 + +curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1 + +curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^ +--header "Content-Type: application/json" ^ +--data "%curl_data5%" > %TEMP%\response5.log 2>&1 + set "error_occurred=0" rem Read the status codes from the log files for /f %%a in (%TEMP%\response1.log) do set "response1=%%a" for /f %%a in (%TEMP%\response2.log) do set "response2=%%a" +for /f %%a in (%TEMP%\response3.log) do set "response3=%%a" +for /f %%a in (%TEMP%\response4.log) do set "response4=%%a" +for /f %%a in (%TEMP%\response5.log) do set "response5=%%a" if "%response1%" neq "200" ( echo The first curl command failed with status code: %response1% @@ -86,6 +114,24 @@ if "%response2%" neq "200" ( set "error_occurred=1" ) +if "%response3%" neq "200" ( + echo The third curl command failed with status code: %response3% + type %TEMP%\response3.log + set "error_occurred=1" +) + +if "%response4%" neq "200" ( + echo The fourth curl command failed with status code: %response4% + type %TEMP%\response4.log + set "error_occurred=1" +) + +if "%response5%" neq "200" ( + echo The fifth curl command failed with status code: %response5% + type %TEMP%\response5.log + set "error_occurred=1" +) + if "%error_occurred%"=="1" ( echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!! 
echo Nitro Error Logs: @@ -96,13 +142,25 @@ if "%error_occurred%"=="1" ( echo ---------------------- -echo Log load model: +echo Log load llm model: type %TEMP%\response1.log echo ---------------------- -echo "Log run test:" +echo Log run test: type %TEMP%\response2.log +echo ---------------------- +echo Log unload model: +type %TEMP%\response3.log + +echo ---------------------- +echo Log load embedding model: +type %TEMP%\response4.log + +echo ---------------------- +echo Log run embedding test: +type %TEMP%\response5.log + echo Nitro test run successfully! rem Kill the server process From abcbed2debf55a90d42fce59b7967befb23529fd Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 16 Apr 2024 13:55:42 +0700 Subject: [PATCH 5/6] fix: remove embedding e2e --- .../scripts/e2e-test-embedding-windows.bat | 110 ------------------ .github/workflows/build.yml | 10 +- 2 files changed, 1 insertion(+), 119 deletions(-) delete mode 100644 .github/scripts/e2e-test-embedding-windows.bat diff --git a/.github/scripts/e2e-test-embedding-windows.bat b/.github/scripts/e2e-test-embedding-windows.bat deleted file mode 100644 index 9358230b1..000000000 --- a/.github/scripts/e2e-test-embedding-windows.bat +++ /dev/null @@ -1,110 +0,0 @@ -@echo off - -set "TEMP=C:\Users\%UserName%\AppData\Local\Temp" -set "MODEL_PATH=%TEMP%\test-embedding" - -rem Check for required arguments -if "%~2"=="" ( - echo Usage: %~0 ^ ^ - exit /b 1 -) - -set "BINARY_PATH=%~1" -set "DOWNLOAD_URL=%~2" - -for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi" - -echo BINARY_NAME=%BINARY_NAME% - -del %TEMP%\response1.log 2>nul -del %TEMP%\response2.log 2>nul -del %TEMP%\nitro.log 2>nul - -set /a min=9999 -set /a max=11000 -set /a range=max-min+1 -set /a PORT=%min% + %RANDOM% %% %range% - -rem Start the binary file -start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1 - -ping -n 6 127.0.0.1 %PORT% > nul - -rem Capture the PID of the started process with "nitro" in its name -for /f "tokens=2" 
%%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do ( - set "pid=%%a" -) - -echo pid=%pid% - -if not defined pid ( - echo nitro failed to start. Logs: - type %TEMP%\nitro.log - exit /b 1 -) - -rem Wait for a few seconds to let the server start - -rem Check if %TEMP%\testmodel exists, if not, download it -if not exist "%MODEL_PATH%" ( - curl.exe --connect-timeout 300 %DOWNLOAD_URL% --output "%MODEL_PATH%" -) - -rem Define JSON strings for curl data -call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%" -set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}" -set "curl_data2={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}" - -rem Print the values of curl_data1 and curl_data2 for debugging -echo curl_data1=%curl_data1% -echo curl_data2=%curl_data2% - -rem Run the curl commands and capture the status code -curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 - -curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^ ---header "Content-Type: application/json" ^ ---header "Accept: text/event-stream" ^ ---header "Access-Control-Allow-Origin: *" ^ ---data "%curl_data2%" > %TEMP%\response2.log 2>&1 - -set "error_occurred=0" - -rem Read the status codes from the log files -for /f %%a in (%TEMP%\response1.log) do set "response1=%%a" -for /f %%a in (%TEMP%\response2.log) do set "response2=%%a" - -if "%response1%" neq "200" ( - echo The first curl command failed with status code: %response1% - type %TEMP%\response1.log - set "error_occurred=1" -) - -if "%response2%" neq "200" ( - echo The second curl command failed with status code: %response2% - type %TEMP%\response2.log - set "error_occurred=1" 
-) - -if "%error_occurred%"=="1" ( - echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!! - echo Nitro Error Logs: - type %TEMP%\nitro.log - taskkill /f /pid %pid% - exit /b 1 -) - - -echo ---------------------- -echo Log load model: -type %TEMP%\response1.log - -echo ---------------------- -echo "Log run test:" -type %TEMP%\response2.log - -echo Nitro test run successfully! - -rem Kill the server process -@REM taskkill /f /pid %pid% -taskkill /f /im nitro.exe 2>nul || exit /B 0 \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e1b57b4c1..2705ba701 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -520,15 +520,7 @@ jobs: if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }} run: | cd build\Release - ..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }} - rmdir /S /Q .\build\Release\uploads - - - name: Run e2e testing - Embedding - shell: cmd - if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }} - run: | - cd build\Release - ..\..\.github\scripts\e2e-test-embedding-windows.bat nitro.exe ${{ env.EMBEDDING_MODEL_URL }} + ..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }} rmdir /S /Q .\build\Release\uploads - name: Run e2e testing - Whisper.cpp From bb5a6b1bcad06468bad5b85c3cb5e85ccd18c27e Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 16 Apr 2024 14:25:45 +0700 Subject: [PATCH 6/6] fix: e2e windows --- .github/scripts/e2e-test-llama-windows.bat | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat index 9ceb9e5c5..cddca1e0b 100644 --- a/.github/scripts/e2e-test-llama-windows.bat +++ b/.github/scripts/e2e-test-llama-windows.bat @@ -63,7 +63,7 @@ rem Define JSON strings for curl data call set 
"MODEL_LLM_PATH_STRING=%%MODEL_LLM_PATH:\=\\%%" call set "MODEL_EMBEDDING_PATH_STRING=%%MODEL_EMBEDDING_PATH:\=\\%%" set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}" -set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}" +set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":false,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}" set "curl_data3={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}" set "curl_data4={\"llama_model_path\":\"%MODEL_EMBEDDING_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}" set "curl_data5={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}" @@ -82,9 +82,6 @@ curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" -- --header "Content-Type: application/json" ^ --data "%curl_data2%" > %TEMP%\response2.log 2>&1 -rem give it some time to receive full response -timeout /t 5 - curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1 curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1