Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 7ebab2e

Browse files
committed
Revert "feat: e2e embedding endpoint scripts (#511)"
This reverts commit d820e06.
1 parent 84ae2aa commit 7ebab2e

File tree

3 files changed

+32
-155
lines changed

3 files changed

+32
-155
lines changed

.github/scripts/e2e-test-llama-linux-and-mac.sh

Lines changed: 13 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,15 @@
44
# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
55

66
# Check for required arguments
7-
if [[ $# -ne 3 ]]; then
8-
echo "Usage: $0 <path_to_binary> <url_to_download_llm> <url_to_download_embedding>"
7+
if [[ $# -ne 2 ]]; then
8+
echo "Usage: $0 <path_to_binary> <url_to_download>"
99
exit 1
1010
fi
1111

12-
rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/nitro.log
12+
rm /tmp/response1.log /tmp/response2.log /tmp/nitro.log
1313

1414
BINARY_PATH=$1
15-
DOWNLOAD_LLM_URL=$2
16-
DOWNLOAD_EMBEDDING_URL=$3
15+
DOWNLOAD_URL=$2
1716

1817
# Random port to ensure it's not used
1918
min=10000
@@ -38,16 +37,11 @@ sleep 5
3837

3938
# Check if /tmp/testllm exists, if not, download it
4039
if [[ ! -f "/tmp/testllm" ]]; then
41-
curl --connect-timeout 300 $DOWNLOAD_LLM_URL --output /tmp/testllm
42-
fi
43-
44-
# Check if /tmp/test-embedding exists, if not, download it
45-
if [[ ! -f "/tmp/test-embedding" ]]; then
46-
curl --connect-timeout 300 $DOWNLOAD_EMBEDDING_URL --output /tmp/test-embedding
40+
curl --connect-timeout 300 $DOWNLOAD_URL --output /tmp/testllm
4741
fi
4842

4943
# Run the curl commands
50-
response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
44+
response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
5145
--header 'Content-Type: application/json' \
5246
--data '{
5347
"llama_model_path": "/tmp/testllm",
@@ -63,7 +57,7 @@ if ! ps -p $pid >/dev/null; then
6357
fi
6458

6559
response2=$(
66-
curl --connect-timeout 60 -o /tmp/completion-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
60+
curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
6761
--header 'Content-Type: application/json' \
6862
--header 'Accept: text/event-stream' \
6963
--header 'Access-Control-Allow-Origin: *' \
@@ -82,65 +76,16 @@ response2=$(
8276
}'
8377
)
8478

85-
# unload model
86-
response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/unloadModel" \
87-
--header 'Content-Type: application/json' \
88-
--data '{
89-
"llama_model_path": "/tmp/testllm"
90-
}')
91-
92-
# load embedding model
93-
response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
94-
--header 'Content-Type: application/json' \
95-
--data '{
96-
"llama_model_path": "/tmp/test-embedding",
97-
"ctx_len": 50,
98-
"ngl": 32,
99-
"embedding": true,
100-
"model_type": "embedding"
101-
}')
102-
103-
# request embedding
104-
response5=$(
105-
curl --connect-timeout 60 -o /tmp/embedding-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/embeddings" \
106-
--header 'Content-Type: application/json' \
107-
--header 'Accept: text/event-stream' \
108-
--header 'Access-Control-Allow-Origin: *' \
109-
--data '{
110-
"input": "Hello",
111-
"model": "test-embedding",
112-
"encoding_format": "float"
113-
}'
114-
)
115-
11679
error_occurred=0
11780
if [[ "$response1" -ne 200 ]]; then
118-
echo "The load llm model curl command failed with status code: $response1"
119-
cat /tmp/load-llm-model-res.log
81+
echo "The first curl command failed with status code: $response1"
82+
cat /tmp/response1.log
12083
error_occurred=1
12184
fi
12285

12386
if [[ "$response2" -ne 200 ]]; then
124-
echo "The completion curl command failed with status code: $response2"
125-
cat /tmp/completion-res.log
126-
error_occurred=1
127-
fi
128-
129-
if [[ "$response3" -ne 200 ]]; then
130-
echo "The unload model curl command failed with status code: $response3"
131-
cat /tmp/unload-model-res.log
132-
error_occurred=1
133-
fi
134-
135-
if [[ "$response4" -ne 200 ]]; then
136-
echo "The load embedding model curl command failed with status code: $response4"
137-
cat /tmp/load-embedding-model-res.log
138-
error_occurred=1
139-
fi
140-
141-
if [[ "$response5" -ne 200 ]]; then
142-
echo "The embedding curl command failed with status code: $response5"
143-
cat /tmp/embedding-res.log
87+
echo "The second curl command failed with status code: $response2"
88+
cat /tmp/response2.log
14489
error_occurred=1
14590
fi
14691

@@ -154,23 +99,11 @@ fi
15499

155100
echo "----------------------"
156101
echo "Log load model:"
157-
cat /tmp/load-llm-model-res.log
158-
159-
echo "----------------------"
160-
echo "Log run test:"
161-
cat /tmp/completion-res.log
162-
163-
echo "----------------------"
164-
echo "Log run test:"
165-
cat /tmp/unload-model-res.log
166-
167-
echo "----------------------"
168-
echo "Log run test:"
169-
cat /tmp/load-embedding-model-res.log
102+
cat /tmp/response1.log
170103

171104
echo "----------------------"
172105
echo "Log run test:"
173-
cat /tmp/embedding-res.log
106+
cat /tmp/response2.log
174107

175108
echo "Nitro test run successfully!"
176109

.github/scripts/e2e-test-llama-windows.bat

Lines changed: 15 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,23 @@
11
@echo off
22

33
set "TEMP=C:\Users\%UserName%\AppData\Local\Temp"
4-
set "MODEL_LLM_PATH=%TEMP%\testllm"
5-
set "MODEL_EMBEDDING_PATH=%TEMP%\test-embedding"
4+
set "MODEL_PATH=%TEMP%\testllm"
65

76
rem Check for required arguments
8-
if "%~3"=="" (
9-
echo Usage: %~0 ^<path_to_binary^> ^<url_to_download_llm^> ^<url_to_download_embedding^>
7+
if "%~2"=="" (
8+
echo Usage: %~0 ^<path_to_binary^> ^<url_to_download^>
109
exit /b 1
1110
)
1211

1312
set "BINARY_PATH=%~1"
14-
set "DOWNLOAD_LLM_URL=%~2"
15-
set "DOWNLOAD_EMBEDDING_URL=%~3"
13+
set "DOWNLOAD_URL=%~2"
1614

1715
for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"
1816

1917
echo BINARY_NAME=%BINARY_NAME%
2018

2119
del %TEMP%\response1.log 2>nul
2220
del %TEMP%\response2.log 2>nul
23-
del %TEMP%\response3.log 2>nul
24-
del %TEMP%\response4.log 2>nul
25-
del %TEMP%\response5.log 2>nul
2621
del %TEMP%\nitro.log 2>nul
2722

2823
set /a min=9999
@@ -51,53 +46,33 @@ if not defined pid (
5146
rem Wait for a few seconds to let the server start
5247

5348
rem Check if %TEMP%\testmodel exists, if not, download it
54-
if not exist "%MODEL_LLM_PATH%" (
55-
curl.exe --connect-timeout 300 %DOWNLOAD_LLM_URL% --output "%MODEL_LLM_PATH%"
56-
)
57-
58-
if not exist "%MODEL_EMBEDDING_PATH%" (
59-
curl.exe --connect-timeout 300 %DOWNLOAD_EMBEDDING_URL% --output "%MODEL_EMBEDDING_PATH%"
49+
if not exist "%MODEL_PATH%" (
50+
curl.exe --connect-timeout 300 %DOWNLOAD_URL% --output "%MODEL_PATH%"
6051
)
6152

6253
rem Define JSON strings for curl data
63-
call set "MODEL_LLM_PATH_STRING=%%MODEL_LLM_PATH:\=\\%%"
64-
call set "MODEL_EMBEDDING_PATH_STRING=%%MODEL_EMBEDDING_PATH:\=\\%%"
65-
set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
66-
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":false,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
67-
set "curl_data3={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
68-
set "curl_data4={\"llama_model_path\":\"%MODEL_EMBEDDING_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}"
69-
set "curl_data5={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}"
70-
71-
rem Print the values of curl_data for debugging
54+
call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
55+
set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}"
56+
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
57+
58+
rem Print the values of curl_data1 and curl_data2 for debugging
7259
echo curl_data1=%curl_data1%
7360
echo curl_data2=%curl_data2%
74-
echo curl_data3=%curl_data3%
75-
echo curl_data4=%curl_data4%
76-
echo curl_data5=%curl_data5%
7761

7862
rem Run the curl commands and capture the status code
7963
curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1
8064

8165
curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^
8266
--header "Content-Type: application/json" ^
67+
--header "Accept: text/event-stream" ^
68+
--header "Access-Control-Allow-Origin: *" ^
8369
--data "%curl_data2%" > %TEMP%\response2.log 2>&1
8470

85-
curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1
86-
87-
curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1
88-
89-
curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^
90-
--header "Content-Type: application/json" ^
91-
--data "%curl_data5%" > %TEMP%\response5.log 2>&1
92-
9371
set "error_occurred=0"
9472

9573
rem Read the status codes from the log files
9674
for /f %%a in (%TEMP%\response1.log) do set "response1=%%a"
9775
for /f %%a in (%TEMP%\response2.log) do set "response2=%%a"
98-
for /f %%a in (%TEMP%\response3.log) do set "response3=%%a"
99-
for /f %%a in (%TEMP%\response4.log) do set "response4=%%a"
100-
for /f %%a in (%TEMP%\response5.log) do set "response5=%%a"
10176

10277
if "%response1%" neq "200" (
10378
echo The first curl command failed with status code: %response1%
@@ -111,24 +86,6 @@ if "%response2%" neq "200" (
11186
set "error_occurred=1"
11287
)
11388

114-
if "%response3%" neq "200" (
115-
echo The third curl command failed with status code: %response3%
116-
type %TEMP%\response3.log
117-
set "error_occurred=1"
118-
)
119-
120-
if "%response4%" neq "200" (
121-
echo The fourth curl command failed with status code: %response4%
122-
type %TEMP%\response4.log
123-
set "error_occurred=1"
124-
)
125-
126-
if "%response5%" neq "200" (
127-
echo The fifth curl command failed with status code: %response5%
128-
type %TEMP%\response5.log
129-
set "error_occurred=1"
130-
)
131-
13289
if "%error_occurred%"=="1" (
13390
echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!
13491
echo Nitro Error Logs:
@@ -139,25 +96,13 @@ if "%error_occurred%"=="1" (
13996

14097

14198
echo ----------------------
142-
echo Log load llm model:
99+
echo Log load model:
143100
type %TEMP%\response1.log
144101

145102
echo ----------------------
146-
echo Log run test:
103+
echo "Log run test:"
147104
type %TEMP%\response2.log
148105

149-
echo ----------------------
150-
echo Log unload model:
151-
type %TEMP%\response3.log
152-
153-
echo ----------------------
154-
echo Log load embedding model:
155-
type %TEMP%\response3.log
156-
157-
echo ----------------------
158-
echo Log run embedding test:
159-
type %TEMP%\response5.log
160-
161106
echo Nitro test run successfully!
162107

163108
rem Kill the server process

.github/workflows/build.yml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ env:
4949
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
5050
LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
5151
WHISPER_MODEL_URL: https://delta.jan.ai/ggml-tiny-q5_1.bin
52-
EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf
5352

5453
jobs:
5554
create-draft-release:
@@ -187,7 +186,7 @@ jobs:
187186
run: |
188187
# run e2e testing
189188
cd nitro
190-
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
189+
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
191190
rm -rf uploads/
192191
193192
- name: Run e2e testing - Whisper.CPP
@@ -308,7 +307,7 @@ jobs:
308307
run: |
309308
# run e2e testing
310309
cd nitro/
311-
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
310+
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
312311
rm -rf uploads/
313312
314313
- name: Run e2e testing - Whisper.CPP
@@ -374,7 +373,7 @@ jobs:
374373
run: |
375374
# run e2e testing
376375
cd nitro
377-
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
376+
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
378377
rm -rf uploads/
379378
380379
- name: Run e2e testing - Whisper.CPP
@@ -520,7 +519,7 @@ jobs:
520519
if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
521520
run: |
522521
cd build\Release
523-
..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
522+
..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }}
524523
rmdir /S /Q .\build\Release\uploads
525524
526525
- name: Run e2e testing - Whisper.cpp

0 commit comments

Comments (0)