janhq
diff --git a/.github/scripts/e2e-test-llama-linux-and-mac.sh b/.github/scripts/e2e-test-llama-linux-and-mac.sh
Lines changed: 14 additions & 14 deletions
diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat
Lines changed: 14 additions & 14 deletions
@@ -1,15 +1,15 @@
 #!/bin/bash
 
 ## Example run command
-# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
+# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/cortex-cpp/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf <url_to_download_embedding>
 
 # Check for required arguments
 if [[ $# -ne 3 ]]; then
     echo "Usage: $0 <path_to_binary> <url_to_download_llm> <url_to_download_embedding>"
     exit 1
 fi
 
-rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/nitro.log
+rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/cortex-cpp.log
 
 BINARY_PATH=$1
 DOWNLOAD_LLM_URL=$2
@@ -22,14 +22,14 @@ range=$((max - min + 1))
 PORT=$((RANDOM % range + min))
 
 # Start the binary file
-"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log &
+"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/cortex-cpp.log &
 
 # Get the process id of the binary file
 pid=$!
 
 if ! ps -p $pid >/dev/null; then
-    echo "nitro failed to start. Logs:"
-    cat /tmp/nitro.log
+    echo "cortex-cpp failed to start. Logs:"
+    cat /tmp/cortex-cpp.log
     exit 1
 fi
 
@@ -47,7 +47,7 @@ if [[ ! -f "/tmp/test-embedding" ]]; then
 fi
 
 # Run the curl commands
-response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
+response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \
     --header 'Content-Type: application/json' \
     --data '{
     "llama_model_path": "/tmp/testllm",
@@ -57,8 +57,8 @@ response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{ht
 }')
 
 if ! ps -p $pid >/dev/null; then
-    echo "nitro failed to load model. Logs:"
-    cat /tmp/nitro.log
+    echo "cortex-cpp failed to load model. Logs:"
+    cat /tmp/cortex-cpp.log
     exit 1
 fi
 
@@ -83,14 +83,14 @@ response2=$(
 )
 
 # unload model
-response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/unloadModel" \
+response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/unloadModel" \
     --header 'Content-Type: application/json' \
     --data '{
     "llama_model_path": "/tmp/testllm"
 }')
 
 # load embedding model
-response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
+response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \
     --header 'Content-Type: application/json' \
     --data '{
     "llama_model_path": "/tmp/test-embedding",
@@ -145,9 +145,9 @@ if [[ "$response5" -ne 200 ]]; then
 fi
 
 if [[ "$error_occurred" -eq 1 ]]; then
-    echo "Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!"
-    echo "Nitro Error Logs:"
-    cat /tmp/nitro.log
+    echo "cortex-cpp test run failed!!!!!!!!!!!!!!!!!!!!!!"
+    echo "cortex-cpp Error Logs:"
+    cat /tmp/cortex-cpp.log
     kill $pid
     exit 1
 fi
@@ -172,7 +172,7 @@ echo "----------------------"
 echo "Log run test:"
 cat /tmp/embedding-res.log
 
-echo "Nitro test run successfully!"
+echo "cortex-cpp test run successfully!"
 
 # Kill the server process
 kill $pid
@@ -23,28 +23,28 @@ del %TEMP%\response2.log 2>nul
 del %TEMP%\response3.log 2>nul
 del %TEMP%\response4.log 2>nul
 del %TEMP%\response5.log 2>nul
-del %TEMP%\nitro.log 2>nul
+del %TEMP%\cortex-cpp.log 2>nul
 
 set /a min=9999
 set /a max=11000
 set /a range=max-min+1
 set /a PORT=%min% + %RANDOM% %% %range%
 
 rem Start the binary file
-start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1
+start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\cortex-cpp.log 2>&1
 
 ping -n 6 127.0.0.1 %PORT% > nul
 
-rem Capture the PID of the started process with "nitro" in its name
+rem Capture the PID of the started process by matching its image name against BINARY_NAME
 for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
     set "pid=%%a"
 )
 
 echo pid=%pid%
 
 if not defined pid (
-    echo nitro failed to start. Logs:
-    type %TEMP%\nitro.log
+    echo cortex-cpp failed to start. Logs:
+    type %TEMP%\cortex-cpp.log
     exit /b 1
 )
 
@@ -76,15 +76,15 @@ echo curl_data4=%curl_data4%
 echo curl_data5=%curl_data5%
 
 rem Run the curl commands and capture the status code
-curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1
+curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1
 
-curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^
+curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/chat_completion" ^
 --header "Content-Type: application/json" ^
 --data "%curl_data2%" > %TEMP%\response2.log 2>&1
 
-curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1
+curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1
 
-curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1
+curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1
 
 curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^
 --header "Content-Type: application/json" ^
@@ -130,9 +130,9 @@ if "%response5%" neq "200" (
 )
 
 if "%error_occurred%"=="1" (
-    echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!
-    echo Nitro Error Logs:
-    type %TEMP%\nitro.log
+    echo cortex-cpp test run failed!!!!!!!!!!!!!!!!!!!!!!
+    echo cortex-cpp Error Logs:
+    type %TEMP%\cortex-cpp.log
     taskkill /f /pid %pid%
     exit /b 1
 )
@@ -158,8 +158,8 @@ echo ----------------------
 echo Log run embedding test:
 type %TEMP%\response5.log
 
-echo Nitro test run successfully!
+echo cortex-cpp test run successfully!
 
 rem Kill the server process
 @REM taskkill /f /pid %pid%
-taskkill /f /im nitro.exe 2>nul || exit /B 0
+taskkill /f /im cortex-cpp.exe 2>nul || exit /B 0