28 changes: 14 additions & 14 deletions .github/scripts/e2e-test-llama-linux-and-mac.sh
@@ -1,15 +1,15 @@
#!/bin/bash

## Example run command
-# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
+# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/cortex-cpp/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf

# Check for required arguments
if [[ $# -ne 3 ]]; then
echo "Usage: $0 <path_to_binary> <url_to_download_llm> <url_to_download_embedding>"
exit 1
fi

-rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/nitro.log
+rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/cortex-cpp.log

BINARY_PATH=$1
DOWNLOAD_LLM_URL=$2
@@ -22,14 +22,14 @@ range=$((max - min + 1))
PORT=$((RANDOM % range + min))

# Start the binary file
-"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log &
+"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/cortex-cpp.log &

# Get the process id of the binary file
pid=$!

if ! ps -p $pid >/dev/null; then
-echo "nitro failed to start. Logs:"
-cat /tmp/nitro.log
+echo "cortex-cpp failed to start. Logs:"
+cat /tmp/cortex-cpp.log
exit 1
fi

@@ -47,7 +47,7 @@ if [[ ! -f "/tmp/test-embedding" ]]; then
fi

# Run the curl commands
-response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
+response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/testllm",
@@ -57,8 +57,8 @@ response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{ht
}')

if ! ps -p $pid >/dev/null; then
-echo "nitro failed to load model. Logs:"
-cat /tmp/nitro.log
+echo "cortex-cpp failed to load model. Logs:"
+cat /tmp/cortex-cpp.log
exit 1
fi

@@ -83,14 +83,14 @@ response2=$(
)

# unload model
-response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/unloadModel" \
+response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/unloadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/testllm"
}')

# load embedding model
-response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
+response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/test-embedding",
@@ -145,9 +145,9 @@ if [[ "$response5" -ne 200 ]]; then
fi

if [[ "$error_occurred" -eq 1 ]]; then
-echo "Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!"
-echo "Nitro Error Logs:"
-cat /tmp/nitro.log
+echo "cortex-cpp test run failed!!!!!!!!!!!!!!!!!!!!!!"
+echo "cortex-cpp Error Logs:"
+cat /tmp/cortex-cpp.log
kill $pid
exit 1
fi
@@ -172,7 +172,7 @@ echo "----------------------"
echo "Log run test:"
cat /tmp/embedding-res.log

-echo "Nitro test run successfully!"
+echo "cortex-cpp test run successfully!"

# Kill the server process
kill $pid
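For context, the renamed script still takes a binary path plus two model URLs, as the usage check above enforces. Below is a minimal sketch of an invocation: the binary path and the embedding-model URL are illustrative placeholders (the PR does not specify them), while the LLM URL is taken from the example comment in the script.

```bash
# Sketch only: run the renamed Linux/macOS e2e script against a local cortex-cpp build.
# The binary path and EMBEDDING_MODEL_URL are placeholders, not values mandated by this PR.
LLM_MODEL_URL="https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
EMBEDDING_MODEL_URL="https://example.com/path/to/embedding-model.gguf"  # placeholder

chmod +x .github/scripts/e2e-test-llama-linux-and-mac.sh
.github/scripts/e2e-test-llama-linux-and-mac.sh \
  ./cortex-cpp/cortex-cpp "$LLM_MODEL_URL" "$EMBEDDING_MODEL_URL"
```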
28 changes: 14 additions & 14 deletions .github/scripts/e2e-test-llama-windows.bat
@@ -23,28 +23,28 @@ del %TEMP%\response2.log 2>nul
del %TEMP%\response3.log 2>nul
del %TEMP%\response4.log 2>nul
del %TEMP%\response5.log 2>nul
-del %TEMP%\nitro.log 2>nul
+del %TEMP%\cortex-cpp.log 2>nul

set /a min=9999
set /a max=11000
set /a range=max-min+1
set /a PORT=%min% + %RANDOM% %% %range%

rem Start the binary file
-start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1
+start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\cortex-cpp.log 2>&1

ping -n 6 127.0.0.1 %PORT% > nul

-rem Capture the PID of the started process with "nitro" in its name
+rem Capture the PID of the started process with "cortex-cpp" in its name
for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
set "pid=%%a"
)

echo pid=%pid%

if not defined pid (
-echo nitro failed to start. Logs:
-type %TEMP%\nitro.log
+echo cortex-cpp failed to start. Logs:
+type %TEMP%\cortex-cpp.log
exit /b 1
)

@@ -76,15 +76,15 @@ echo curl_data4=%curl_data4%
echo curl_data5=%curl_data5%

rem Run the curl commands and capture the status code
-curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1
+curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1

-curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^
+curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/chat_completion" ^
--header "Content-Type: application/json" ^
--data "%curl_data2%" > %TEMP%\response2.log 2>&1

-curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1
+curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1

-curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1
+curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^
--header "Content-Type: application/json" ^
@@ -130,9 +130,9 @@ if "%response5%" neq "200" (
)

if "%error_occurred%"=="1" (
-echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!
-echo Nitro Error Logs:
-type %TEMP%\nitro.log
+echo cortex-cpp test run failed!!!!!!!!!!!!!!!!!!!!!!
+echo cortex-cpp Error Logs:
+type %TEMP%\cortex-cpp.log
taskkill /f /pid %pid%
exit /b 1
)
@@ -158,8 +158,8 @@ echo ----------------------
echo Log run embedding test:
type %TEMP%\response5.log

-echo Nitro test run successfully!
+echo cortex-cpp test run successfully!

rem Kill the server process
@REM taskkill /f /pid %pid%
-taskkill /f /im nitro.exe 2>nul || exit /B 0
+taskkill /f /im cortex-cpp.exe 2>nul || exit /B 0
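Both scripts now target the `/inferences/server/*` routes instead of `/inferences/llamacpp/*`. As a manual sanity check against a running binary, the load-model call could be reproduced roughly as below; the port is a placeholder (the scripts pick a random one between 9999 and 11000), and any request fields elided in the diff above are simply omitted here.

```bash
# Sketch only: manually exercise the renamed loadModel route.
# Assumes a cortex-cpp server is already listening on $PORT and a GGUF model sits at /tmp/testllm.
PORT=10000  # placeholder; the e2e scripts choose a random port in 9999-11000

curl --connect-timeout 60 -s -w "%{http_code}" \
  --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \
  --header 'Content-Type: application/json' \
  --data '{ "llama_model_path": "/tmp/testllm" }'
```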
8 changes: 4 additions & 4 deletions .github/workflows/build.yml
@@ -185,15 +185,15 @@ jobs:
- name: Upload Artifact
uses: actions/upload-artifact@v2
with:
-name: cortex-llamacpp-engine-${{ matrix.os }}-${{ matrix.name }}
-path: ./cortex-cpp/cortex
+name: cortex-cpp-${{ matrix.os }}-${{ matrix.name }}
+path: ./cortex-cpp/cortex-cpp

- uses: actions/upload-release-asset@v1.0.1
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
-asset_path: ./cortex-cpp/cortex.tar.gz
-asset_name: cortex-llamacpp-engine-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz
+asset_path: ./cortex-cpp/cortex-cpp.tar.gz
+asset_name: cortex-cpp-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz
asset_content_type: application/gzip
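With the renamed asset, a tagged build would publish `cortex-cpp-<version>-<os>-<name>.tar.gz` instead of `cortex-llamacpp-engine-...`. The sketch below shows how such an asset might be fetched and unpacked; the repository slug, version, and matrix values are placeholders derived only from the asset_name pattern above.

```bash
# Sketch only: download and unpack a release asset under the new naming scheme.
# ORG, REPO, VERSION, OS and NAME are placeholders; replace them with real values.
ORG=ORG_NAME REPO=REPO_NAME VERSION=v0.0.0 OS=linux NAME=amd64
curl -L -o cortex-cpp.tar.gz \
  "https://github.com/$ORG/$REPO/releases/download/$VERSION/cortex-cpp-$VERSION-$OS-$NAME.tar.gz"
tar -xzf cortex-cpp.tar.gz   # yields the cortex-cpp/ directory produced by `make package`
```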
4 changes: 2 additions & 2 deletions .github/workflows/quality-gate.yml
@@ -159,5 +159,5 @@ jobs:
- name: Upload Artifact
uses: actions/upload-artifact@v2
with:
-name: cortex-llamacpp-engine-${{ matrix.os }}-${{ matrix.name }}
-path: ./cortex-cpp/cortex
+name: cortex-cpp-${{ matrix.os }}-${{ matrix.name }}
+path: ./cortex-cpp/cortex-cpp
50 changes: 0 additions & 50 deletions .github/workflows/update-release-url.yml

This file was deleted.

3 changes: 1 addition & 2 deletions cortex-cpp/.gitignore
@@ -85,7 +85,6 @@ CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
-!nitro-node/Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
@@ -561,7 +560,7 @@ FodyWeavers.xsd

# End of https://www.toptal.com/developers/gitignore/api/intellij+all,visualstudio,visualstudiocode,cmake,c,c++
build
-build_deps
+build-deps
.DS_Store

uploads/**
16 changes: 8 additions & 8 deletions cortex-cpp/CMakeLists.txt
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.5)
-project(nitro C CXX)
+project(cortex-cpp C CXX)

include(engines/cortex.llamacpp/engine.cmake)
include(CheckIncludeFileCXX)
@@ -21,7 +21,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(OPENSSL_USE_STATIC_LIBS TRUE)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build_deps/_install)
+set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build-deps/_install)
# This is the critical line for installing another package

if(LLAMA_CUDA)
@@ -35,12 +35,12 @@ if(LLAMA_CUDA)
endif()

if(DEBUG)
-message(STATUS "NITRO DEBUG IS ON")
+message(STATUS "CORTEX-CPP DEBUG IS ON")
add_compile_definitions(ALLOW_ALL_CORS)
endif()

-if(NOT DEFINED NITRO_VERSION)
-set(NITRO_VERSION "default_version")
+if(NOT DEFINED CORTEX_CPP_VERSION)
+set(CORTEX_CPP_VERSION "default_version")
endif()

if(APPLE)
@@ -54,16 +54,16 @@ if(APPLE)
endif()
endif()

-add_compile_definitions(NITRO_VERSION="${NITRO_VERSION}")
+add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}")

add_subdirectory(test)

add_executable(${PROJECT_NAME} main.cc)

# ##############################################################################
# If you include the drogon source code locally in your project, use this method
-# to add drogon add_subdirectory(nitro_deps)
-# target_link_libraries(${PROJECT_NAME} PRIVATE nitro_deps)
+# to add drogon add_subdirectory(cortex-cpp-deps)
+# target_link_libraries(${PROJECT_NAME} PRIVATE cortex-cpp-deps)
#
# and comment out the following lines
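The rename also changes the cache variable that feeds the compile definition: builds that previously passed -DNITRO_VERSION would now pass -DCORTEX_CPP_VERSION. A minimal configure-and-build sketch follows; the version string and the optional DEBUG / LLAMA_CUDA switches are illustrative, not required by this change.

```bash
# Sketch only: configure and build with the renamed version variable.
cd cortex-cpp
mkdir -p build && cd build
cmake .. -DCORTEX_CPP_VERSION="v0.0.0"   # optionally add -DDEBUG=ON or -DLLAMA_CUDA=ON
cmake --build . --config Release -j4
```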
44 changes: 22 additions & 22 deletions cortex-cpp/Makefile
@@ -14,8 +14,8 @@ all:
# Build the Cortex engine
build:
ifeq ($(OS),Windows_NT)
-@powershell -Command "cmake -S ./nitro_deps -B ./build_deps/nitro_deps;"
-@powershell -Command "cmake --build ./build_deps/nitro_deps --config Release -j4;"
+@powershell -Command "cmake -S ./cortex-cpp-deps -B ./build-deps/cortex-cpp-deps;"
+@powershell -Command "cmake --build ./build-deps/cortex-cpp-deps --config Release -j4;"
@powershell -Command "mkdir -p build; cd build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release -j4;"
else ifeq ($(shell uname -s),Linux)
@./install_deps.sh;
@@ -31,23 +31,23 @@ endif

package:
ifeq ($(OS),Windows_NT)
-@powershell -Command "mkdir -p cortex\engines\cortex.llamacpp\; cp build\engines\cortex.llamacpp\engine.dll cortex\engines\cortex.llamacpp\;"
-@powershell -Command "cp build\Release\nitro.exe .\cortex\;"
-@powershell -Command "cp build_deps\_install\bin\zlib.dll .\cortex\;"
-@powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex\;"
-@powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex\;"
-@powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex\;"
-@powershell -Command "7z a -ttar temp.tar cortex\\*; 7z a -tgzip cortex.tar.gz temp.tar;"
+@powershell -Command "mkdir -p cortex-cpp\engines\cortex.llamacpp\; cp build\engines\cortex.llamacpp\engine.dll cortex-cpp\engines\cortex.llamacpp\;"
+@powershell -Command "cp build\Release\cortex-cpp.exe .\cortex-cpp\;"
+@powershell -Command "cp build-deps\_install\bin\zlib.dll .\cortex-cpp\;"
+@powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex-cpp\;"
+@powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex-cpp\;"
+@powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex-cpp\;"
+@powershell -Command "7z a -ttar temp.tar cortex-cpp\\*; 7z a -tgzip cortex-cpp.tar.gz temp.tar;"
else ifeq ($(shell uname -s),Linux)
-@mkdir -p cortex/engines/cortex.llamacpp; \
-cp build/engines/cortex.llamacpp/libengine.so cortex/engines/cortex.llamacpp/; \
-cp build/nitro cortex/; \
-tar -czvf cortex.tar.gz cortex;
+@mkdir -p cortex-cpp/engines/cortex.llamacpp; \
+cp build/engines/cortex.llamacpp/libengine.so cortex-cpp/engines/cortex.llamacpp/; \
+cp build/cortex-cpp cortex-cpp/; \
+tar -czvf cortex-cpp.tar.gz cortex-cpp;
else
-@mkdir -p cortex/engines/cortex.llamacpp; \
-cp build/engines/cortex.llamacpp/libengine.dylib cortex/engines/cortex.llamacpp/; \
-cp build/nitro cortex/; \
-tar -czvf cortex.llamacpp.tar.gz cortex;
+@mkdir -p cortex-cpp/engines/cortex.llamacpp; \
+cp build/engines/cortex.llamacpp/libengine.dylib cortex-cpp/engines/cortex.llamacpp/; \
+cp build/cortex-cpp cortex-cpp/; \
+tar -czvf cortex-cpp.tar.gz cortex-cpp;
endif

run-e2e-test:
@@ -56,13 +56,13 @@ ifeq ($(RUN_TESTS),false)
@exit 0
endif
ifeq ($(OS),Windows_NT)
-@powershell -Command "cd cortex; ..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);"
+@powershell -Command "cd cortex-cpp; ..\..\.github\scripts\e2e-test-llama-windows.bat cortex-cpp.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);"
else ifeq ($(shell uname -s),Linux)
-@cd cortex; \
-chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \
+@cd cortex-cpp; \
+chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./cortex-cpp $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \
rm -rf uploads/;
else
-@cd cortex; \
-chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \
+@cd cortex-cpp; \
+chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./cortex-cpp $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \
rm -rf uploads/;
endif
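Taken together, the renamed targets would typically be driven as below. This is a sketch based on the variables referenced in the Makefile: the embedding-model URL is a placeholder, and RUN_TESTS is set explicitly on the assumption that run-e2e-test should not take its early-exit path.

```bash
# Sketch only: drive the renamed Makefile targets end to end.
cd cortex-cpp
make build
make package   # produces the cortex-cpp/ directory and cortex-cpp.tar.gz
make run-e2e-test RUN_TESTS=true \
  LLM_MODEL_URL="https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" \
  EMBEDDING_MODEL_URL="https://example.com/path/to/embedding-model.gguf"  # placeholder
```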