28 changes: 14 additions & 14 deletions .github/scripts/e2e-test-llama-linux-and-mac.sh
@@ -1,15 +1,15 @@
#!/bin/bash

## Example run command
-# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
+# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/cortex-cpp/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf

# Check for required arguments
if [[ $# -ne 3 ]]; then
echo "Usage: $0 <path_to_binary> <url_to_download_llm> <url_to_download_embedding>"
exit 1
fi

-rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/nitro.log
+rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/cortex-cpp.log

BINARY_PATH=$1
DOWNLOAD_LLM_URL=$2
@@ -22,14 +22,14 @@ range=$((max - min + 1))
PORT=$((RANDOM % range + min))

# Start the binary file
-"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log &
+"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/cortex-cpp.log &

# Get the process id of the binary file
pid=$!

if ! ps -p $pid >/dev/null; then
-echo "nitro failed to start. Logs:"
-cat /tmp/nitro.log
+echo "cortex-cpp failed to start. Logs:"
+cat /tmp/cortex-cpp.log
exit 1
fi

@@ -47,7 +47,7 @@ if [[ ! -f "/tmp/test-embedding" ]]; then
fi

# Run the curl commands
-response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
+response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/testllm",
@@ -57,8 +57,8 @@ response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{ht
}')

if ! ps -p $pid >/dev/null; then
-echo "nitro failed to load model. Logs:"
-cat /tmp/nitro.log
+echo "cortex-cpp failed to load model. Logs:"
+cat /tmp/cortex-cpp.log
exit 1
fi

@@ -83,14 +83,14 @@ response2=$(
)

# unload model
-response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/unloadModel" \
+response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/unloadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/testllm"
}')

# load embedding model
-response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
+response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/test-embedding",
@@ -145,9 +145,9 @@ if [[ "$response5" -ne 200 ]]; then
fi

if [[ "$error_occurred" -eq 1 ]]; then
-echo "Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!"
-echo "Nitro Error Logs:"
-cat /tmp/nitro.log
+echo "cortex-cpp test run failed!!!!!!!!!!!!!!!!!!!!!!"
+echo "cortex-cpp Error Logs:"
+cat /tmp/cortex-cpp.log
kill $pid
exit 1
fi
@@ -172,7 +172,7 @@ echo "----------------------"
echo "Log run test:"
cat /tmp/embedding-res.log

-echo "Nitro test run successfully!"
+echo "cortex-cpp test run successfully!"

# Kill the server process
kill $pid
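For context, the renamed script still takes a binary path plus two model URLs, as the usage check above enforces. Below is a minimal sketch of an invocation: the binary path and the embedding-model URL are illustrative placeholders (the PR does not specify them), while the LLM URL is taken from the example comment in the script.

```bash
# Sketch only: run the renamed Linux/macOS e2e script against a local cortex-cpp build.
# The binary path and EMBEDDING_MODEL_URL are placeholders, not values mandated by this PR.
LLM_MODEL_URL="https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
EMBEDDING_MODEL_URL="https://example.com/path/to/embedding-model.gguf"  # placeholder

chmod +x .github/scripts/e2e-test-llama-linux-and-mac.sh
.github/scripts/e2e-test-llama-linux-and-mac.sh \
  ./cortex-cpp/cortex-cpp "$LLM_MODEL_URL" "$EMBEDDING_MODEL_URL"
```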
28 changes: 14 additions & 14 deletions .github/scripts/e2e-test-llama-windows.bat
@@ -23,28 +23,28 @@ del %TEMP%\response2.log 2>nul
del %TEMP%\response3.log 2>nul
del %TEMP%\response4.log 2>nul
del %TEMP%\response5.log 2>nul
-del %TEMP%\nitro.log 2>nul
+del %TEMP%\cortex-cpp.log 2>nul

set /a min=9999
set /a max=11000
set /a range=max-min+1
set /a PORT=%min% + %RANDOM% %% %range%

rem Start the binary file
-start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1
+start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\cortex-cpp.log 2>&1

ping -n 6 127.0.0.1 %PORT% > nul

-rem Capture the PID of the started process with "nitro" in its name
+rem Capture the PID of the started process with "cortex-cpp" in its name
for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
set "pid=%%a"
)

echo pid=%pid%

if not defined pid (
-echo nitro failed to start. Logs:
-type %TEMP%\nitro.log
+echo cortex-cpp failed to start. Logs:
+type %TEMP%\cortex-cpp.log
exit /b 1
)

@@ -76,15 +76,15 @@ echo curl_data4=%curl_data4%
echo curl_data5=%curl_data5%

rem Run the curl commands and capture the status code
-curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1
+curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1

-curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^
+curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/chat_completion" ^
--header "Content-Type: application/json" ^
--data "%curl_data2%" > %TEMP%\response2.log 2>&1

-curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1
+curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1

-curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1
+curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^
--header "Content-Type: application/json" ^
@@ -130,9 +130,9 @@ if "%response5%" neq "200" (
)

if "%error_occurred%"=="1" (
-echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!
-echo Nitro Error Logs:
-type %TEMP%\nitro.log
+echo cortex-cpp test run failed!!!!!!!!!!!!!!!!!!!!!!
+echo cortex-cpp Error Logs:
+type %TEMP%\cortex-cpp.log
taskkill /f /pid %pid%
exit /b 1
)
@@ -158,8 +158,8 @@ echo ----------------------
echo Log run embedding test:
type %TEMP%\response5.log

-echo Nitro test run successfully!
+echo cortex-cpp test run successfully!

rem Kill the server process
@REM taskkill /f /pid %pid%
-taskkill /f /im nitro.exe 2>nul || exit /B 0
+taskkill /f /im cortex-cpp.exe 2>nul || exit /B 0
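Both scripts now target the `/inferences/server/*` routes instead of `/inferences/llamacpp/*`. As a manual sanity check against a running binary, the load-model call could be reproduced roughly as below; the port is a placeholder (the scripts pick a random one between 9999 and 11000), and any request fields elided in the diff above are simply omitted here.

```bash
# Sketch only: manually exercise the renamed loadModel route.
# Assumes a cortex-cpp server is already listening on $PORT and a GGUF model sits at /tmp/testllm.
PORT=10000  # placeholder; the e2e scripts choose a random port in 9999-11000

curl --connect-timeout 60 -s -w "%{http_code}" \
  --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \
  --header 'Content-Type: application/json' \
  --data '{ "llama_model_path": "/tmp/testllm" }'
```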
8 changes: 4 additions & 4 deletions .github/workflows/build.yml
@@ -185,15 +185,15 @@ jobs:
- name: Upload Artifact
uses: actions/upload-artifact@v2
with:
-name: cortex-llamacpp-engine-${{ matrix.os }}-${{ matrix.name }}
-path: ./cortex-cpp/cortex
+name: cortex-cpp-${{ matrix.os }}-${{ matrix.name }}
+path: ./cortex-cpp/cortex-cpp

- uses: actions/upload-release-asset@v1.0.1
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
-asset_path: ./cortex-cpp/cortex.tar.gz
-asset_name: cortex-llamacpp-engine-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz
+asset_path: ./cortex-cpp/cortex-cpp.tar.gz
+asset_name: cortex-cpp-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz
asset_content_type: application/gzip
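With the renamed asset, a tagged build would publish `cortex-cpp-<version>-<os>-<name>.tar.gz` instead of `cortex-llamacpp-engine-...`. The sketch below shows how such an asset might be fetched and unpacked; the repository slug, version, and matrix values are placeholders derived only from the asset_name pattern above.

```bash
# Sketch only: download and unpack a release asset under the new naming scheme.
# ORG, REPO, VERSION, OS and NAME are placeholders; replace them with real values.
ORG=ORG_NAME REPO=REPO_NAME VERSION=v0.0.0 OS=linux NAME=amd64
curl -L -o cortex-cpp.tar.gz \
  "https://github.com/$ORG/$REPO/releases/download/$VERSION/cortex-cpp-$VERSION-$OS-$NAME.tar.gz"
tar -xzf cortex-cpp.tar.gz   # yields the cortex-cpp/ directory produced by `make package`
```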
4 changes: 2 additions & 2 deletions .github/workflows/quality-gate.yml
@@ -159,5 +159,5 @@ jobs:
- name: Upload Artifact
uses: actions/upload-artifact@v2
with:
-name: cortex-llamacpp-engine-${{ matrix.os }}-${{ matrix.name }}
-path: ./cortex-cpp/cortex
+name: cortex-cpp-${{ matrix.os }}-${{ matrix.name }}
+path: ./cortex-cpp/cortex-cpp
50 changes: 0 additions & 50 deletions .github/workflows/update-release-url.yml

This file was deleted.

3 changes: 1 addition & 2 deletions cortex-cpp/.gitignore
@@ -85,7 +85,6 @@ CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
-!nitro-node/Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
@@ -561,7 +560,7 @@ FodyWeavers.xsd

# End of https://www.toptal.com/developers/gitignore/api/intellij+all,visualstudio,visualstudiocode,cmake,c,c++
build
-build_deps
+build-deps
.DS_Store

uploads/**
16 changes: 8 additions & 8 deletions cortex-cpp/CMakeLists.txt
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.5)
-project(nitro C CXX)
+project(cortex-cpp C CXX)

include(engines/cortex.llamacpp/engine.cmake)
include(CheckIncludeFileCXX)
@@ -21,7 +21,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(OPENSSL_USE_STATIC_LIBS TRUE)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build_deps/_install)
+set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build-deps/_install)
# This is the critical line for installing another package

if(LLAMA_CUDA)
@@ -35,12 +35,12 @@ if(LLAMA_CUDA)
endif()

if(DEBUG)
-message(STATUS "NITRO DEBUG IS ON")
+message(STATUS "CORTEX-CPP DEBUG IS ON")
add_compile_definitions(ALLOW_ALL_CORS)
endif()

-if(NOT DEFINED NITRO_VERSION)
-set(NITRO_VERSION "default_version")
+if(NOT DEFINED CORTEX_CPP_VERSION)
+set(CORTEX_CPP_VERSION "default_version")
endif()

if(APPLE)
@@ -54,16 +54,16 @@ if(APPLE)
endif()
endif()

-add_compile_definitions(NITRO_VERSION="${NITRO_VERSION}")
+add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}")

add_subdirectory(test)

add_executable(${PROJECT_NAME} main.cc)

# ##############################################################################
# If you include the drogon source code locally in your project, use this method
-# to add drogon add_subdirectory(nitro_deps)
-# target_link_libraries(${PROJECT_NAME} PRIVATE nitro_deps)
+# to add drogon add_subdirectory(cortex-cpp-deps)
+# target_link_libraries(${PROJECT_NAME} PRIVATE cortex-cpp-deps)
#
# and comment out the following lines
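The rename also changes the cache variable that feeds the compile definition: builds that previously passed -DNITRO_VERSION would now pass -DCORTEX_CPP_VERSION. A minimal configure-and-build sketch follows; the version string and the optional DEBUG / LLAMA_CUDA switches are illustrative, not required by this change.

```bash
# Sketch only: configure and build with the renamed version variable.
cd cortex-cpp
mkdir -p build && cd build
cmake .. -DCORTEX_CPP_VERSION="v0.0.0"   # optionally add -DDEBUG=ON or -DLLAMA_CUDA=ON
cmake --build . --config Release -j4
```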
44 changes: 22 additions & 22 deletions cortex-cpp/Makefile
@@ -14,8 +14,8 @@ all:
# Build the Cortex engine
build:
ifeq ($(OS),Windows_NT)
-@powershell -Command "cmake -S ./nitro_deps -B ./build_deps/nitro_deps;"
-@powershell -Command "cmake --build ./build_deps/nitro_deps --config Release -j4;"
+@powershell -Command "cmake -S ./cortex-cpp-deps -B ./build-deps/cortex-cpp-deps;"
+@powershell -Command "cmake --build ./build-deps/cortex-cpp-deps --config Release -j4;"
@powershell -Command "mkdir -p build; cd build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release -j4;"
else ifeq ($(shell uname -s),Linux)
@./install_deps.sh;
@@ -31,23 +31,23 @@ endif

package:
ifeq ($(OS),Windows_NT)
-@powershell -Command "mkdir -p cortex\engines\cortex.llamacpp\; cp build\engines\cortex.llamacpp\engine.dll cortex\engines\cortex.llamacpp\;"
-@powershell -Command "cp build\Release\nitro.exe .\cortex\;"
-@powershell -Command "cp build_deps\_install\bin\zlib.dll .\cortex\;"
-@powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex\;"
-@powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex\;"
-@powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex\;"
-@powershell -Command "7z a -ttar temp.tar cortex\\*; 7z a -tgzip cortex.tar.gz temp.tar;"
+@powershell -Command "mkdir -p cortex-cpp\engines\cortex.llamacpp\; cp build\engines\cortex.llamacpp\engine.dll cortex-cpp\engines\cortex.llamacpp\;"
+@powershell -Command "cp build\Release\cortex-cpp.exe .\cortex-cpp\;"
+@powershell -Command "cp build-deps\_install\bin\zlib.dll .\cortex-cpp\;"
+@powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex-cpp\;"
+@powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex-cpp\;"
+@powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex-cpp\;"
+@powershell -Command "7z a -ttar temp.tar cortex-cpp\\*; 7z a -tgzip cortex-cpp.tar.gz temp.tar;"
else ifeq ($(shell uname -s),Linux)
-@mkdir -p cortex/engines/cortex.llamacpp; \
-cp build/engines/cortex.llamacpp/libengine.so cortex/engines/cortex.llamacpp/; \
-cp build/nitro cortex/; \
-tar -czvf cortex.tar.gz cortex;
+@mkdir -p cortex-cpp/engines/cortex.llamacpp; \
+cp build/engines/cortex.llamacpp/libengine.so cortex-cpp/engines/cortex.llamacpp/; \
+cp build/cortex-cpp cortex-cpp/; \
+tar -czvf cortex-cpp.tar.gz cortex-cpp;
else
-@mkdir -p cortex/engines/cortex.llamacpp; \
-cp build/engines/cortex.llamacpp/libengine.dylib cortex/engines/cortex.llamacpp/; \
-cp build/nitro cortex/; \
-tar -czvf cortex.llamacpp.tar.gz cortex;
+@mkdir -p cortex-cpp/engines/cortex.llamacpp; \
+cp build/engines/cortex.llamacpp/libengine.dylib cortex-cpp/engines/cortex.llamacpp/; \
+cp build/cortex-cpp cortex-cpp/; \
+tar -czvf cortex-cpp.tar.gz cortex-cpp;
endif

run-e2e-test:
@@ -56,13 +56,13 @@ ifeq ($(RUN_TESTS),false)
@exit 0
endif
ifeq ($(OS),Windows_NT)
-@powershell -Command "cd cortex; ..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);"
+@powershell -Command "cd cortex-cpp; ..\..\.github\scripts\e2e-test-llama-windows.bat cortex-cpp.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);"
else ifeq ($(shell uname -s),Linux)
-@cd cortex; \
-chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \
+@cd cortex-cpp; \
+chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./cortex-cpp $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \
rm -rf uploads/;
else
-@cd cortex; \
-chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \
+@cd cortex-cpp; \
+chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./cortex-cpp $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \
rm -rf uploads/;
endif
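Taken together, the renamed targets would typically be driven as below. This is a sketch based on the variables referenced in the Makefile: the embedding-model URL is a placeholder, and RUN_TESTS is set explicitly on the assumption that run-e2e-test should not take its early-exit path.

```bash
# Sketch only: drive the renamed Makefile targets end to end.
cd cortex-cpp
make build
make package   # produces the cortex-cpp/ directory and cortex-cpp.tar.gz
make run-e2e-test RUN_TESTS=true \
  LLM_MODEL_URL="https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" \
  EMBEDDING_MODEL_URL="https://example.com/path/to/embedding-model.gguf"  # placeholder
```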