diff --git a/.github/scripts/e2e-test-linux-and-mac.sh b/.github/scripts/e2e-test-linux-and-mac.sh index edccab52c..0d850ea48 100644 --- a/.github/scripts/e2e-test-linux-and-mac.sh +++ b/.github/scripts/e2e-test-linux-and-mac.sh @@ -15,7 +15,7 @@ BINARY_PATH=$1 DOWNLOAD_URL=$2 # Start the binary file -"$BINARY_PATH" > /tmp/nitro.log 2>&1 & +"$BINARY_PATH" 1 127.0.0.1 5000 > /tmp/nitro.log 2>&1 & # Get the process id of the binary file pid=$! @@ -37,16 +37,16 @@ if [[ ! -f "/tmp/testmodel" ]]; then fi # Run the curl commands -response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location 'http://localhost:3928/inferences/llamacpp/loadModel' \ +response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location 'http://127.0.0.1:5000/inferences/llamacpp/loadModel' \ --header 'Content-Type: application/json' \ --data '{ "llama_model_path": "/tmp/testmodel", - "ctx_len": 2048, + "ctx_len": 50, "ngl": 32, "embedding": false }' 2>&1) -response2=$(curl -o /tmp/response2.log -s -w "%{http_code}" --location 'http://localhost:3928/inferences/llamacpp/chat_completion' \ +response2=$(curl -o /tmp/response2.log -s -w "%{http_code}" --location 'http://127.0.0.1:5000/inferences/llamacpp/chat_completion' \ --header 'Content-Type: application/json' \ --header 'Accept: text/event-stream' \ --header 'Access-Control-Allow-Origin: *' \ @@ -57,11 +57,11 @@ response2=$(curl -o /tmp/response2.log -s -w "%{http_code}" --location 'http://l ], "stream": true, "model": "gpt-3.5-turbo", - "max_tokens": 100, + "max_tokens": 50, "stop": ["hello"], "frequency_penalty": 0, "presence_penalty": 0, - "temperature": 0.7 + "temperature": 0.1 }' 2>&1 ) diff --git a/.github/scripts/e2e-test-windows.bat b/.github/scripts/e2e-test-windows.bat index c1d52c64c..4389531c1 100644 --- a/.github/scripts/e2e-test-windows.bat +++ b/.github/scripts/e2e-test-windows.bat @@ -21,9 +21,9 @@ del %TEMP%\response2.log 2>nul del %TEMP%\nitro.log 2>nul rem Start the binary file -start /B "" "%BINARY_PATH%" > 
%TEMP%\nitro.log 2>&1 +start /B "" "%BINARY_PATH%" 1 "127.0.0.1" 5000 > %TEMP%\nitro.log 2>&1 -ping -n 6 127.0.0.1 > nul +ping -n 6 127.0.0.1 > nul rem Capture the PID of the started process with "nitro" in its name for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do ( @@ -48,16 +48,16 @@ if not exist "%MODEL_PATH%" ( rem Define JSON strings for curl data call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%" set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}" -set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":100,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.7}" +set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}" rem Print the values of curl_data1 and curl_data2 for debugging echo curl_data1=%curl_data1% echo curl_data2=%curl_data2% rem Run the curl commands and capture the status code -curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1 +curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://127.0.0.1:5000/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1 -curl.exe -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/chat_completion" ^ +curl.exe -o %TEMP%\response2.log -s -w "%%{http_code}" --location 
"http://127.0.0.1:5000/inferences/llamacpp/chat_completion" ^ --header "Content-Type: application/json" ^ --header "Accept: text/event-stream" ^ --header "Access-Control-Allow-Origin: *" ^ diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 17451e64a..1e3436995 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -163,7 +163,7 @@ jobs: ./install_deps.sh mkdir build && cd build cmake .. - CC=gcc-8 make -j $(nproc) + CC=gcc-8 make -j $(sysctl -n hw.ncpu) ls -la - name: Package @@ -213,7 +213,7 @@ jobs: ./install_deps.sh mkdir build && cd build cmake -DLLAMA_METAL=OFF .. - CC=gcc-8 make -j $(nproc) + CC=gcc-8 make -j $(sysctl -n hw.ncpu) ls -la - name: Package @@ -284,7 +284,7 @@ jobs: mkdir -p build cd build cmake .. - cmake --build . --config Release -j 4 + cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%" - name: Pack artifacts id: pack_artifacts @@ -342,14 +342,13 @@ jobs: mkdir -p build cd build cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON - cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%" - name: Pack artifacts id: pack_artifacts shell: cmd run: | set PATH=%PATH%;C:\Program Files\7-Zip\ - echo %PATH% robocopy build_deps\_install\bin .\build\Release zlib.dll robocopy build\bin\Release .\build\Release llama.dll 7z a nitro.zip .\build\Release\*