janhq · hiento09 · Oct 31, 2023 · Oct 30, 2023 · Oct 31, 2023 · Oct 31, 2023
diff --git a/.github/scripts/e2e-test-linux-and-mac.sh b/.github/scripts/e2e-test-linux-and-mac.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+#
+# End-to-end smoke test for the nitro server on Linux and macOS.
+#
+# Starts the given binary in the background, downloads the test model to
+# /tmp/testmodel unless it is already there, then calls the loadModel and
+# chat_completion endpoints on localhost:3928. The run passes only when both
+# requests return HTTP 200; the script exits 1 otherwise.
+
+## Example run command
+# ./e2e-test-linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
+
+# Check for required arguments
+if [[ $# -ne 2 ]]; then
+    echo "Usage: $0 <path_to_binary> <url_to_download>"
+    exit 1
+fi
+
+BINARY_PATH=$1
+DOWNLOAD_URL=$2
+
+# Remove logs from a previous run; -f keeps this quiet when none exist yet
+rm -f /tmp/response1.log /tmp/response2.log /tmp/nitro.log
+
+# Start the binary file
+"$BINARY_PATH" > /tmp/nitro.log 2>&1 &
+
+# Get the process id of the binary file
+pid=$!
+
+# Stop the server on every exit path, including the early failures below.
+# stderr is discarded on purpose: the process may already have exited.
+cleanup() {
+    kill "$pid" 2> /dev/null
+}
+trap cleanup EXIT
+
+# Wait for a few seconds to let the server start
+sleep 5
+
+# Check liveness after the wait so that a binary which dies during startup
+# is reported here instead of surfacing later as failed curl requests
+if ! ps -p "$pid" > /dev/null; then
+    echo "nitro failed to start. Logs:"
+    cat /tmp/nitro.log
+    exit 1
+fi
+
+# Check if /tmp/testmodel exists, if not, download it
+if [[ ! -f "/tmp/testmodel" ]]; then
+    # Download under a temporary name: wget -O creates its output file even
+    # when the transfer fails, and a partial file must not pass the -f check
+    # above on the next run
+    if ! wget "$DOWNLOAD_URL" -O /tmp/testmodel.part; then
+        echo "Failed to download test model from $DOWNLOAD_URL"
+        rm -f /tmp/testmodel.part
+        exit 1
+    fi
+    mv /tmp/testmodel.part /tmp/testmodel
+fi
+
+# Run the curl commands
+# -o stores the response body in a log file while -w prints only the HTTP
+# status code, which is what ends up in response1/response2
+response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location 'http://localhost:3928/inferences/llamacpp/loadModel' \
+--header 'Content-Type: application/json' \
+--data '{
+    "llama_model_path": "/tmp/testmodel",
+    "ctx_len": 2048,
+    "ngl": 32,
+    "embedding": false
+}' 2>&1)
+
+response2=$(curl -o /tmp/response2.log -s -w "%{http_code}" --location 'http://localhost:3928/inferences/llamacpp/chat_completion' \
+--header 'Content-Type: application/json' \
+--header 'Accept: text/event-stream' \
+--header 'Access-Control-Allow-Origin: *' \
+--data '{
+        "messages": [
+            {"content": "Hello there", "role": "assistant"},
+            {"content": "Write a long and sad story for me", "role": "user"}
+        ],
+        "stream": true,
+        "model": "gpt-3.5-turbo",
+        "max_tokens": 2048,
+        "stop": ["hello"],
+        "frequency_penalty": 0,
+        "presence_penalty": 0,
+        "temperature": 0.7
+     }' 2>&1
+)
+
+# Compare as strings so that non-numeric curl output cannot break the test
+error_occurred=0
+if [[ "$response1" != "200" ]]; then
+    echo "The first curl command failed with status code: $response1"
+    cat /tmp/response1.log
+    error_occurred=1
+fi
+
+if [[ "$response2" != "200" ]]; then
+    echo "The second curl command failed with status code: $response2"
+    cat /tmp/response2.log
+    error_occurred=1
+fi
+
+if [[ "$error_occurred" -eq 1 ]]; then
+    echo "Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!"
+    echo "Nitro Error Logs:"
+    cat /tmp/nitro.log
+    exit 1
+fi
+
+echo "----------------------"
+echo "Log load model:"
+cat /tmp/response1.log
+
+echo "----------------------"
+echo "Log run test:"
+cat /tmp/response2.log
+
+echo "Nitro test run successfully!"
+
+# The EXIT trap kills the server process
+exit 0
diff --git a/.github/scripts/e2e-test-windows.bat b/.github/scripts/e2e-test-windows.bat
@@ -0,0 +1,123 @@
+@echo off
+
+rem End-to-end smoke test for the nitro server on Windows.
+rem Usage: e2e-test-windows.bat path_to_binary url_to_download
+rem Starts the binary in the background, downloads the test model unless it
+rem is already cached, then calls the loadModel and chat_completion endpoints
+rem on localhost:3928. Exits with code 1 unless both return HTTP 200.
+
+set "TEMP=C:\Users\%UserName%\AppData\Local\Temp"
+set "MODEL_PATH=%TEMP%\testmodel"
+
+rem Check for required arguments
+if "%~2"=="" (
+    echo Usage: %~0 ^<path_to_binary^> ^<url_to_download^>
+    exit /b 1
+)
+
+set "BINARY_PATH=%~1"
+set "DOWNLOAD_URL=%~2"
+
+rem Keep only the file name and extension; tasklist filters on the image name
+for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"
+
+echo BINARY_NAME=%BINARY_NAME%
+
+rem Paths are quoted throughout because the user name inside TEMP may contain spaces
+del "%TEMP%\response1.log" 2>nul
+del "%TEMP%\response2.log" 2>nul
+del "%TEMP%\nitro.log" 2>nul
+
+rem Start the binary file
+start /B "" "%BINARY_PATH%" > "%TEMP%\nitro.log" 2>&1
+
+rem Wait for a few seconds to let the server start (ping is used as a sleep)
+ping -n 6 127.0.0.1 > nul
+
+rem Capture the PID of the running process whose image name is BINARY_NAME.
+rem pid is cleared first so an inherited value cannot mask a failed start.
+set "pid="
+for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
+    set "pid=%%a"
+)
+
+echo pid=%pid%
+
+if not defined pid (
+    echo nitro failed to start. Logs:
+    type "%TEMP%\nitro.log"
+    exit /b 1
+)
+
+rem Check if the test model exists, if not, download it
+if not exist "%MODEL_PATH%" (
+    bitsadmin.exe /transfer "DownloadTestModel" "%DOWNLOAD_URL%" "%MODEL_PATH%"
+)
+
+rem Stop here if the model is still missing, and do not leave the server running
+if not exist "%MODEL_PATH%" (
+    echo Failed to download the test model
+    taskkill /f /pid %pid%
+    exit /b 1
+)
+
+rem Define JSON strings for curl data
+rem Backslashes in the model path are doubled so they are valid inside JSON
+call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
+set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}"
+set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":2048,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.7}"
+
+rem Print the values of curl_data1 and curl_data2 for debugging
+echo curl_data1=%curl_data1%
+echo curl_data2=%curl_data2%
+
+rem Run the curl commands and capture the status code
+curl.exe -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > "%TEMP%\response1_code.log" 2>&1
+
+curl.exe -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/chat_completion" ^
+--header "Content-Type: application/json" ^
+--header "Accept: text/event-stream" ^
+--header "Access-Control-Allow-Origin: *" ^
+--data "%curl_data2%" > "%TEMP%\response2_code.log" 2>&1
+
+set "error_occurred=0"
+
+rem Read the status codes from the log files
+rem Both variables are cleared first so an empty log file reads as a failure
+set "response1="
+set "response2="
+for /f "usebackq" %%a in ("%TEMP%\response1_code.log") do set "response1=%%a"
+for /f "usebackq" %%a in ("%TEMP%\response2_code.log") do set "response2=%%a"
+
+if "%response1%" neq "200" (
+    echo The first curl command failed with status code: %response1%
+    type "%TEMP%\response1.log"
+    set "error_occurred=1"
+)
+
+if "%response2%" neq "200" (
+    echo The second curl command failed with status code: %response2%
+    type "%TEMP%\response2.log"
+    set "error_occurred=1"
+)
+
+if "%error_occurred%"=="1" (
+    echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!
+    echo Nitro Error Logs:
+    type "%TEMP%\nitro.log"
+    taskkill /f /pid %pid%
+    exit /b 1
+)
+
+echo ----------------------
+echo Log load model:
+type "%TEMP%\response1.log"
+
+echo ----------------------
+echo Log run test:
+type "%TEMP%\response2.log"
+
+echo Nitro test run successfully!
+
+rem Kill the server process
+taskkill /f /pid %pid%
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -5,13 +5,14 @@ on:
     branches:
       - main
     tags: ['v*.*.*']
-    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
+    paths: ['.github/scripts/**','.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
   pull_request:
     types: [opened, synchronize, reopened]
-    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
+    paths: ['.github/scripts/**','.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
 
 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+  MODEL_URL: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
 
 jobs:
   create-draft-release:
@@ -72,6 +73,12 @@ jobs:
         run: |
           mkdir -p nitro
           cp build/nitro nitro/
+
+          # run e2e testing
+          cd nitro
+          chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
+          cd ..
+
           zip -r nitro.zip nitro
 
       - uses: actions/upload-release-asset@v1.0.1
@@ -117,6 +124,12 @@ jobs:
         run: |
           mkdir -p nitro
           cp build/nitro nitro/
+
+          # run e2e testing
+          cd nitro
+          chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
+          cd ..
+
           zip -r nitro.zip nitro
 
       - uses: actions/upload-release-asset@v1.0.1
@@ -164,6 +177,12 @@ jobs:
           mkdir -p nitro
           cp llama.cpp/ggml-metal.metal nitro/
           cp build/nitro nitro/
+
+          # run e2e testing
+          cd nitro
+          chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
+          cd ..
+
           zip -r nitro.zip nitro
 
       - uses: actions/upload-release-asset@v1.0.1
@@ -209,6 +228,12 @@ jobs:
         run: |
           mkdir -p nitro
           cp build/nitro nitro/
+
+          # run e2e testing
+          cd nitro
+          chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
+          cd ..
+
           zip -r nitro.zip nitro
 
       - uses: actions/upload-release-asset@v1.0.1
@@ -278,6 +303,11 @@ jobs:
           robocopy build\bin\Release .\build\Release llama.dll
           robocopy ext_libs .\build\Release libcrypto-3-x64.dll
           robocopy ext_libs .\build\Release libssl-3-x64.dll
+
+          cd .\build\Release
+          ..\..\.github\scripts\e2e-test-windows.bat .\nitro.exe ${{ env.MODEL_URL }}
+          cd ..\..
+
           7z a nitro.zip .\build\Release\*
 
       - uses: actions/upload-release-asset@v1.0.1
@@ -287,7 +317,7 @@ jobs:
         with:
           upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
           asset_path: ./nitro.zip
-          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-${{ matrix.build }}.zip
+          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64.zip
           asset_content_type: application/zip
 
   windows-amd64-cuda-build:
@@ -338,6 +368,11 @@ jobs:
           robocopy build\bin\Release .\build\Release llama.dll
           robocopy ext_libs .\build\Release libcrypto-3-x64.dll
           robocopy ext_libs .\build\Release libssl-3-x64.dll
+
+          cd .\build\Release
+          ..\..\.github\scripts\e2e-test-windows.bat .\nitro.exe ${{ env.MODEL_URL }}
+          cd ..\..
+
           7z a nitro.zip .\build\Release\*
 
       - uses: actions/upload-release-asset@v1.0.1
@@ -347,7 +382,7 @@ jobs:
         with:
           upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
           asset_path: ./nitro.zip
-          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-${{ matrix.build }}-cu${{ matrix.cuda }}.zip
+          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-cuda.zip
           asset_content_type: application/zip        
 
   update_release_draft: