diff --git a/.github/scripts/e2e-test-llama-linux-and-mac.sh b/.github/scripts/e2e-test-llama-linux-and-mac.sh index d7f1b5ab8..e97c51f63 100644 --- a/.github/scripts/e2e-test-llama-linux-and-mac.sh +++ b/.github/scripts/e2e-test-llama-linux-and-mac.sh @@ -21,7 +21,7 @@ range=$((max - min + 1)) PORT=$((RANDOM % range + min)) # Start the binary file -"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log 2>&1 & +"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log & # Get the process id of the binary file pid=$! @@ -37,21 +37,27 @@ sleep 5 # Check if /tmp/testllm exists, if not, download it if [[ ! -f "/tmp/testllm" ]]; then - wget $DOWNLOAD_URL -O /tmp/testllm + curl --connect-timeout 300 $DOWNLOAD_URL --output /tmp/testllm fi # Run the curl commands -response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \ +response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \ --header 'Content-Type: application/json' \ --data '{ "llama_model_path": "/tmp/testllm", "ctx_len": 50, "ngl": 32, "embedding": false -}' 2>&1) +}') + +if ! ps -p $pid >/dev/null; then + echo "nitro failed to load model. Logs:" + cat /tmp/nitro.log + exit 1 +fi response2=$( - curl -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \ + curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \ --header 'Content-Type: application/json' \ --header 'Accept: text/event-stream' \ --header 'Access-Control-Allow-Origin: *' \ @@ -67,7 +73,7 @@ response2=$( "frequency_penalty": 0, "presence_penalty": 0, "temperature": 0.1 - }' 2>&1 + }' ) error_occurred=0 diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat index 7de9c5b67..a6526f358 100644 --- a/.github/scripts/e2e-test-llama-windows.bat +++ b/.github/scripts/e2e-test-llama-windows.bat @@ -47,7 +47,7 @@ rem Wait for a few seconds to let the server start rem Check if %TEMP%\testmodel exists, if not, download it if not exist "%MODEL_PATH%" ( - bitsadmin.exe /transfer "DownloadTestModel" %DOWNLOAD_URL% "%MODEL_PATH%" + curl.exe --connect-timeout 300 %DOWNLOAD_URL% --output "%MODEL_PATH%" ) rem Define JSON strings for curl data diff --git a/.github/scripts/e2e-test-whisper-linux-and-mac.sh b/.github/scripts/e2e-test-whisper-linux-and-mac.sh index 90421dff3..4c8a1e9eb 100755 --- a/.github/scripts/e2e-test-whisper-linux-and-mac.sh +++ b/.github/scripts/e2e-test-whisper-linux-and-mac.sh @@ -21,7 +21,7 @@ range=$((max - min + 1)) PORT=$((RANDOM % range + min)) # Start the binary file -"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log 2>&1 & +"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log & # Get the process id of the binary file pid=$! @@ -37,25 +37,25 @@ sleep 5 # Check if /tmp/testwhisper exists, if not, download it if [[ ! -f "/tmp/testwhisper" ]]; then - wget $DOWNLOAD_URL -O /tmp/testwhisper + curl --connect-timeout 300 $DOWNLOAD_URL --output /tmp/testwhisper fi # Run the curl commands -response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/audio/load_model" \ +response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/audio/load_model" \ --header 'Content-Type: application/json' \ --data '{ "model_path": "/tmp/testwhisper", "model_id": "whisper.cpp" -}' 2>&1) +}') response2=$( - curl -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/audio/transcriptions" \ + curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/audio/transcriptions" \ --header 'Access-Control-Allow-Origin: *' \ --form 'file=@"../whisper.cpp/samples/jfk.wav"' \ --form 'model_id="whisper.cpp"' \ --form 'temperature="0.0"' \ --form 'prompt="The transcript is about OpenAI which makes technology like DALLĀ·E, GPT-3, and ChatGPT with the hope of one day building an AGI system that benefits all of humanity. The president is trying to raly people to support the cause."' \ - 2>&1 + ) error_occurred=0 diff --git a/.github/scripts/e2e-test-whisper-windows.bat b/.github/scripts/e2e-test-whisper-windows.bat index a47b0e004..6eb2037ea 100644 --- a/.github/scripts/e2e-test-whisper-windows.bat +++ b/.github/scripts/e2e-test-whisper-windows.bat @@ -47,7 +47,7 @@ rem Wait for a few seconds to let the server start rem Check if %TEMP%\testwhisper exists, if not, download it if not exist "%MODEL_PATH%" ( - bitsadmin.exe /transfer "DownloadTestModel" %DOWNLOAD_URL% "%MODEL_PATH%" + curl.exe --connect-timeout 300 %DOWNLOAD_URL% --output "%MODEL_PATH%" ) rem Define JSON strings for curl data @@ -55,9 +55,9 @@ call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%" set "curl_data1={\"model_path\":\"%MODEL_PATH_STRING%\",\"model_id\":\"whisper\"}" rem Run the curl commands and capture the status code -curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1 +curl.exe --connect-timeout 60 -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1 -curl -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://localhost:%PORT%/v1/audio/transcriptions" ^ +curl --connect-timeout 60 -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://localhost:%PORT%/v1/audio/transcriptions" ^ --form "file=@../..//whisper.cpp/samples/jfk.wav" ^ --form "model_id=whisper" > %TEMP%\response2_code.log 2>&1 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 50a3d23ec..be924d3d8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -49,8 +49,8 @@ on: env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - LLM_MODEL_URL: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf - WHISPER_MODEL_URL: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin + LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf + WHISPER_MODEL_URL: https://delta.jan.ai/ggml-tiny-q5_1.bin jobs: create-draft-release: @@ -125,6 +125,7 @@ jobs: runs-on: ubuntu-18-04-cuda-11-7 needs: [create-draft-release, set-nitro-version] if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success' + timeout-minutes: 20 permissions: contents: write steps: @@ -188,6 +189,7 @@ jobs: runs-on: ubuntu-18-04-cuda-11-7 needs: [create-draft-release, set-nitro-version] if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success' + timeout-minutes: 20 permissions: contents: write steps: @@ -231,22 +233,6 @@ jobs: name: nitro-linux-amd64-vulkan path: ./nitro - # - name: Run e2e testing - LLama.CPP - # shell: bash - # run: | - # # run e2e testing - # cd nitro - # chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} - # rm -rf uploads/ - - # - name: Run e2e testing - Whisper.CPP - # shell: bash - # run: | - # # run e2e testing - # cd nitro - # chmod +x ../.github/scripts/e2e-test-whisper-linux-and-mac.sh && ../.github/scripts/e2e-test-whisper-linux-and-mac.sh ./nitro ${{ env.WHISPER_MODEL_URL }} - # rm -rf uploads/ - - uses: actions/upload-release-asset@v1.0.1 if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') env: @@ -261,6 +247,7 @@ jobs: runs-on: ubuntu-18-04-cuda-${{ matrix.cuda }} needs: [create-draft-release, set-nitro-version] if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success' + timeout-minutes: 20 permissions: contents: write strategy: @@ -297,21 +284,21 @@ jobs: name: nitro-linux-amd64-cuda-${{ matrix.cuda }} path: ./nitro - - name: Run e2e testing - LLama.CPP - shell: bash - run: | - # run e2e testing - cd nitro - chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} - rm -rf uploads/ + # - name: Run e2e testing - LLama.CPP + # shell: bash + # run: | + # # run e2e testing + # cd nitro + # chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} + # rm -rf uploads/ - - name: Run e2e testing - Whisper.CPP - shell: bash - run: | - # run e2e testing - cd nitro - chmod +x ../.github/scripts/e2e-test-whisper-linux-and-mac.sh && ../.github/scripts/e2e-test-whisper-linux-and-mac.sh ./nitro ${{ env.WHISPER_MODEL_URL }} - rm -rf uploads/ + # - name: Run e2e testing - Whisper.CPP + # shell: bash + # run: | + # # run e2e testing + # cd nitro + # chmod +x ../.github/scripts/e2e-test-whisper-linux-and-mac.sh && ../.github/scripts/e2e-test-whisper-linux-and-mac.sh ./nitro ${{ env.WHISPER_MODEL_URL }} + # rm -rf uploads/ - uses: actions/upload-release-asset@v1.0.1 if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') @@ -327,6 +314,7 @@ jobs: runs-on: mac-silicon needs: [create-draft-release, set-nitro-version] if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success' + timeout-minutes: 20 permissions: contents: write steps: @@ -405,6 +393,7 @@ jobs: runs-on: macos-latest needs: [create-draft-release, set-nitro-version] if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success' + timeout-minutes: 20 permissions: contents: write steps: @@ -470,86 +459,11 @@ jobs: asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-amd64.tar.gz asset_content_type: application/gzip - # macOS-amd64-vulkan-build: - # runs-on: macos-latest - # needs: [create-draft-release, set-nitro-version] - # if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success' - # permissions: - # contents: write - # steps: - # - name: Clone - # id: checkout - # uses: actions/checkout@v3 - # with: - # submodules: recursive - - # - name: Dependencies - # id: depends - # continue-on-error: true - # run: | - # brew update - # brew install sdl2 - - # - name: Prepare Vulkan SDK - # uses: humbletim/setup-vulkan-sdk@v1.2.0 - # with: - # vulkan-query-version: 1.3.204.0 - # vulkan-components: Vulkan-Headers, Vulkan-Loader - # vulkan-use-cache: true - - # - name: Build - # id: cmake_build - # run: | - # ./install_deps.sh - # mkdir build && cd build - # cmake -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} -DLLAMA_VULKAN=ON -DLLAMA_METAL=OFF .. - # CC=gcc-8 make -j $(sysctl -n hw.ncp) - # ls -la - - # - name: Package - # shell: bash - # run: | - # mkdir -p nitro - # cp build/nitro nitro/ - # tar -czvf nitro.tar.gz nitro - - # - name: Upload Artifact - # uses: actions/upload-artifact@v2 - # if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' - # with: - # name: nitro-mac-amd64 - # path: ./nitro - - # - name: Run e2e testing - LLama.CPP - # shell: bash - # run: | - # # run e2e testing - # cd nitro - # chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} - # rm -rf uploads/ - - # - name: Run e2e testing - Whisper.CPP - # shell: bash - # run: | - # # run e2e testing - # cd nitro - # chmod +x ../.github/scripts/e2e-test-whisper-linux-and-mac.sh && ../.github/scripts/e2e-test-whisper-linux-and-mac.sh ./nitro ${{ env.WHISPER_MODEL_URL }} - # rm -rf uploads/ - - # - uses: actions/upload-release-asset@v1.0.1 - # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # with: - # upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - # asset_path: ./nitro.tar.gz - # asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-amd64-vulkan.tar.gz - # asset_content_type: application/gzip - windows-amd64-build: runs-on: windows-latest needs: [create-draft-release, set-nitro-version] if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success' + timeout-minutes: 20 permissions: contents: write @@ -643,6 +557,7 @@ jobs: runs-on: windows-latest needs: [create-draft-release, set-nitro-version] if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success' + timeout-minutes: 20 permissions: contents: write @@ -729,6 +644,7 @@ jobs: runs-on: windows-cuda-${{ matrix.cuda }} needs: [create-draft-release, set-nitro-version] if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success' + timeout-minutes: 20 permissions: contents: write @@ -822,6 +738,7 @@ jobs: update_release_draft: if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + timeout-minutes: 20 needs: [ ubuntu-amd64-build, @@ -830,7 +747,6 @@ jobs: macOS-amd64-build, windows-amd64-build, windows-amd64-cuda-build, - # macOS-amd64-vulkan-build, ubuntu-amd64-vulkan-build, windows-amd64-vulkan-build, ] @@ -844,7 +760,8 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} noti-discord-nightly: - if: always() && github.event_name == 'schedule' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success' + timeout-minutes: 20 + if: github.event_name == 'schedule' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success' needs: [ create-draft-release, @@ -854,7 +771,6 @@ jobs: macOS-amd64-build, windows-amd64-build, windows-amd64-cuda-build, - # macOS-amd64-vulkan-build, ubuntu-amd64-vulkan-build, windows-amd64-vulkan-build, ] @@ -883,7 +799,8 @@ jobs: GITHUB_RUN_ID: ${{ github.run_id }} noti-discord-manual: - if: always() && github.event_name == 'workflow_dispatch' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success' + timeout-minutes: 20 + if: github.event_name == 'workflow_dispatch' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success' needs: [ create-draft-release, @@ -893,7 +810,6 @@ jobs: macOS-amd64-build, windows-amd64-build, windows-amd64-cuda-build, - # macOS-amd64-vulkan-build, ubuntu-amd64-vulkan-build, windows-amd64-vulkan-build, ]