janhq · hiento09 · Mar 6, 2024 · Mar 5, 2024 · Mar 5, 2024 · Mar 5, 2024
diff --git a/.github/scripts/e2e-test-llama-linux-and-mac.sh b/.github/scripts/e2e-test-llama-linux-and-mac.sh
@@ -21,7 +21,7 @@ range=$((max - min + 1))
 PORT=$((RANDOM % range + min))
 
 # Start the binary file
-"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log 2>&1 &
+"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log &
 
 # Get the process id of the binary file
 pid=$!
@@ -37,21 +37,27 @@ sleep 5
 
 # Check if /tmp/testllm exists, if not, download it
 if [[ ! -f "/tmp/testllm" ]]; then
-    wget $DOWNLOAD_URL -O /tmp/testllm
+    curl --fail --location --connect-timeout 300 "$DOWNLOAD_URL" --output /tmp/testllm # --fail: never save an HTTP error page as the model; --location: follow redirects as wget did
 fi
 
 # Run the curl commands
-response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
+response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
     --header 'Content-Type: application/json' \
     --data '{
     "llama_model_path": "/tmp/testllm",
     "ctx_len": 50,
     "ngl": 32,
     "embedding": false
-}' 2>&1)
+}')
+
+if ! ps -p $pid >/dev/null; then
+    echo "nitro failed to load model. Logs:"
+    cat /tmp/nitro.log
+    exit 1
+fi
 
 response2=$(
-    curl -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
+    curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
         --header 'Content-Type: application/json' \
         --header 'Accept: text/event-stream' \
         --header 'Access-Control-Allow-Origin: *' \
@@ -67,7 +73,7 @@ response2=$(
         "frequency_penalty": 0,
         "presence_penalty": 0,
         "temperature": 0.1
-     }' 2>&1
+     }'
 )
 
 error_occurred=0

diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat
@@ -47,7 +47,7 @@ rem Wait for a few seconds to let the server start
 
 rem Check if %TEMP%\testmodel exists, if not, download it
 if not exist "%MODEL_PATH%" (
-    bitsadmin.exe /transfer "DownloadTestModel" %DOWNLOAD_URL% "%MODEL_PATH%"
+    curl.exe --fail --location --connect-timeout 300 "%DOWNLOAD_URL%" --output "%MODEL_PATH%"
 )
 
 rem Define JSON strings for curl data

diff --git a/.github/scripts/e2e-test-whisper-linux-and-mac.sh b/.github/scripts/e2e-test-whisper-linux-and-mac.sh
@@ -21,7 +21,7 @@ range=$((max - min + 1))
 PORT=$((RANDOM % range + min))
 
 # Start the binary file
-"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log 2>&1 &
+"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log &
 
 # Get the process id of the binary file
 pid=$!
@@ -37,25 +37,25 @@ sleep 5
 
 # Check if /tmp/testwhisper exists, if not, download it
 if [[ ! -f "/tmp/testwhisper" ]]; then
-    wget $DOWNLOAD_URL -O /tmp/testwhisper
+    curl --fail --location --connect-timeout 300 "$DOWNLOAD_URL" --output /tmp/testwhisper # --fail: never save an HTTP error page as the model; --location: follow redirects as wget did
 fi
 
 # Run the curl commands
-response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/audio/load_model" \
+response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/audio/load_model" \
     --header 'Content-Type: application/json' \
     --data '{
     "model_path": "/tmp/testwhisper",
     "model_id": "whisper.cpp"
-}' 2>&1)
+}')
 
 response2=$(
-    curl -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/audio/transcriptions" \
+    curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/audio/transcriptions" \
         --header 'Access-Control-Allow-Origin: *' \
         --form 'file=@"../whisper.cpp/samples/jfk.wav"' \
         --form 'model_id="whisper.cpp"' \
         --form 'temperature="0.0"' \
         --form 'prompt="The transcript is about OpenAI which makes technology like DALL·E, GPT-3, and ChatGPT with the hope of one day building an AGI system that benefits all of humanity. The president is trying to raly people to support the cause."' \
-        2>&1
+
 )
 
 error_occurred=0

diff --git a/.github/scripts/e2e-test-whisper-windows.bat b/.github/scripts/e2e-test-whisper-windows.bat
@@ -47,17 +47,17 @@ rem Wait for a few seconds to let the server start
 
 rem Check if %TEMP%\testwhisper exists, if not, download it
 if not exist "%MODEL_PATH%" (
-    bitsadmin.exe /transfer "DownloadTestModel" %DOWNLOAD_URL% "%MODEL_PATH%"
+    curl.exe --fail --location --connect-timeout 300 "%DOWNLOAD_URL%" --output "%MODEL_PATH%"
 )
 
 rem Define JSON strings for curl data
 call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
 set "curl_data1={\"model_path\":\"%MODEL_PATH_STRING%\",\"model_id\":\"whisper\"}"
 
 rem Run the curl commands and capture the status code
-curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1
+curl.exe --connect-timeout 60 -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1
 
-curl -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://localhost:%PORT%/v1/audio/transcriptions" ^
+curl --connect-timeout 60 -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://localhost:%PORT%/v1/audio/transcriptions" ^
 --form "file=@../..//whisper.cpp/samples/jfk.wav" ^
 --form "model_id=whisper" > %TEMP%\response2_code.log 2>&1
 

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -49,8 +49,8 @@ on:
 
 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
-  LLM_MODEL_URL: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
-  WHISPER_MODEL_URL: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin
+  LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
+  WHISPER_MODEL_URL: https://delta.jan.ai/ggml-tiny-q5_1.bin
 
 jobs:
   create-draft-release:
@@ -125,6 +125,7 @@ jobs:
     runs-on: ubuntu-18-04-cuda-11-7
     needs: [create-draft-release, set-nitro-version]
     if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
+    timeout-minutes: 20
     permissions:
       contents: write
     steps:
@@ -188,6 +189,7 @@ jobs:
     runs-on: ubuntu-18-04-cuda-11-7
     needs: [create-draft-release, set-nitro-version]
     if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
+    timeout-minutes: 20
     permissions:
       contents: write
     steps:
@@ -231,22 +233,6 @@ jobs:
           name: nitro-linux-amd64-vulkan
           path: ./nitro
 
-      # - name: Run e2e testing - LLama.CPP
-      #   shell: bash
-      #   run: |
-      #     # run e2e testing
-      #     cd nitro
-      #     chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
-      #     rm -rf uploads/
-
-      # - name: Run e2e testing - Whisper.CPP
-      #   shell: bash
-      #   run: |
-      #     # run e2e testing
-      #     cd nitro
-      #     chmod +x ../.github/scripts/e2e-test-whisper-linux-and-mac.sh && ../.github/scripts/e2e-test-whisper-linux-and-mac.sh ./nitro ${{ env.WHISPER_MODEL_URL }}
-      #     rm -rf uploads/
-
       - uses: actions/upload-release-asset@v1.0.1
         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
         env:
@@ -261,6 +247,7 @@ jobs:
     runs-on: ubuntu-18-04-cuda-${{ matrix.cuda }}
     needs: [create-draft-release, set-nitro-version]
     if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
+    timeout-minutes: 20
     permissions:
       contents: write
     strategy:
@@ -297,21 +284,21 @@ jobs:
           name: nitro-linux-amd64-cuda-${{ matrix.cuda }}
           path: ./nitro
 
-      - name: Run e2e testing - LLama.CPP
-        shell: bash
-        run: |
-          # run e2e testing
-          cd nitro
-          chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
-          rm -rf uploads/
+      # - name: Run e2e testing - LLama.CPP
+      #   shell: bash
+      #   run: |
+      #     # run e2e testing
+      #     cd nitro
+      #     chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
+      #     rm -rf uploads/
 
-      - name: Run e2e testing - Whisper.CPP
-        shell: bash
-        run: |
-          # run e2e testing
-          cd nitro
-          chmod +x ../.github/scripts/e2e-test-whisper-linux-and-mac.sh && ../.github/scripts/e2e-test-whisper-linux-and-mac.sh ./nitro ${{ env.WHISPER_MODEL_URL }}
-          rm -rf uploads/
+      # - name: Run e2e testing - Whisper.CPP
+      #   shell: bash
+      #   run: |
+      #     # run e2e testing
+      #     cd nitro
+      #     chmod +x ../.github/scripts/e2e-test-whisper-linux-and-mac.sh && ../.github/scripts/e2e-test-whisper-linux-and-mac.sh ./nitro ${{ env.WHISPER_MODEL_URL }}
+      #     rm -rf uploads/
 
       - uses: actions/upload-release-asset@v1.0.1
         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
@@ -327,6 +314,7 @@ jobs:
     runs-on: mac-silicon
     needs: [create-draft-release, set-nitro-version]
     if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
+    timeout-minutes: 20
     permissions:
       contents: write
     steps:
@@ -405,6 +393,7 @@ jobs:
     runs-on: macos-latest
     needs: [create-draft-release, set-nitro-version]
     if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
+    timeout-minutes: 20
     permissions:
       contents: write
     steps:
@@ -470,86 +459,11 @@ jobs:
           asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-amd64.tar.gz
           asset_content_type: application/gzip
 
-  # macOS-amd64-vulkan-build:
-  #   runs-on: macos-latest
-  #   needs: [create-draft-release, set-nitro-version]
-  #   if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
-  #   permissions:
-  #     contents: write
-  #   steps:
-  #     - name: Clone
-  #       id: checkout
-  #       uses: actions/checkout@v3
-  #       with:
-  #         submodules: recursive
-
-  #     - name: Dependencies
-  #       id: depends
-  #       continue-on-error: true
-  #       run: |
-  #         brew update
-  #         brew install sdl2
-
-  #     - name: Prepare Vulkan SDK
-  #       uses: humbletim/setup-vulkan-sdk@v1.2.0
-  #       with:
-  #         vulkan-query-version: 1.3.204.0
-  #         vulkan-components: Vulkan-Headers, Vulkan-Loader
-  #         vulkan-use-cache: true
-
-  #     - name: Build
-  #       id: cmake_build
-  #       run: |
-  #         ./install_deps.sh
-  #         mkdir build && cd build
-  #         cmake -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} -DLLAMA_VULKAN=ON -DLLAMA_METAL=OFF .. 
-  #         CC=gcc-8 make -j $(sysctl -n hw.ncp)
-  #         ls -la
-
-  #     - name: Package
-  #       shell: bash
-  #       run: |
-  #         mkdir -p nitro
-  #         cp build/nitro nitro/
-  #         tar -czvf nitro.tar.gz nitro
-
-  #     - name: Upload Artifact
-  #       uses: actions/upload-artifact@v2
-  #       if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
-  #       with:
-  #         name: nitro-mac-amd64
-  #         path: ./nitro
-
-  #     - name: Run e2e testing - LLama.CPP
-  #       shell: bash
-  #       run: |
-  #         # run e2e testing
-  #         cd nitro
-  #         chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
-  #         rm -rf uploads/
-
-  #     - name: Run e2e testing - Whisper.CPP
-  #       shell: bash
-  #       run: |
-  #         # run e2e testing
-  #         cd nitro
-  #         chmod +x ../.github/scripts/e2e-test-whisper-linux-and-mac.sh && ../.github/scripts/e2e-test-whisper-linux-and-mac.sh ./nitro ${{ env.WHISPER_MODEL_URL }}
-  #         rm -rf uploads/
-
-  #     - uses: actions/upload-release-asset@v1.0.1
-  #       if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
-  #       env:
-  #         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  #       with:
-  #         upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
-  #         asset_path: ./nitro.tar.gz
-  #         asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-amd64-vulkan.tar.gz
-  #         asset_content_type: application/gzip
-
   windows-amd64-build:
     runs-on: windows-latest
     needs: [create-draft-release, set-nitro-version]
     if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
+    timeout-minutes: 20
     permissions:
       contents: write
 
@@ -643,6 +557,7 @@ jobs:
     runs-on: windows-latest
     needs: [create-draft-release, set-nitro-version]
     if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
+    timeout-minutes: 20
     permissions:
       contents: write
 
@@ -729,6 +644,7 @@ jobs:
     runs-on: windows-cuda-${{ matrix.cuda }}
     needs: [create-draft-release, set-nitro-version]
     if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-nitro-version.result == 'success'
+    timeout-minutes: 20
     permissions:
       contents: write
 
@@ -822,6 +738,7 @@ jobs:
 
   update_release_draft:
     if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+    timeout-minutes: 20
     needs:
       [
         ubuntu-amd64-build,
@@ -830,7 +747,6 @@ jobs:
         macOS-amd64-build,
         windows-amd64-build,
         windows-amd64-cuda-build,
-        # macOS-amd64-vulkan-build,
         ubuntu-amd64-vulkan-build,
         windows-amd64-vulkan-build,
       ]
@@ -844,7 +760,8 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
   noti-discord-nightly:
-    if: always() && github.event_name == 'schedule' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success'
+    timeout-minutes: 20
+    if: always() && github.event_name == 'schedule' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success' # always() keeps this job from being auto-skipped when create-draft-release is skipped
     needs:
       [
         create-draft-release,
@@ -854,7 +771,6 @@ jobs:
         macOS-amd64-build,
         windows-amd64-build,
         windows-amd64-cuda-build,
-        # macOS-amd64-vulkan-build,
         ubuntu-amd64-vulkan-build,
         windows-amd64-vulkan-build,
       ]
@@ -883,7 +799,8 @@ jobs:
           GITHUB_RUN_ID: ${{ github.run_id }}
 
   noti-discord-manual:
-    if: always() && github.event_name == 'workflow_dispatch' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success'
+    timeout-minutes: 20
+    if: always() && github.event_name == 'workflow_dispatch' && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.ubuntu-amd64-build.result == 'success' && needs.ubuntu-amd64-cuda-build.result == 'success' && needs.macOS-silicon-build.result == 'success' && needs.macOS-amd64-build.result == 'success' && needs.windows-amd64-build.result == 'success' && needs.windows-amd64-cuda-build.result == 'success' # always() keeps this job from being auto-skipped when create-draft-release is skipped
     needs:
       [
         create-draft-release,
@@ -893,7 +810,6 @@ jobs:
         macOS-amd64-build,
         windows-amd64-build,
         windows-amd64-cuda-build,
-        # macOS-amd64-vulkan-build,
         ubuntu-amd64-vulkan-build,
         windows-amd64-vulkan-build,
       ]