diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat
index 4302bc946..7de9c5b67 100644
--- a/.github/scripts/e2e-test-llama-windows.bat
+++ b/.github/scripts/e2e-test-llama-windows.bat
@@ -25,10 +25,6 @@ set /a max=11000
 set /a range=max-min+1
 set /a PORT=%min% + %RANDOM% %% %range%
 
-rem Kill any existing Nitro processes
-echo Killing any existing Nitro processes...
-taskkill /f /im nitro.exe 2>nul
-
 rem Start the binary file
 start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1
 
@@ -64,9 +60,9 @@ echo curl_data1=%curl_data1%
 echo curl_data2=%curl_data2%
 
 rem Run the curl commands and capture the status code
-curl.exe -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1
+curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1
 
-curl.exe -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^
+curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^
 --header "Content-Type: application/json" ^
 --header "Accept: text/event-stream" ^
 --header "Access-Control-Allow-Origin: *" ^
@@ -78,13 +74,13 @@ rem Read the status codes from the log files
 for /f %%a in (%TEMP%\response1.log) do set "response1=%%a"
 for /f %%a in (%TEMP%\response2.log) do set "response2=%%a"
 
-if "%response1%" neq "000" (
+if "%response1%" neq "200" (
     echo The first curl command failed with status code: %response1%
     type %TEMP%\response1.log
     set "error_occurred=1"
 )
 
-if "%response2%" neq "000" (
+if "%response2%" neq "200" (
    echo The second curl command failed with status code: %response2%
    type %TEMP%\response2.log
    set "error_occurred=1"
 )
 
@@ -111,4 +107,4 @@ echo Nitro test run successfully!
 
 rem Kill the server process
 @REM taskkill /f /pid %pid%
-taskkill /f /im nitro.exe 2>nul || exit /B 0
+taskkill /f /im nitro.exe 2>nul || exit /B 0
\ No newline at end of file
diff --git a/.github/scripts/e2e-test-whisper-windows.bat b/.github/scripts/e2e-test-whisper-windows.bat
index b4a06bc09..a47b0e004 100644
--- a/.github/scripts/e2e-test-whisper-windows.bat
+++ b/.github/scripts/e2e-test-whisper-windows.bat
@@ -25,10 +25,6 @@ set /a max=11000
 set /a range=max-min+1
 set /a PORT=%min% + %RANDOM% %% %range%
 
-rem Kill any existing Nitro processes
-echo Killing any existing Nitro processes...
-taskkill /f /im nitro.exe 2>nul
-
 rem Start the binary file
 start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1
 
@@ -49,28 +45,21 @@ if not defined pid (
 
 rem Wait for a few seconds to let the server start
 
-rem Check if %TEMP%\testmodel exists, if not, download it
+rem Check if %TEMP%\testwhisper exists, if not, download it
 if not exist "%MODEL_PATH%" (
     bitsadmin.exe /transfer "DownloadTestModel" %DOWNLOAD_URL% "%MODEL_PATH%"
 )
 
 rem Define JSON strings for curl data
 call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
-set "curl_data1={\"model_path\":\"%MODEL_PATH_STRING%\",\"model_id\":\"whisper.cpp\"}"
-
-rem Print the values of curl_data1 for debugging
-echo curl_data1=%curl_data1%
+set "curl_data1={\"model_path\":\"%MODEL_PATH_STRING%\",\"model_id\":\"whisper\"}"
 
 rem Run the curl commands and capture the status code
-curl.exe -o %TEMP%\response1_code.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1
+curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1
 
-curl.exe -o %TEMP%\response2_code.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/transcriptions" ^
---header "Access-Control-Allow-Origin: *" ^
---form 'model_id="whisper.cpp"' ^
---form 'file=@"..\whisper.cpp\samples\jfk.wav"' ^
---form 'temperature="0.0"' ^
---form 'prompt="The transcript is about OpenAI which makes technology like DALL·E, GPT-3, and ChatGPT with the hope of one day building an AGI system that benefits all of humanity. The president is trying to raly people to support the cause."' ^
-> %TEMP%\response2_code.log 2>&1
+curl.exe -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/transcriptions" ^
+--form "file=@../../whisper.cpp/samples/jfk.wav" ^
+--form "model_id=whisper" > %TEMP%\response2_code.log 2>&1
 
 set "error_occurred=0"
 
@@ -80,13 +69,13 @@ for /f %%a in (%TEMP%\response2_code.log) do set "response2=%%a"
 
 if "%response1%" neq "200" (
    echo The first curl command failed with status code: %response1%
-    type %TEMP%\response1_code.log
+    type %TEMP%\response1.log
    set "error_occurred=1"
 )
 
-if "%response2%" neq "000" (
+if "%response2%" neq "200" (
    echo The second curl command failed with status code: %response2%
-    type %TEMP%\response2_code.log
+    type %TEMP%\response2.log
    set "error_occurred=1"
 )
 
@@ -101,14 +90,13 @@ if "%error_occurred%"=="1" (
 
 echo ----------------------
 echo Log load model:
-type %TEMP%\response1_code.log
+type %TEMP%\response1.log
 
 echo ----------------------
 echo "Log run test:"
-type %TEMP%\response2_code.log
+type %TEMP%\response2.log
 
 echo Nitro test run successfully!
 rem Kill the server process
-@REM taskkill /f /pid %pid%
 taskkill /f /im nitro.exe 2>nul || exit /B 0
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 5b46ef66e..faeaa4eb4 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -50,7 +50,7 @@ on:
 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
   LLM_MODEL_URL: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
-  WHISPER_MODEL_URL: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin
+  WHISPER_MODEL_URL: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin
 
 jobs:
   create-draft-release:
@@ -377,11 +377,11 @@ jobs:
         run: |
           # To test with CoreML
           if [[ ! -f "/tmp/testwhisper-encoder.mlmodelc" ]]; then
-            wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-encoder.mlmodelc.zip
-            unzip ggml-tiny.en-encoder.mlmodelc.zip
-            rm ggml-tiny.en-encoder.mlmodelc.zip
+            wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-encoder.mlmodelc.zip
+            unzip ggml-tiny-encoder.mlmodelc.zip
+            rm ggml-tiny-encoder.mlmodelc.zip
             rm -rf /tmp/testwhisper-encoder.mlmodelc
-            mv ggml-tiny.en-encoder.mlmodelc /tmp/testwhisper-encoder.mlmodelc
+            mv ggml-tiny-encoder.mlmodelc /tmp/testwhisper-encoder.mlmodelc
           fi
           # run e2e testing
           cd nitro
@@ -586,7 +586,7 @@ jobs:
          cmake --build ./build_deps/nitro_deps --config Release
          mkdir -p build
          cd build
-          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
+          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
          cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
 
      - name: Pack artifacts
@@ -602,15 +602,19 @@ jobs:
          7z a -ttar temp.tar .\build\Release\*
          7z a -tgzip nitro.tar.gz temp.tar
 
-      # - name: Run e2e testing - Llama.cpp
-      #   shell: cmd
-      #   run: |
-      #     .\.github\scripts\e2e-test-llama-windows.bat .\build\Release\nitro.exe ${{ env.LLM_MODEL_URL }}
+      - name: Run e2e testing - Llama.cpp
+        shell: cmd
+        run: |
+          cd build\Release
+          ..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }}
+          rmdir /S /Q .\uploads
 
-      # - name: Run e2e testing - Whisper.cpp
-      #   shell: cmd
-      #   run: |
-      #     .\.github\scripts\e2e-test-whisper-windows.bat .\build\Release\nitro.exe ${{ env.WHISPER_MODEL_URL }}
+      - name: Run e2e testing - Whisper.cpp
+        shell: cmd
+        run: |
+          cd build\Release
+          ..\..\.github\scripts\e2e-test-whisper-windows.bat nitro.exe ${{ env.WHISPER_MODEL_URL }}
+          rmdir /S /Q .\uploads
 
      - name: Upload Artifact
        uses: actions/upload-artifact@v2
@@ -679,7 +683,7 @@ jobs:
          cmake --build ./build_deps/nitro_deps --config Release
          mkdir -p build
          cd build
-          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
+          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
          cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
 
      - name: Pack artifacts
@@ -770,7 +774,7 @@ jobs:
          cmake --build ./build_deps/nitro_deps --config Release
          mkdir -p build
          cd build
-          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DWHISPER_CUBLAS=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
+          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DWHISPER_CUBLAS=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
          cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
 
      - name: Pack artifacts
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d452110cd..78443b585 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -59,7 +59,7 @@ add_executable(${PROJECT_NAME} main.cc)
 #
 # and comment out the following lines
 find_package(Drogon CONFIG REQUIRED)
-target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon common llama llava whisper
+target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon common llama whisper llava
                       ${CMAKE_THREAD_LIBS_INIT})
 
 # ##############################################################################
diff --git a/controllers/whisperCPP.cc b/controllers/whisperCPP.cc
index b9b9b8bea..a2039f396 100644
--- a/controllers/whisperCPP.cc
+++ b/controllers/whisperCPP.cc
@@ -872,7 +872,9 @@ void whisperCPP::load_model(
          (*jsonBody)["warm_up_audio_path"].asString();
      // Return 400 error if warm up audio path is not found
      if (!is_file_exist(warm_up_audio_path.c_str())) {
-        std::string error_msg = "Warm up audio " + warm_up_audio_path + " not found, please provide a valid path or don't specify it at all";
+        std::string error_msg =
+            "Warm up audio " + warm_up_audio_path +
+            " not found, please provide a valid path or don't specify it at all";
        LOG_INFO << error_msg;
        Json::Value jsonResp;
        jsonResp["message"] = error_msg;
@@ -881,9 +883,10 @@ void whisperCPP::load_model(
        callback(resp);
        return;
      } else {
-        LOG_INFO << "Warming up model " << model_id << " with audio " << warm_up_audio_path << " ...";
-        std::string warm_up_result = whisper.inference(
-            warm_up_audio_path, "en", "", text_format, 0, false);
+        LOG_INFO << "Warming up model " << model_id << " with audio "
+                 << warm_up_audio_path << " ...";
+        std::string warm_up_result = whisper.inference(warm_up_audio_path, "en",
+                                                       "", text_format, 0, false);
        LOG_INFO << "Warm up model " << model_id << " completed";
      }
    } else {