From 9cf63b1c3ebdec701e7ccf96a3286d1ed91bb82c Mon Sep 17 00:00:00 2001 From: hiro Date: Wed, 31 Jan 2024 21:49:30 +0700 Subject: [PATCH 01/11] fix: windows build with -DBUILD_SHARED_LIBS=OFF --- .github/workflows/build.yml | 6 +++--- CMakeLists.txt | 2 +- controllers/whisperCPP.cc | 13 +++++++------ 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5b46ef66e..d4462d1fd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -586,7 +586,7 @@ jobs: cmake --build ./build_deps/nitro_deps --config Release mkdir -p build cd build - cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} + cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%" - name: Pack artifacts @@ -679,7 +679,7 @@ jobs: cmake --build ./build_deps/nitro_deps --config Release mkdir -p build cd build - cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} + cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%" - name: Pack artifacts @@ -770,7 +770,7 @@ jobs: cmake --build ./build_deps/nitro_deps --config Release mkdir -p build cd build - cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DWHISPER_CUBLAS=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} + cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DWHISPER_CUBLAS=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%" - name: Pack artifacts diff --git a/CMakeLists.txt b/CMakeLists.txt index d452110cd..78443b585 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,7 +59,7 @@ add_executable(${PROJECT_NAME} main.cc) # # and comment out the following lines find_package(Drogon CONFIG REQUIRED) -target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon common llama llava whisper +target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon common llama whisper llava ${CMAKE_THREAD_LIBS_INIT}) # ############################################################################## diff --git a/controllers/whisperCPP.cc b/controllers/whisperCPP.cc index b9b9b8bea..3a61c5bba 100644 --- a/controllers/whisperCPP.cc +++ b/controllers/whisperCPP.cc @@ -1,6 +1,4 @@ #include "whisperCPP.h" -// #include "whisper.h" -// #include "llama.h" bool read_wav(const std::string &fname, std::vector &pcmf32, std::vector> &pcmf32s, bool stereo) { @@ -872,7 +870,9 @@ void whisperCPP::load_model( (*jsonBody)["warm_up_audio_path"].asString(); // Return 400 error if warm up audio path is not found if (!is_file_exist(warm_up_audio_path.c_str())) { - std::string error_msg = "Warm up audio " + warm_up_audio_path + " not found, please provide a valid path or don't specify it at all"; + std::string error_msg = + "Warm up audio " + warm_up_audio_path + + " not found, please provide a valid path or don't specify it at all"; LOG_INFO << error_msg; Json::Value jsonResp; jsonResp["message"] = error_msg; @@ -881,9 +881,10 @@ void whisperCPP::load_model( callback(resp); return; } else { - LOG_INFO << "Warming up model " << model_id << " with audio " << warm_up_audio_path << " ..."; - std::string warm_up_result = whisper.inference( - warm_up_audio_path, "en", "", text_format, 0, false); + LOG_INFO << "Warming up model " << model_id << " with audio " + << warm_up_audio_path << " ..."; + std::string warm_up_result = whisper.inference(warm_up_audio_path, "en", + "", text_format, 0, false); LOG_INFO << "Warm up model " << model_id << " completed"; } } else { From 51c333aa251a1bff19137a2ec2c6d39efc34e7a9 Mon Sep 17 00:00:00 2001 From: hiro Date: Wed, 31 Jan 2024 22:07:35 +0700 Subject: [PATCH 02/11] fix(ci): Add e2e test for windows --- .github/scripts/e2e-test-llama-windows.bat | 6 ++--- .github/workflows/build.yml | 30 ++++++++++++++++------ 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat index 4302bc946..1e018ed20 100644 --- a/.github/scripts/e2e-test-llama-windows.bat +++ b/.github/scripts/e2e-test-llama-windows.bat @@ -66,7 +66,7 @@ echo curl_data2=%curl_data2% rem Run the curl commands and capture the status code curl.exe -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 -curl.exe -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^ +curl.exe -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/chat/completions" ^ --header "Content-Type: application/json" ^ --header "Accept: text/event-stream" ^ --header "Access-Control-Allow-Origin: *" ^ @@ -78,13 +78,13 @@ rem Read the status codes from the log files for /f %%a in (%TEMP%\response1.log) do set "response1=%%a" for /f %%a in (%TEMP%\response2.log) do set "response2=%%a" -if "%response1%" neq "000" ( +if "%response1%" neq "200" ( echo The first curl command failed with status code: %response1% type %TEMP%\response1.log set "error_occurred=1" ) -if "%response2%" neq "000" ( +if "%response2%" neq "200" ( echo The second curl command failed with status code: %response2% type %TEMP%\response2.log set "error_occurred=1" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d4462d1fd..04f8b920e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -602,15 +602,17 @@ jobs: 7z a -ttar temp.tar .\build\Release\* 7z a -tgzip nitro.tar.gz temp.tar - # - name: Run e2e testing - Llama.cpp - # shell: cmd - # run: | - # .\.github\scripts\e2e-test-llama-windows.bat .\build\Release\nitro.exe ${{ env.LLM_MODEL_URL }} + - name: Run e2e testing - Llama.cpp + shell: cmd + run: | + .\.github\scripts\e2e-test-llama-windows.bat .\build\Release\nitro.exe ${{ env.LLM_MODEL_URL }} + rmdir /S /Q .\build\Release\uploads - # - name: Run e2e testing - Whisper.cpp - # shell: cmd - # run: | - # .\.github\scripts\e2e-test-whisper-windows.bat .\build\Release\nitro.exe ${{ env.WHISPER_MODEL_URL }} + - name: Run e2e testing - Whisper.cpp + shell: cmd + run: | + .\.github\scripts\e2e-test-whisper-windows.bat .\build\Release\nitro.exe ${{ env.WHISPER_MODEL_URL }} + rmdir /S /Q .\build\Release\uploads - name: Upload Artifact uses: actions/upload-artifact@v2 @@ -773,6 +775,18 @@ jobs: cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DWHISPER_CUBLAS=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%" + - name: Run e2e testing - Llama.cpp + shell: cmd + run: | + .\.github\scripts\e2e-test-llama-windows.bat .\build\Release\nitro.exe ${{ env.LLM_MODEL_URL }} + rmdir /S /Q .\build\Release\uploads + + - name: Run e2e testing - Whisper.cpp + shell: cmd + run: | + .\.github\scripts\e2e-test-whisper-windows.bat .\build\Release\nitro.exe ${{ env.WHISPER_MODEL_URL }} + rmdir /S /Q .\build\Release\uploads + - name: Pack artifacts id: pack_artifacts shell: cmd From 1c658f57a51d160b8ad47742853efaf31ae3c56c Mon Sep 17 00:00:00 2001 From: hiro Date: Wed, 31 Jan 2024 22:36:27 +0700 Subject: [PATCH 03/11] fix(ci): Windows e2e test --- .github/scripts/e2e-test-whisper-windows.bat | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/scripts/e2e-test-whisper-windows.bat b/.github/scripts/e2e-test-whisper-windows.bat index b4a06bc09..a0c621cc9 100644 --- a/.github/scripts/e2e-test-whisper-windows.bat +++ b/.github/scripts/e2e-test-whisper-windows.bat @@ -56,7 +56,7 @@ if not exist "%MODEL_PATH%" ( rem Define JSON strings for curl data call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%" -set "curl_data1={\"model_path\":\"%MODEL_PATH_STRING%\",\"model_id\":\"whisper.cpp\"}" +set "curl_data1={\"model_path\":\"%MODEL_PATH_STRING%\",\"model_id\":\"whisper\"}" rem Print the values of curl_data1 for debugging echo curl_data1=%curl_data1% @@ -66,10 +66,8 @@ curl.exe -o %TEMP%\response1_code.log -s -w "%%{http_code}" --location "http://1 curl.exe -o %TEMP%\response2_code.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/transcriptions" ^ --header "Access-Control-Allow-Origin: *" ^ ---form 'model_id="whisper.cpp"' ^ +--form 'model_id="whisper"' ^ --form 'file=@"..\whisper.cpp\samples\jfk.wav"' ^ ---form 'temperature="0.0"' ^ ---form 'prompt="The transcript is about OpenAI which makes technology like DALLĀ·E, GPT-3, and ChatGPT with the hope of one day building an AGI system that benefits all of humanity. The president is trying to raly people to support the cause."' ^ > %TEMP%\response2_code.log 2>&1 set "error_occurred=0" From 3aab5f94e4b3b8ab956383a7f31f7a5fab118d0d Mon Sep 17 00:00:00 2001 From: hiro Date: Wed, 31 Jan 2024 23:11:39 +0700 Subject: [PATCH 04/11] fix(ci): Update whisper script test with correct wav file path --- .github/scripts/e2e-test-whisper-windows.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/e2e-test-whisper-windows.bat b/.github/scripts/e2e-test-whisper-windows.bat index a0c621cc9..60b852ab6 100644 --- a/.github/scripts/e2e-test-whisper-windows.bat +++ b/.github/scripts/e2e-test-whisper-windows.bat @@ -67,7 +67,7 @@ curl.exe -o %TEMP%\response1_code.log -s -w "%%{http_code}" --location "http://1 curl.exe -o %TEMP%\response2_code.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/transcriptions" ^ --header "Access-Control-Allow-Origin: *" ^ --form 'model_id="whisper"' ^ ---form 'file=@"..\whisper.cpp\samples\jfk.wav"' ^ +--form 'file=@"whisper.cpp\samples\jfk.wav"' ^ > %TEMP%\response2_code.log 2>&1 set "error_occurred=0" From abaaf053ab5f08438b60203a07bfa66b8975658d Mon Sep 17 00:00:00 2001 From: hiro Date: Wed, 31 Jan 2024 23:35:11 +0700 Subject: [PATCH 05/11] fix(ci): Update windows e2e test for llama.cpp and whisper.cpp --- .github/scripts/e2e-test-llama-windows.bat | 10 +++++----- .github/scripts/e2e-test-whisper-windows.bat | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat index 1e018ed20..c681b066d 100644 --- a/.github/scripts/e2e-test-llama-windows.bat +++ b/.github/scripts/e2e-test-llama-windows.bat @@ -57,16 +57,16 @@ if not exist "%MODEL_PATH%" ( rem Define JSON strings for curl data call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%" set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}" -set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}" +set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"a\",\"role\":\"user\"}],\"stream\":false,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":2,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}" rem Print the values of curl_data1 and curl_data2 for debugging echo curl_data1=%curl_data1% echo curl_data2=%curl_data2% rem Run the curl commands and capture the status code -curl.exe -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 +curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 -curl.exe -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/chat/completions" ^ +curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^ --header "Content-Type: application/json" ^ --header "Accept: text/event-stream" ^ --header "Access-Control-Allow-Origin: *" ^ @@ -78,13 +78,13 @@ rem Read the status codes from the log files for /f %%a in (%TEMP%\response1.log) do set "response1=%%a" for /f %%a in (%TEMP%\response2.log) do set "response2=%%a" -if "%response1%" neq "200" ( +if "%response1%" neq "000" ( echo The first curl command failed with status code: %response1% type %TEMP%\response1.log set "error_occurred=1" ) -if "%response2%" neq "200" ( +if "%response2%" neq "000" ( echo The second curl command failed with status code: %response2% type %TEMP%\response2.log set "error_occurred=1" diff --git a/.github/scripts/e2e-test-whisper-windows.bat b/.github/scripts/e2e-test-whisper-windows.bat index 60b852ab6..119f58b5a 100644 --- a/.github/scripts/e2e-test-whisper-windows.bat +++ b/.github/scripts/e2e-test-whisper-windows.bat @@ -62,12 +62,12 @@ rem Print the values of curl_data1 for debugging echo curl_data1=%curl_data1% rem Run the curl commands and capture the status code -curl.exe -o %TEMP%\response1_code.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1 +curl.exe --connect-timeout 60 -o %TEMP%\response1_code.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1 -curl.exe -o %TEMP%\response2_code.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/transcriptions" ^ +curl.exe --connect-timeout 60 -o %TEMP%\response2_code.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/transcriptions" ^ --header "Access-Control-Allow-Origin: *" ^ --form 'model_id="whisper"' ^ ---form 'file=@"whisper.cpp\samples\jfk.wav"' ^ +--form 'file=@"..\whisper.cpp\samples\jfk.wav"' ^ > %TEMP%\response2_code.log 2>&1 set "error_occurred=0" @@ -82,7 +82,7 @@ if "%response1%" neq "200" ( set "error_occurred=1" ) -if "%response2%" neq "000" ( +if "%response2%" neq "200" ( echo The second curl command failed with status code: %response2% type %TEMP%\response2_code.log set "error_occurred=1" From 815216f3e64ca938f12d359eba0e253eadb2e21b Mon Sep 17 00:00:00 2001 From: hiro Date: Wed, 31 Jan 2024 23:57:28 +0700 Subject: [PATCH 06/11] fix(ci): Switch back to old script --- .github/scripts/e2e-test-llama-windows.bat | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat index c681b066d..d84c2ccb7 100644 --- a/.github/scripts/e2e-test-llama-windows.bat +++ b/.github/scripts/e2e-test-llama-windows.bat @@ -57,16 +57,16 @@ if not exist "%MODEL_PATH%" ( rem Define JSON strings for curl data call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%" set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}" -set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"a\",\"role\":\"user\"}],\"stream\":false,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":2,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}" +set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}" rem Print the values of curl_data1 and curl_data2 for debugging echo curl_data1=%curl_data1% echo curl_data2=%curl_data2% rem Run the curl commands and capture the status code -curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 +curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 -curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^ +curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^ --header "Content-Type: application/json" ^ --header "Accept: text/event-stream" ^ --header "Access-Control-Allow-Origin: *" ^ @@ -111,4 +111,4 @@ echo Nitro test run successfully! rem Kill the server process @REM taskkill /f /pid %pid% -taskkill /f /im nitro.exe 2>nul || exit /B 0 +taskkill /f /im nitro.exe 2>nul || exit /B 0 \ No newline at end of file From a5488043c9341199185dbfea189c6f0aa920d8a8 Mon Sep 17 00:00:00 2001 From: hiro Date: Thu, 1 Feb 2024 00:12:15 +0700 Subject: [PATCH 07/11] chore: reset code as it should be in main --- .github/scripts/e2e-test-llama-windows.bat | 4 ++-- controllers/whisperCPP.cc | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat index d84c2ccb7..7e02ee8e9 100644 --- a/.github/scripts/e2e-test-llama-windows.bat +++ b/.github/scripts/e2e-test-llama-windows.bat @@ -64,9 +64,9 @@ echo curl_data1=%curl_data1% echo curl_data2=%curl_data2% rem Run the curl commands and capture the status code -curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 +curl.exe -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 -curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^ +curl.exe -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^ --header "Content-Type: application/json" ^ --header "Accept: text/event-stream" ^ --header "Access-Control-Allow-Origin: *" ^ diff --git a/controllers/whisperCPP.cc b/controllers/whisperCPP.cc index 3a61c5bba..a2039f396 100644 --- a/controllers/whisperCPP.cc +++ b/controllers/whisperCPP.cc @@ -1,4 +1,6 @@ #include "whisperCPP.h" +// #include "whisper.h" +// #include "llama.h" bool read_wav(const std::string &fname, std::vector &pcmf32, std::vector> &pcmf32s, bool stereo) { From 1f6c1f639dd2075ac8627f70fb0b8d2749f7a89f Mon Sep 17 00:00:00 2001 From: hiro Date: Thu, 1 Feb 2024 00:29:39 +0700 Subject: [PATCH 08/11] fix(ci): Windows - e2e testing script llama.cpp --- .github/scripts/e2e-test-llama-windows.bat | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat index 7e02ee8e9..7de9c5b67 100644 --- a/.github/scripts/e2e-test-llama-windows.bat +++ b/.github/scripts/e2e-test-llama-windows.bat @@ -25,10 +25,6 @@ set /a max=11000 set /a range=max-min+1 set /a PORT=%min% + %RANDOM% %% %range% -rem Kill any existing Nitro processes -echo Killing any existing Nitro processes... -taskkill /f /im nitro.exe 2>nul - rem Start the binary file start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1 @@ -64,9 +60,9 @@ echo curl_data1=%curl_data1% echo curl_data2=%curl_data2% rem Run the curl commands and capture the status code -curl.exe -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 +curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 -curl.exe -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^ +curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^ --header "Content-Type: application/json" ^ --header "Accept: text/event-stream" ^ --header "Access-Control-Allow-Origin: *" ^ @@ -78,13 +74,13 @@ rem Read the status codes from the log files for /f %%a in (%TEMP%\response1.log) do set "response1=%%a" for /f %%a in (%TEMP%\response2.log) do set "response2=%%a" -if "%response1%" neq "000" ( +if "%response1%" neq "200" ( echo The first curl command failed with status code: %response1% type %TEMP%\response1.log set "error_occurred=1" ) -if "%response2%" neq "000" ( +if "%response2%" neq "200" ( echo The second curl command failed with status code: %response2% type %TEMP%\response2.log set "error_occurred=1" From f79150d47e5f62b600bfcb4580e8305f5fe63015 Mon Sep 17 00:00:00 2001 From: hiro Date: Thu, 1 Feb 2024 10:20:29 +0700 Subject: [PATCH 09/11] fix(ci): Update windows e2e cd and whisper model url to tiny --- .github/workflows/build.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 04f8b920e..8fdbf61e4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -50,7 +50,7 @@ on: env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} LLM_MODEL_URL: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf - WHISPER_MODEL_URL: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin + WHISPER_MODEL_URL: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin jobs: create-draft-release: @@ -377,11 +377,11 @@ jobs: run: | # To test with CoreML if [[ ! -f "/tmp/testwhisper-encoder.mlmodelc" ]]; then - wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-encoder.mlmodelc.zip - unzip ggml-tiny.en-encoder.mlmodelc.zip - rm ggml-tiny.en-encoder.mlmodelc.zip + wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-encoder.mlmodelc.zip + unzip ggml-tiny-encoder.mlmodelc.zip + rm ggml-tiny-encoder.mlmodelc.zip rm -rf /tmp/testwhisper-encoder.mlmodelc - mv ggml-tiny.en-encoder.mlmodelc /tmp/testwhisper-encoder.mlmodelc + mv ggml-tiny-encoder.mlmodelc /tmp/testwhisper-encoder.mlmodelc fi # run e2e testing cd nitro @@ -605,13 +605,15 @@ jobs: - name: Run e2e testing - Llama.cpp shell: cmd run: | - .\.github\scripts\e2e-test-llama-windows.bat .\build\Release\nitro.exe ${{ env.LLM_MODEL_URL }} + cd build\Release + ..\..\.github\scripts\e2e-test-whisper-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }} rmdir /S /Q .\build\Release\uploads - name: Run e2e testing - Whisper.cpp shell: cmd run: | - .\.github\scripts\e2e-test-whisper-windows.bat .\build\Release\nitro.exe ${{ env.WHISPER_MODEL_URL }} + cd build\Release + ..\..\.github\scripts\e2e-test-whisper-windows.bat nitro.exe ${{ env.WHISPER_MODEL_URL }} rmdir /S /Q .\build\Release\uploads - name: Upload Artifact From 2a5201d7eb8d315b2ef831c0ac384dcb175c2ed4 Mon Sep 17 00:00:00 2001 From: hiro Date: Thu, 1 Feb 2024 10:20:40 +0700 Subject: [PATCH 10/11] fix(ci-windows): Update e2e script test --- .github/scripts/e2e-test-whisper-windows.bat | 30 +++++++------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/.github/scripts/e2e-test-whisper-windows.bat b/.github/scripts/e2e-test-whisper-windows.bat index 119f58b5a..75219cded 100644 --- a/.github/scripts/e2e-test-whisper-windows.bat +++ b/.github/scripts/e2e-test-whisper-windows.bat @@ -1,7 +1,7 @@ @echo off set "TEMP=C:\Users\%UserName%\AppData\Local\Temp" -set "MODEL_PATH=%TEMP%\testwhisper" +set "MODEL_PATH=%TEMP%\testwhisper1" rem Check for required arguments if "%~2"=="" ( @@ -25,10 +25,6 @@ set /a max=11000 set /a range=max-min+1 set /a PORT=%min% + %RANDOM% %% %range% -rem Kill any existing Nitro processes -echo Killing any existing Nitro processes... -taskkill /f /im nitro.exe 2>nul - rem Start the binary file start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1 @@ -49,7 +45,7 @@ if not defined pid ( rem Wait for a few seconds to let the server start -rem Check if %TEMP%\testmodel exists, if not, download it +rem Check if %TEMP%\testwhisper exists, if not, download it if not exist "%MODEL_PATH%" ( bitsadmin.exe /transfer "DownloadTestModel" %DOWNLOAD_URL% "%MODEL_PATH%" ) @@ -58,17 +54,12 @@ rem Define JSON strings for curl data call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%" set "curl_data1={\"model_path\":\"%MODEL_PATH_STRING%\",\"model_id\":\"whisper\"}" -rem Print the values of curl_data1 for debugging -echo curl_data1=%curl_data1% - rem Run the curl commands and capture the status code -curl.exe --connect-timeout 60 -o %TEMP%\response1_code.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1 +curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1 -curl.exe --connect-timeout 60 -o %TEMP%\response2_code.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/transcriptions" ^ ---header "Access-Control-Allow-Origin: *" ^ ---form 'model_id="whisper"' ^ ---form 'file=@"..\whisper.cpp\samples\jfk.wav"' ^ -> %TEMP%\response2_code.log 2>&1 +curl -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://localhost:%PORT%/v1/audio/transcriptions" ^ +--form "file=@../..//whisper.cpp/samples/jfk.wav" ^ +--form "model_id=whisper" > %TEMP%\response2_code.log 2>&1 set "error_occurred=0" @@ -78,13 +69,13 @@ for /f %%a in (%TEMP%\response2_code.log) do set "response2=%%a" if "%response1%" neq "200" ( echo The first curl command failed with status code: %response1% - type %TEMP%\response1_code.log + type %TEMP%\response1.log set "error_occurred=1" ) if "%response2%" neq "200" ( echo The second curl command failed with status code: %response2% - type %TEMP%\response2_code.log + type %TEMP%\response2.log set "error_occurred=1" ) @@ -99,14 +90,13 @@ if "%error_occurred%"=="1" ( echo ---------------------- echo Log load model: -type %TEMP%\response1_code.log +type %TEMP%\response1.log echo ---------------------- echo "Log run test:" -type %TEMP%\response2_code.log +type %TEMP%\response2.log echo Nitro test run successfully! rem Kill the server process -@REM taskkill /f /pid %pid% taskkill /f /im nitro.exe 2>nul || exit /B 0 From 3ac498948526ac07887081ae3f5cdc1b9f9813fb Mon Sep 17 00:00:00 2001 From: hiro Date: Thu, 1 Feb 2024 10:38:49 +0700 Subject: [PATCH 11/11] fix(ci-windows): Update build steps --- .github/scripts/e2e-test-whisper-windows.bat | 2 +- .github/workflows/build.yml | 14 +------------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/.github/scripts/e2e-test-whisper-windows.bat b/.github/scripts/e2e-test-whisper-windows.bat index 75219cded..a47b0e004 100644 --- a/.github/scripts/e2e-test-whisper-windows.bat +++ b/.github/scripts/e2e-test-whisper-windows.bat @@ -1,7 +1,7 @@ @echo off set "TEMP=C:\Users\%UserName%\AppData\Local\Temp" -set "MODEL_PATH=%TEMP%\testwhisper1" +set "MODEL_PATH=%TEMP%\testwhisper" rem Check for required arguments if "%~2"=="" ( diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8fdbf61e4..faeaa4eb4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -606,7 +606,7 @@ jobs: shell: cmd run: | cd build\Release - ..\..\.github\scripts\e2e-test-whisper-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }} + ..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }} rmdir /S /Q .\build\Release\uploads - name: Run e2e testing - Whisper.cpp @@ -777,18 +777,6 @@ jobs: cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DWHISPER_CUBLAS=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%" - - name: Run e2e testing - Llama.cpp - shell: cmd - run: | - .\.github\scripts\e2e-test-llama-windows.bat .\build\Release\nitro.exe ${{ env.LLM_MODEL_URL }} - rmdir /S /Q .\build\Release\uploads - - - name: Run e2e testing - Whisper.cpp - shell: cmd - run: | - .\.github\scripts\e2e-test-whisper-windows.bat .\build\Release\nitro.exe ${{ env.WHISPER_MODEL_URL }} - rmdir /S /Q .\build\Release\uploads - - name: Pack artifacts id: pack_artifacts shell: cmd