diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat
index 4302bc946..7de9c5b67 100644
--- a/.github/scripts/e2e-test-llama-windows.bat
+++ b/.github/scripts/e2e-test-llama-windows.bat
@@ -25,10 +25,6 @@ set /a max=11000
 set /a range=max-min+1
 set /a PORT=%min% + %RANDOM% %% %range%
 
-rem Kill any existing Nitro processes
-echo Killing any existing Nitro processes...
-taskkill /f /im nitro.exe 2>nul
-
 rem Start the binary file
 start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1
 
@@ -64,9 +60,9 @@ echo curl_data1=%curl_data1%
 echo curl_data2=%curl_data2%
 
 rem Run the curl commands and capture the status code
-curl.exe -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1
+curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1
 
-curl.exe -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^
+curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^
 --header "Content-Type: application/json" ^
 --header "Accept: text/event-stream" ^
 --header "Access-Control-Allow-Origin: *" ^
@@ -78,13 +74,13 @@ rem Read the status codes from the log files
 for /f %%a in (%TEMP%\response1.log) do set "response1=%%a"
 for /f %%a in (%TEMP%\response2.log) do set "response2=%%a"
 
-if "%response1%" neq "000" (
+if "%response1%" neq "200" (
     echo The first curl command failed with status code: %response1%
     type %TEMP%\response1.log
     set "error_occurred=1"
 )
 
-if "%response2%" neq "000" (
+if "%response2%" neq "200" (
    echo The second curl command failed with status code: %response2%
    type %TEMP%\response2.log
    set "error_occurred=1"
 )
 
@@ -111,4 +107,4 @@ echo Nitro test run successfully!
 
 rem Kill the server process
 @REM taskkill /f /pid %pid%
-taskkill /f /im nitro.exe 2>nul || exit /B 0
+taskkill /f /im nitro.exe 2>nul || exit /B 0
\ No newline at end of file
diff --git a/.github/scripts/e2e-test-whisper-windows.bat b/.github/scripts/e2e-test-whisper-windows.bat
index b4a06bc09..a47b0e004 100644
--- a/.github/scripts/e2e-test-whisper-windows.bat
+++ b/.github/scripts/e2e-test-whisper-windows.bat
@@ -25,10 +25,6 @@ set /a max=11000
 set /a range=max-min+1
 set /a PORT=%min% + %RANDOM% %% %range%
 
-rem Kill any existing Nitro processes
-echo Killing any existing Nitro processes...
-taskkill /f /im nitro.exe 2>nul
-
 rem Start the binary file
 start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1
 
@@ -49,28 +45,21 @@ if not defined pid (
 
 rem Wait for a few seconds to let the server start
 
-rem Check if %TEMP%\testmodel exists, if not, download it
+rem Check if %TEMP%\testwhisper exists, if not, download it
 if not exist "%MODEL_PATH%" (
     bitsadmin.exe /transfer "DownloadTestModel" %DOWNLOAD_URL% "%MODEL_PATH%"
 )
 
 rem Define JSON strings for curl data
 call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
-set "curl_data1={\"model_path\":\"%MODEL_PATH_STRING%\",\"model_id\":\"whisper.cpp\"}"
-
-rem Print the values of curl_data1 for debugging
-echo curl_data1=%curl_data1%
+set "curl_data1={\"model_path\":\"%MODEL_PATH_STRING%\",\"model_id\":\"whisper\"}"
 
 rem Run the curl commands and capture the status code
-curl.exe -o %TEMP%\response1_code.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1
+curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/load_model" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1
 
-curl.exe -o %TEMP%\response2_code.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/transcriptions" ^
---header "Access-Control-Allow-Origin: *" ^
---form 'model_id="whisper.cpp"' ^
---form 'file=@"..\whisper.cpp\samples\jfk.wav"' ^
---form 'temperature="0.0"' ^
---form 'prompt="The transcript is about OpenAI which makes technology like DALL·E, GPT-3, and ChatGPT with the hope of one day building an AGI system that benefits all of humanity. The president is trying to raly people to support the cause."' ^
-> %TEMP%\response2_code.log 2>&1
+curl.exe -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/audio/transcriptions" ^
+--form "file=@../../whisper.cpp/samples/jfk.wav" ^
+--form "model_id=whisper" > %TEMP%\response2_code.log 2>&1
 
 set "error_occurred=0"
 
@@ -80,13 +69,13 @@ for /f %%a in (%TEMP%\response2_code.log) do set "response2=%%a"
 
 if "%response1%" neq "200" (
    echo The first curl command failed with status code: %response1%
-    type %TEMP%\response1_code.log
+    type %TEMP%\response1.log
    set "error_occurred=1"
 )
 
-if "%response2%" neq "000" (
+if "%response2%" neq "200" (
    echo The second curl command failed with status code: %response2%
-    type %TEMP%\response2_code.log
+    type %TEMP%\response2.log
    set "error_occurred=1"
 )
 
@@ -101,14 +90,13 @@ if "%error_occurred%"=="1" (
 
 echo ----------------------
 echo Log load model:
-type %TEMP%\response1_code.log
+type %TEMP%\response1.log
 
 echo ----------------------
 echo "Log run test:"
-type %TEMP%\response2_code.log
+type %TEMP%\response2.log
 
 echo Nitro test run successfully!
 rem Kill the server process
-@REM taskkill /f /pid %pid%
 taskkill /f /im nitro.exe 2>nul || exit /B 0
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 5b46ef66e..faeaa4eb4 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -50,7 +50,7 @@ on:
 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
   LLM_MODEL_URL: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
-  WHISPER_MODEL_URL: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin
+  WHISPER_MODEL_URL: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin
 
 jobs:
   create-draft-release:
@@ -377,11 +377,11 @@ jobs:
         run: |
           # To test with CoreML
           if [[ ! -f "/tmp/testwhisper-encoder.mlmodelc" ]]; then
-            wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-encoder.mlmodelc.zip
-            unzip ggml-tiny.en-encoder.mlmodelc.zip
-            rm ggml-tiny.en-encoder.mlmodelc.zip
+            wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-encoder.mlmodelc.zip
+            unzip ggml-tiny-encoder.mlmodelc.zip
+            rm ggml-tiny-encoder.mlmodelc.zip
             rm -rf /tmp/testwhisper-encoder.mlmodelc
-            mv ggml-tiny.en-encoder.mlmodelc /tmp/testwhisper-encoder.mlmodelc
+            mv ggml-tiny-encoder.mlmodelc /tmp/testwhisper-encoder.mlmodelc
           fi
           # run e2e testing
           cd nitro
@@ -586,7 +586,7 @@ jobs:
          cmake --build ./build_deps/nitro_deps --config Release
          mkdir -p build
          cd build
-          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
+          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
          cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
 
      - name: Pack artifacts
@@ -602,15 +602,19 @@ jobs:
          7z a -ttar temp.tar .\build\Release\*
          7z a -tgzip nitro.tar.gz temp.tar
 
-      # - name: Run e2e testing - Llama.cpp
-      #   shell: cmd
-      #   run: |
-      #     .\.github\scripts\e2e-test-llama-windows.bat .\build\Release\nitro.exe ${{ env.LLM_MODEL_URL }}
+      - name: Run e2e testing - Llama.cpp
+        shell: cmd
+        run: |
+          cd build\Release
+          ..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }}
+          rmdir /S /Q .\uploads
 
-      # - name: Run e2e testing - Whisper.cpp
-      #   shell: cmd
-      #   run: |
-      #     .\.github\scripts\e2e-test-whisper-windows.bat .\build\Release\nitro.exe ${{ env.WHISPER_MODEL_URL }}
+      - name: Run e2e testing - Whisper.cpp
+        shell: cmd
+        run: |
+          cd build\Release
+          ..\..\.github\scripts\e2e-test-whisper-windows.bat nitro.exe ${{ env.WHISPER_MODEL_URL }}
+          rmdir /S /Q .\uploads
 
      - name: Upload Artifact
        uses: actions/upload-artifact@v2
@@ -679,7 +683,7 @@ jobs:
          cmake --build ./build_deps/nitro_deps --config Release
          mkdir -p build
          cd build
-          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
+          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
          cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
 
      - name: Pack artifacts
@@ -770,7 +774,7 @@ jobs:
          cmake --build ./build_deps/nitro_deps --config Release
          mkdir -p build
          cd build
-          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DWHISPER_CUBLAS=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
+          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE -DWHISPER_SDL2=ON -DWHISPER_CUBLAS=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }}
          cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
 
      - name: Pack artifacts
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d452110cd..78443b585 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -59,7 +59,7 @@ add_executable(${PROJECT_NAME} main.cc)
 #
 # and comment out the following lines
 find_package(Drogon CONFIG REQUIRED)
-target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon common llama llava whisper
+target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon common llama whisper llava
                       ${CMAKE_THREAD_LIBS_INIT})
 
 # ##############################################################################
diff --git a/controllers/whisperCPP.cc b/controllers/whisperCPP.cc
index b9b9b8bea..a2039f396 100644
--- a/controllers/whisperCPP.cc
+++ b/controllers/whisperCPP.cc
@@ -872,7 +872,9 @@ void whisperCPP::load_model(
          (*jsonBody)["warm_up_audio_path"].asString();
      // Return 400 error if warm up audio path is not found
      if (!is_file_exist(warm_up_audio_path.c_str())) {
-        std::string error_msg = "Warm up audio " + warm_up_audio_path + " not found, please provide a valid path or don't specify it at all";
+        std::string error_msg =
+            "Warm up audio " + warm_up_audio_path +
+            " not found, please provide a valid path or don't specify it at all";
        LOG_INFO << error_msg;
        Json::Value jsonResp;
        jsonResp["message"] = error_msg;
@@ -881,9 +883,10 @@ void whisperCPP::load_model(
        callback(resp);
        return;
      } else {
-        LOG_INFO << "Warming up model " << model_id << " with audio " << warm_up_audio_path << " ...";
-        std::string warm_up_result = whisper.inference(
-            warm_up_audio_path, "en", "", text_format, 0, false);
+        LOG_INFO << "Warming up model " << model_id << " with audio "
+                 << warm_up_audio_path << " ...";
+        std::string warm_up_result = whisper.inference(warm_up_audio_path, "en",
+                                                       "", text_format, 0, false);
        LOG_INFO << "Warm up model " << model_id << " completed";
      }
    } else {