diff --git a/.github/workflows/cortex-cpp-build.yml b/.github/workflows/cortex-cpp-build.yml
index 01227dc89..7be4d2e7f 100644
--- a/.github/workflows/cortex-cpp-build.yml
+++ b/.github/workflows/cortex-cpp-build.yml
@@ -97,25 +97,25 @@ jobs:
 
           - os: "windows"
             name: "amd64-avx2"
-            runs-on: "windows-latest"
+            runs-on: "windows-cuda-12-0"
             cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
             run-e2e: true
 
           - os: "windows"
             name: "amd64-avx"
-            runs-on: "windows-latest"
+            runs-on: "windows-cuda-12-0"
             cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
             run-e2e: false
 
           - os: "windows"
             name: "amd64-avx512"
-            runs-on: "windows-latest"
+            runs-on: "windows-cuda-12-0"
             cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
             run-e2e: false
 
           - os: "windows"
             name: "amd64-vulkan"
-            runs-on: "windows-latest"
+            runs-on: "windows-cuda-12-0"
             cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
             run-e2e: false
 
diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index e84efa4e7..f5d686845 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -80,28 +80,28 @@ jobs:
 
           - os: "windows"
             name: "amd64-avx2"
-            runs-on: "windows-latest"
+            runs-on: "windows-cuda-12-0"
             cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
             run-e2e: true
             run-python-e2e: true
 
           - os: "windows"
             name: "amd64-avx"
-            runs-on: "windows-latest"
+            runs-on: "windows-cuda-12-0"
             cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
             run-e2e: false
             run-python-e2e: false
 
           - os: "windows"
             name: "amd64-avx512"
-            runs-on: "windows-latest"
+            runs-on: "windows-cuda-12-0"
             cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
             run-e2e: false
             run-python-e2e: false
 
           - os: "windows"
             name: "amd64-vulkan"
-            runs-on: "windows-latest"
+            runs-on: "windows-cuda-12-0"
             cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
             run-e2e: false
             run-python-e2e: false
diff --git a/cortex-cpp/engines/cortex.llamacpp/engine.cmake b/cortex-cpp/engines/cortex.llamacpp/engine.cmake
index 2fae9a897..2abdfaba9 100644
--- a/cortex-cpp/engines/cortex.llamacpp/engine.cmake
+++ b/cortex-cpp/engines/cortex.llamacpp/engine.cmake
@@ -1,5 +1,5 @@
 # cortex.llamacpp release version
-set(VERSION 0.1.15)
+set(VERSION 0.1.17)
 set(ENGINE_VERSION v${VERSION})
 add_compile_definitions(CORTEX_LLAMACPP_VERSION="${VERSION}")