From d648590ae51f0ba0081adc5146088d53b70a3371 Mon Sep 17 00:00:00 2001
From: vansangpfiev <sang@jan.ai>
Date: Fri, 2 Aug 2024 10:05:36 +0700
Subject: [PATCH 1/3] fix: race condition issue for server example

---
 examples/server/server.cc | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)
diff --git a/examples/server/server.cc b/examples/server/server.cc
index 274fe69..acae5b7 100644
--- a/examples/server/server.cc
+++ b/examples/server/server.cc
@@ -49,6 +49,18 @@ class Server {
   };
 };
 
+struct SyncJsonReader {  // Wraps Json::Reader so parse calls are serialized.
+ public:
+  // Parses `document` into `root` under the lock; returns parse success.
+  bool Parse(const std::string& document, Json::Value& root) {
+    std::lock_guard<std::mutex> l(m);
+    return r.parse(document, root);
+  }
+ private:
+  Json::Reader r;  // Only touched while m is held.
+  std::mutex m;
+};
+
 std::function<void(int)> shutdown_handler;
 std::atomic_flag is_terminating = ATOMIC_FLAG_INIT;
 
@@ -78,7 +90,7 @@ int main(int argc, char** argv) {
   }
 
   Server server;
-  Json::Reader r;
+  SyncJsonReader r;
   auto svr = std::make_unique<httplib::Server>();
 
   if (!svr->bind_to_port(hostname, port)) {
@@ -127,7 +139,7 @@ int main(int argc, char** argv) {
     resp.set_header("Access-Control-Allow-Origin",
                     req.get_header_value("Origin"));
     auto req_body = std::make_shared<Json::Value>();
-    r.parse(req.body, *req_body);
+    r.Parse(req.body, *req_body);
     server.engine_->LoadModel(
         req_body, [&server, &resp](Json::Value status, Json::Value res) {
           resp.set_content(res.toStyledString().c_str(),
@@ -141,7 +153,7 @@ int main(int argc, char** argv) {
     resp.set_header("Access-Control-Allow-Origin",
                     req.get_header_value("Origin"));
     auto req_body = std::make_shared<Json::Value>();
-    r.parse(req.body, *req_body);
+    r.Parse(req.body, *req_body);
     server.engine_->UnloadModel(
         req_body, [&server, &resp](Json::Value status, Json::Value res) {
           resp.set_content(res.toStyledString().c_str(),
@@ -155,7 +167,7 @@ int main(int argc, char** argv) {
     resp.set_header("Access-Control-Allow-Origin",
                     req.get_header_value("Origin"));
     auto req_body = std::make_shared<Json::Value>();
-    r.parse(req.body, *req_body);
+    r.Parse(req.body, *req_body);
     bool is_stream = (*req_body).get("stream", false).asBool();
     // This is an async call, need to use queue
     auto q = std::make_shared<SyncQueue>();
@@ -175,7 +187,7 @@ int main(int argc, char** argv) {
     resp.set_header("Access-Control-Allow-Origin",
                     req.get_header_value("Origin"));
     auto req_body = std::make_shared<Json::Value>();
-    r.parse(req.body, *req_body);
+    r.Parse(req.body, *req_body);
     // This is an async call, need to use queue
     SyncQueue q;
     server.engine_->HandleEmbedding(
@@ -190,7 +202,7 @@ int main(int argc, char** argv) {
     resp.set_header("Access-Control-Allow-Origin",
                     req.get_header_value("Origin"));
     auto req_body = std::make_shared<Json::Value>();
-    r.parse(req.body, *req_body);
+    r.Parse(req.body, *req_body);
     server.engine_->GetModelStatus(
         req_body, [&server, &resp](Json::Value status, Json::Value res) {
           resp.set_content(res.toStyledString().c_str(),
@@ -204,7 +216,7 @@ int main(int argc, char** argv) {
     resp.set_header("Access-Control-Allow-Origin",
                     req.get_header_value("Origin"));
     auto req_body = std::make_shared<Json::Value>();
-    r.parse(req.body, *req_body);
+    r.Parse(req.body, *req_body);
     server.engine_->GetModels(
         req_body, [&server, &resp](Json::Value status, Json::Value res) {
           resp.set_content(res.toStyledString().c_str(),
@@ -222,10 +234,10 @@ int main(int argc, char** argv) {
   svr->Get("/models", handle_get_running_models);
   std::atomic<bool> running = true;
   svr->Delete("/destroy",
-            [&](const httplib::Request& req, httplib::Response& resp) {
-              LOG_INFO << "Received Stop command";
-              running = false;
-            });
+              [&](const httplib::Request& req, httplib::Response& resp) {
+                LOG_INFO << "Received Stop command";
+                running = false;
+              });
 
   LOG_INFO << "HTTP server listening: " << hostname << ":" << port;
   svr->new_task_queue = [] {

From 1517d1538f20f0ea7d2a38a679544f00b7d33c5b Mon Sep 17 00:00:00 2001
From: nguyenhoangthuan99 <alex@jan.ai>
Date: Fri, 2 Aug 2024 05:17:23 +0000
Subject: [PATCH 2/3] test cuda build noavx

---
 .../workflows/template-quality-gate-pr.yml    | 334 +++++++++---------
 1 file changed, 171 insertions(+), 163 deletions(-)

diff --git a/.github/workflows/template-quality-gate-pr.yml b/.github/workflows/template-quality-gate-pr.yml
index 7cd406c..1afd67f 100644
--- a/.github/workflows/template-quality-gate-pr.yml
+++ b/.github/workflows/template-quality-gate-pr.yml
@@ -28,66 +28,74 @@ jobs:
       fail-fast: false
       matrix:
         include:
+          # - os: "linux"
+          #   name: "amd64-avx2"
+          #   runs-on: "ubuntu-20-04"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+          #   run-e2e: true
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: "/tmp/sccache.conf"
+          # - os: "linux"
+          #   name: "amd64-noavx"
+          #   runs-on: "ubuntu-20-04"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: "/tmp/sccache.conf"
+          # - os: "linux"
+          #   name: "amd64-avx"
+          #   runs-on: "ubuntu-20-04"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: "/tmp/sccache.conf"
+          # - os: "linux"
+          #   name: "amd64-avx512"
+          #   runs-on: "ubuntu-20-04"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: "/tmp/sccache.conf"
+          # - os: "linux"
+          #   name: "amd64-vulkan"
+          #   runs-on: "ubuntu-22-04"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_VULKAN=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+          #   run-e2e: false
+          #   vulkan: true
+          #   sccache: true
+          #   sccache-conf-path: "/tmp/sccache.conf"
+          # - os: "linux"
+          #   name: "amd64-avx2-cuda-11-7"
+          #   runs-on: "ubuntu-20-04-cuda-11-7"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: "/tmp/sccache.conf"
+          # - os: "linux"
+          #   name: "amd64-avx-cuda-11-7"
+          #   runs-on: "ubuntu-20-04-cuda-11-7"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: "/tmp/sccache.conf"
+          # - os: "linux"
+          #   name: "amd64-avx512-cuda-11-7"
+          #   runs-on: "ubuntu-20-04-cuda-11-7"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: "/tmp/sccache.conf"
           - os: "linux"
-            name: "amd64-avx2"
-            runs-on: "ubuntu-20-04"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-            run-e2e: true
-            vulkan: false
-            sccache: true
-            sccache-conf-path: "/tmp/sccache.conf"
-          - os: "linux"
-            name: "amd64-noavx"
-            runs-on: "ubuntu-20-04"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: true
-            sccache-conf-path: "/tmp/sccache.conf"
-          - os: "linux"
-            name: "amd64-avx"
-            runs-on: "ubuntu-20-04"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: true
-            sccache-conf-path: "/tmp/sccache.conf"
-          - os: "linux"
-            name: "amd64-avx512"
-            runs-on: "ubuntu-20-04"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: true
-            sccache-conf-path: "/tmp/sccache.conf"
-          - os: "linux"
-            name: "amd64-vulkan"
-            runs-on: "ubuntu-22-04"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_VULKAN=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-            run-e2e: false
-            vulkan: true
-            sccache: true
-            sccache-conf-path: "/tmp/sccache.conf"
-          - os: "linux"
-            name: "amd64-avx2-cuda-11-7"
-            runs-on: "ubuntu-20-04-cuda-11-7"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: true
-            sccache-conf-path: "/tmp/sccache.conf"
-          - os: "linux"
-            name: "amd64-avx-cuda-11-7"
-            runs-on: "ubuntu-20-04-cuda-11-7"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: true
-            sccache-conf-path: "/tmp/sccache.conf"
-          - os: "linux"
-            name: "amd64-avx512-cuda-11-7"
-            runs-on: "ubuntu-20-04-cuda-11-7"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            name: "amd64-noavx-cuda-12-0"
+            runs-on: "ubuntu-20-04-cuda-12-0"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_FMA=OFF -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
             run-e2e: false
             vulkan: false
             sccache: true
@@ -116,110 +124,110 @@ jobs:
             vulkan: false
             sccache: true
             sccache-conf-path: "/tmp/sccache.conf"
-          - os: "mac"
-            name: "amd64"
-            runs-on: "macos-13"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_METAL=OFF"
-            run-e2e: true
-            vulkan: false
-            sccache: false
-            sccache-conf-path: ""
-          - os: "mac"
-            name: "arm64"
-            runs-on: "macos-silicon"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_METAL_EMBED_LIBRARY=ON"
-            run-e2e: true
-            vulkan: false
-            sccache: false
-            sccache-conf-path: ""
-          - os: "windows"
-            name: "amd64-avx2"
-            runs-on: "windows-2019"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
-            run-e2e: true
-            vulkan: false
-            sccache: false
-            sccache-conf-path: ""
-          - os: "windows"
-            name: "amd64-noavx"
-            runs-on: "windows-2019"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: false
-            sccache-conf-path: ""
-          - os: "windows"
-            name: "amd64-avx"
-            runs-on: "windows-2019"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
-            run-e2e: true
-            vulkan: false
-            sccache: false
-            sccache-conf-path: ""
-          - os: "windows"
-            name: "amd64-avx512"
-            runs-on: "windows-2019"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: false
-            sccache-conf-path: ""
-          - os: "windows"
-            name: "amd64-vulkan"
-            runs-on: "windows-2019"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
-            run-e2e: false
-            vulkan: true
-            sccache: false
-            sccache-conf-path: ""
-          - os: "windows"
-            name: "amd64-avx2-cuda-12-0"
-            runs-on: "windows-cuda-12-0"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: true
-            sccache-conf-path: 'C:\sccache.conf'
-          - os: "windows"
-            name: "amd64-avx-cuda-12-0"
-            runs-on: "windows-cuda-12-0"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: true
-            sccache-conf-path: 'C:\sccache.conf'
-          - os: "windows"
-            name: "amd64-avx512-cuda-12-0"
-            runs-on: "windows-cuda-12-0"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: true
-            sccache-conf-path: 'C:\sccache.conf'
-          - os: "windows"
-            name: "amd64-avx2-cuda-11-7"
-            runs-on: "windows-cuda-11-7"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: true
-            sccache-conf-path: 'C:\sccache.conf'
-          - os: "windows"
-            name: "amd64-avx-cuda-11-7"
-            runs-on: "windows-cuda-11-7"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: true
-            sccache-conf-path: 'C:\sccache.conf'
-          - os: "windows"
-            name: "amd64-avx512-cuda-11-7"
-            runs-on: "windows-cuda-11-7"
-            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
-            run-e2e: false
-            vulkan: false
-            sccache: true
-            sccache-conf-path: 'C:\sccache.conf'
+          # - os: "mac"
+          #   name: "amd64"
+          #   runs-on: "macos-13"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_METAL=OFF"
+          #   run-e2e: true
+          #   vulkan: false
+          #   sccache: false
+          #   sccache-conf-path: ""
+          # - os: "mac"
+          #   name: "arm64"
+          #   runs-on: "macos-silicon"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_METAL_EMBED_LIBRARY=ON"
+          #   run-e2e: true
+          #   vulkan: false
+          #   sccache: false
+          #   sccache-conf-path: ""
+          # - os: "windows"
+          #   name: "amd64-avx2"
+          #   runs-on: "windows-2019"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
+          #   run-e2e: true
+          #   vulkan: false
+          #   sccache: false
+          #   sccache-conf-path: ""
+          # - os: "windows"
+          #   name: "amd64-noavx"
+          #   runs-on: "windows-2019"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: false
+          #   sccache-conf-path: ""
+          # - os: "windows"
+          #   name: "amd64-avx"
+          #   runs-on: "windows-2019"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
+          #   run-e2e: true
+          #   vulkan: false
+          #   sccache: false
+          #   sccache-conf-path: ""
+          # - os: "windows"
+          #   name: "amd64-avx512"
+          #   runs-on: "windows-2019"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: false
+          #   sccache-conf-path: ""
+          # - os: "windows"
+          #   name: "amd64-vulkan"
+          #   runs-on: "windows-2019"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
+          #   run-e2e: false
+          #   vulkan: true
+          #   sccache: false
+          #   sccache-conf-path: ""
+          # - os: "windows"
+          #   name: "amd64-avx2-cuda-12-0"
+          #   runs-on: "windows-cuda-12-0"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: 'C:\sccache.conf'
+          # - os: "windows"
+          #   name: "amd64-avx-cuda-12-0"
+          #   runs-on: "windows-cuda-12-0"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: 'C:\sccache.conf'
+          # - os: "windows"
+          #   name: "amd64-avx512-cuda-12-0"
+          #   runs-on: "windows-cuda-12-0"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: 'C:\sccache.conf'
+          # - os: "windows"
+          #   name: "amd64-avx2-cuda-11-7"
+          #   runs-on: "windows-cuda-11-7"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: 'C:\sccache.conf'
+          # - os: "windows"
+          #   name: "amd64-avx-cuda-11-7"
+          #   runs-on: "windows-cuda-11-7"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: 'C:\sccache.conf'
+          # - os: "windows"
+          #   name: "amd64-avx512-cuda-11-7"
+          #   runs-on: "windows-cuda-11-7"
+          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+          #   run-e2e: false
+          #   vulkan: false
+          #   sccache: true
+          #   sccache-conf-path: 'C:\sccache.conf'
 
     steps:
       - name: Clone

From 61416e6cbb186ff1e323024381c53a7cd925e620 Mon Sep 17 00:00:00 2001
From: nguyenhoangthuan99 <alex@jan.ai>
Date: Fri, 2 Aug 2024 11:15:24 +0000
Subject: [PATCH 3/3] add build cuda without avx

---
 .github/workflows/build.yml                   |  32 ++
 .github/workflows/nightly-build.yml           |  32 ++
 .../workflows/template-quality-gate-pr.yml    | 360 ++++++++++--------
 .../template-quality-gate-submodule.yml       |  32 ++
 4 files changed, 288 insertions(+), 168 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 6ccb8d5..677ac2e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -101,6 +101,14 @@ jobs:
             vulkan: true
             sccache: true
             sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-noavx-cuda-11-7"
+            runs-on: "ubuntu-20-04-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{needs.create-draft-release.outputs.version}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
           - os: "linux"
             name: "amd64-avx2-cuda-11-7"
             runs-on: "ubuntu-20-04-cuda-11-7"
@@ -125,6 +133,14 @@ jobs:
             vulkan: false
             sccache: true
             sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-noavx-cuda-12-0"
+            runs-on: "ubuntu-20-04-cuda-12-0"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{needs.create-draft-release.outputs.version}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
           - os: "linux"
             name: "amd64-avx2-cuda-12-0"
             runs-on: "ubuntu-20-04-cuda-12-0"
@@ -205,6 +221,14 @@ jobs:
             vulkan: true
             sccache: false
             sccache-conf-path: ""
+          - os: "windows"
+            name: "amd64-noavx-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{needs.create-draft-release.outputs.version}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
           - os: "windows"
             name: "amd64-avx2-cuda-12-0"
             runs-on: "windows-cuda-12-0"
@@ -229,6 +253,14 @@ jobs:
             vulkan: false
             sccache: true
             sccache-conf-path: 'C:\sccache.conf'
+          - os: "windows"
+            name: "amd64-noavx-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{needs.create-draft-release.outputs.version}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
           - os: "windows"
             name: "amd64-avx2-cuda-11-7"
             runs-on: "windows-cuda-11-7"
diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml
index da8f0ab..b47f5e0 100644
--- a/.github/workflows/nightly-build.yml
+++ b/.github/workflows/nightly-build.yml
@@ -101,6 +101,14 @@ jobs:
             vulkan: true
             sccache: true
             sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-noavx-cuda-11-7"
+            runs-on: "ubuntu-20-04-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{needs.create-draft-release.outputs.version}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
           - os: "linux"
             name: "amd64-avx2-cuda-11-7"
             runs-on: "ubuntu-20-04-cuda-11-7"
@@ -125,6 +133,14 @@ jobs:
             vulkan: false
             sccache: true
             sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-noavx-cuda-12-0"
+            runs-on: "ubuntu-20-04-cuda-12-0"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{needs.create-draft-release.outputs.version}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
           - os: "linux"
             name: "amd64-avx2-cuda-12-0"
             runs-on: "ubuntu-20-04-cuda-12-0"
@@ -205,6 +221,14 @@ jobs:
             vulkan: true
             sccache: false
             sccache-conf-path: ""
+          - os: "windows"
+            name: "amd64-noavx-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{needs.create-draft-release.outputs.version}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
           - os: "windows"
             name: "amd64-avx2-cuda-12-0"
             runs-on: "windows-cuda-12-0"
@@ -229,6 +253,14 @@ jobs:
             vulkan: false
             sccache: true
             sccache-conf-path: 'C:\sccache.conf'
+          - os: "windows"
+            name: "amd64-noavx-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{needs.create-draft-release.outputs.version}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
           - os: "windows"
             name: "amd64-avx2-cuda-11-7"
             runs-on: "windows-cuda-11-7"
diff --git a/.github/workflows/template-quality-gate-pr.yml b/.github/workflows/template-quality-gate-pr.yml
index 1afd67f..51cc03e 100644
--- a/.github/workflows/template-quality-gate-pr.yml
+++ b/.github/workflows/template-quality-gate-pr.yml
@@ -28,70 +28,78 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          # - os: "linux"
-          #   name: "amd64-avx2"
-          #   runs-on: "ubuntu-20-04"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-          #   run-e2e: true
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: "/tmp/sccache.conf"
-          # - os: "linux"
-          #   name: "amd64-noavx"
-          #   runs-on: "ubuntu-20-04"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: "/tmp/sccache.conf"
-          # - os: "linux"
-          #   name: "amd64-avx"
-          #   runs-on: "ubuntu-20-04"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: "/tmp/sccache.conf"
-          # - os: "linux"
-          #   name: "amd64-avx512"
-          #   runs-on: "ubuntu-20-04"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: "/tmp/sccache.conf"
-          # - os: "linux"
-          #   name: "amd64-vulkan"
-          #   runs-on: "ubuntu-22-04"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_VULKAN=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-          #   run-e2e: false
-          #   vulkan: true
-          #   sccache: true
-          #   sccache-conf-path: "/tmp/sccache.conf"
-          # - os: "linux"
-          #   name: "amd64-avx2-cuda-11-7"
-          #   runs-on: "ubuntu-20-04-cuda-11-7"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: "/tmp/sccache.conf"
-          # - os: "linux"
-          #   name: "amd64-avx-cuda-11-7"
-          #   runs-on: "ubuntu-20-04-cuda-11-7"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: "/tmp/sccache.conf"
-          # - os: "linux"
-          #   name: "amd64-avx512-cuda-11-7"
-          #   runs-on: "ubuntu-20-04-cuda-11-7"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-avx2"
+            runs-on: "ubuntu-20-04"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: true
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-noavx"
+            runs-on: "ubuntu-20-04"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-avx"
+            runs-on: "ubuntu-20-04"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-avx512"
+            runs-on: "ubuntu-20-04"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-vulkan"
+            runs-on: "ubuntu-22-04"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_VULKAN=ON -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: true
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-noavx-cuda-11-7"
+            runs-on: "ubuntu-20-04-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-avx2-cuda-11-7"
+            runs-on: "ubuntu-20-04-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-avx-cuda-11-7"
+            runs-on: "ubuntu-20-04-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-avx512-cuda-11-7"
+            runs-on: "ubuntu-20-04-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
           - os: "linux"
             name: "amd64-noavx-cuda-12-0"
             runs-on: "ubuntu-20-04-cuda-12-0"
@@ -124,110 +132,126 @@ jobs:
             vulkan: false
             sccache: true
             sccache-conf-path: "/tmp/sccache.conf"
-          # - os: "mac"
-          #   name: "amd64"
-          #   runs-on: "macos-13"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_METAL=OFF"
-          #   run-e2e: true
-          #   vulkan: false
-          #   sccache: false
-          #   sccache-conf-path: ""
-          # - os: "mac"
-          #   name: "arm64"
-          #   runs-on: "macos-silicon"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_METAL_EMBED_LIBRARY=ON"
-          #   run-e2e: true
-          #   vulkan: false
-          #   sccache: false
-          #   sccache-conf-path: ""
-          # - os: "windows"
-          #   name: "amd64-avx2"
-          #   runs-on: "windows-2019"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
-          #   run-e2e: true
-          #   vulkan: false
-          #   sccache: false
-          #   sccache-conf-path: ""
-          # - os: "windows"
-          #   name: "amd64-noavx"
-          #   runs-on: "windows-2019"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: false
-          #   sccache-conf-path: ""
-          # - os: "windows"
-          #   name: "amd64-avx"
-          #   runs-on: "windows-2019"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
-          #   run-e2e: true
-          #   vulkan: false
-          #   sccache: false
-          #   sccache-conf-path: ""
-          # - os: "windows"
-          #   name: "amd64-avx512"
-          #   runs-on: "windows-2019"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: false
-          #   sccache-conf-path: ""
-          # - os: "windows"
-          #   name: "amd64-vulkan"
-          #   runs-on: "windows-2019"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
-          #   run-e2e: false
-          #   vulkan: true
-          #   sccache: false
-          #   sccache-conf-path: ""
-          # - os: "windows"
-          #   name: "amd64-avx2-cuda-12-0"
-          #   runs-on: "windows-cuda-12-0"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: 'C:\sccache.conf'
-          # - os: "windows"
-          #   name: "amd64-avx-cuda-12-0"
-          #   runs-on: "windows-cuda-12-0"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: 'C:\sccache.conf'
-          # - os: "windows"
-          #   name: "amd64-avx512-cuda-12-0"
-          #   runs-on: "windows-cuda-12-0"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: 'C:\sccache.conf'
-          # - os: "windows"
-          #   name: "amd64-avx2-cuda-11-7"
-          #   runs-on: "windows-cuda-11-7"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: 'C:\sccache.conf'
-          # - os: "windows"
-          #   name: "amd64-avx-cuda-11-7"
-          #   runs-on: "windows-cuda-11-7"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: 'C:\sccache.conf'
-          # - os: "windows"
-          #   name: "amd64-avx512-cuda-11-7"
-          #   runs-on: "windows-cuda-11-7"
-          #   cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
-          #   run-e2e: false
-          #   vulkan: false
-          #   sccache: true
-          #   sccache-conf-path: 'C:\sccache.conf'
+          - os: "mac"
+            name: "amd64"
+            runs-on: "macos-13"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_METAL=OFF"
+            run-e2e: true
+            vulkan: false
+            sccache: false
+            sccache-conf-path: ""
+          - os: "mac"
+            name: "arm64"
+            runs-on: "macos-silicon"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_METAL_EMBED_LIBRARY=ON"
+            run-e2e: true
+            vulkan: false
+            sccache: false
+            sccache-conf-path: ""
+          - os: "windows"
+            name: "amd64-avx2"
+            runs-on: "windows-2019"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
+            run-e2e: true
+            vulkan: false
+            sccache: false
+            sccache-conf-path: ""
+          - os: "windows"
+            name: "amd64-noavx"
+            runs-on: "windows-2019"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: false
+            sccache-conf-path: ""
+          - os: "windows"
+            name: "amd64-avx"
+            runs-on: "windows-2019"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
+            run-e2e: true
+            vulkan: false
+            sccache: false
+            sccache-conf-path: ""
+          - os: "windows"
+            name: "amd64-avx512"
+            runs-on: "windows-2019"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: false
+            sccache-conf-path: ""
+          - os: "windows"
+            name: "amd64-vulkan"
+            runs-on: "windows-2019"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER=cl -DCMAKE_C_COMPILER=cl -GNinja"
+            run-e2e: false
+            vulkan: true
+            sccache: false
+            sccache-conf-path: ""
+          - os: "windows"
+            name: "amd64-noavx-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
+          - os: "windows"
+            name: "amd64-avx2-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
+          - os: "windows"
+            name: "amd64-avx-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
+          - os: "windows"
+            name: "amd64-avx512-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
+          - os: "windows"
+            name: "amd64-noavx-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
+          - os: "windows"
+            name: "amd64-avx2-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
+          - os: "windows"
+            name: "amd64-avx-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
+          - os: "windows"
+            name: "amd64-avx512-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX512=ON -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
 
     steps:
       - name: Clone
diff --git a/.github/workflows/template-quality-gate-submodule.yml b/.github/workflows/template-quality-gate-submodule.yml
index 1ea3a72..03859ed 100644
--- a/.github/workflows/template-quality-gate-submodule.yml
+++ b/.github/workflows/template-quality-gate-submodule.yml
@@ -68,6 +68,14 @@ jobs:
             vulkan: true
             sccache: true
             sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-noavx-cuda-11-7"
+            runs-on: "ubuntu-20-04-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_AVX2=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
           - os: "linux"
             name: "amd64-avx2-cuda-11-7"
             runs-on: "ubuntu-20-04-cuda-11-7"
@@ -92,6 +100,14 @@ jobs:
             vulkan: false
             sccache: true
             sccache-conf-path: "/tmp/sccache.conf"
+          - os: "linux"
+            name: "amd64-noavx-cuda-12-0"
+            runs-on: "ubuntu-20-04-cuda-12-0"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_FMA=OFF -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: "/tmp/sccache.conf"
           - os: "linux"
             name: "amd64-avx2-cuda-12-0"
             runs-on: "ubuntu-20-04-cuda-12-0"
@@ -172,6 +188,14 @@ jobs:
             vulkan: true
             sccache: false
             sccache-conf-path: ""
+          - os: "windows"
+            name: "amd64-noavx-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
           - os: "windows"
             name: "amd64-avx2-cuda-12-0"
             runs-on: "windows-cuda-12-0"
@@ -196,6 +220,14 @@ jobs:
             vulkan: false
             sccache: true
             sccache-conf-path: 'C:\sccache.conf'
+          - os: "windows"
+            name: "amd64-noavx-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE='Release' -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -GNinja"
+            run-e2e: false
+            vulkan: false
+            sccache: true
+            sccache-conf-path: 'C:\sccache.conf'
           - os: "windows"
             name: "amd64-avx2-cuda-11-7"
             runs-on: "windows-cuda-11-7"