diff --git a/.github/actions/windows-setup-curl/action.yml b/.github/actions/windows-setup-curl/action.yml
index 5d76da3d79ac5..446f799fac34a 100644
--- a/.github/actions/windows-setup-curl/action.yml
+++ b/.github/actions/windows-setup-curl/action.yml
@@ -5,6 +5,10 @@ inputs:
     description: 'CURL version'
     required: false
     default: '8.6.0_6'
+  architecture:
+    description: 'Architecture of the libcurl to download'
+    required: false
+    default: 'win64'
 outputs:
   curl_path:
     description: "Path to the downloaded libcurl"
@@ -18,8 +22,9 @@ runs:
       shell: powershell
       env:
         CURL_VERSION: ${{ inputs.curl_version }}
+        ARCHITECTURE: ${{ inputs.architecture }}
       run: |
-        curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
+        curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-${env:ARCHITECTURE}-mingw.zip"
        mkdir $env:RUNNER_TEMP/libcurl
        tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
        echo "curl_path=$env:RUNNER_TEMP/libcurl" >> $env:GITHUB_OUTPUT
diff --git a/.github/workflows/build-linux-cross.yml b/.github/workflows/build-linux-cross.yml
index 1c38d7e11da6b..dbd31e589be3e 100644
--- a/.github/workflows/build-linux-cross.yml
+++ b/.github/workflows/build-linux-cross.yml
@@ -140,3 +140,94 @@ jobs:
             -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
 
         cmake --build build --config Release -j $(nproc)
+
+  ubuntu-24-ppc64el-cpu-cross:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup PowerPC64le
+        run: |
+          sudo dpkg --add-architecture ppc64el
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
+          sudo apt-get install -y --no-install-recommends \
+                  build-essential \
+                  gcc-14-powerpc64le-linux-gnu \
+                  g++-14-powerpc64le-linux-gnu \
+                  libcurl4-openssl-dev:ppc64el
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+                         -DGGML_OPENMP=OFF \
+                         -DLLAMA_BUILD_EXAMPLES=ON \
+                         -DLLAMA_BUILD_TOOLS=ON \
+                         -DLLAMA_BUILD_TESTS=OFF \
+                         -DCMAKE_SYSTEM_NAME=Linux \
+                         -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
+                         -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
+                         -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
+                         -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+                         -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)
+
+  ubuntu-24-ppc64el-vulkan-cross:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup PowerPC64le
+        run: |
+          sudo dpkg --add-architecture ppc64el
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
+          sudo apt-get install -y --no-install-recommends \
+                  build-essential \
+                  glslc \
+                  gcc-14-powerpc64le-linux-gnu \
+                  g++-14-powerpc64le-linux-gnu \
+                  libvulkan-dev:ppc64el \
+                  libcurl4-openssl-dev:ppc64el
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+                         -DGGML_VULKAN=ON \
+                         -DGGML_OPENMP=OFF \
+                         -DLLAMA_BUILD_EXAMPLES=ON \
+                         -DLLAMA_BUILD_TOOLS=ON \
+                         -DLLAMA_BUILD_TESTS=OFF \
+                         -DCMAKE_SYSTEM_NAME=Linux \
+                         -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
+                         -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
+                         -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
+                         -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+                         -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 5f54909dcbd8c..02ff188855d6a 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -238,14 +238,19 @@ jobs:
       matrix:
         include:
           - build: 'cpu-x64'
+            arch: 'x64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF'
          #- build: 'openblas-x64'
+          #  arch: 'x64'
          #  defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
           - build: 'vulkan-x64'
+            arch: 'x64'
             defines: '-DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
           - build: 'cpu-arm64'
+            arch: 'arm64'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF'
           - build: 'opencl-adreno-arm64'
+            arch: 'arm64'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
 
     steps:
@@ -312,6 +317,8 @@
       - name: libCURL
         id: get_libcurl
         uses: ./.github/actions/windows-setup-curl
+        with:
+          architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}
 
       - name: Build
         id: cmake_build
@@ -339,7 +346,7 @@
         env:
           CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
         run: |
-          Copy-Item $env:CURL_PATH\bin\libcurl-x64.dll .\build\bin\Release\libcurl-x64.dll
+          Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
           7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
 
       - name: Upload artifacts
diff --git a/README.md b/README.md
index 0401723ffcf87..5472f7abdeb21 100644
--- a/README.md
+++ b/README.md
@@ -572,4 +572,11 @@ automatically. For example:
 $ echo "source ~/.llama-completion.bash" >> ~/.bashrc
 ```
 
-## References
+## Dependencies
+
+- [yhirose/cpp-httplib](https://github.com/yhirose/cpp-httplib) - Single-header HTTP server, used by `llama-server` - MIT license
+- [stb-image](https://github.com/nothings/stb) - Single-header image format decoder, used by multimodal subsystem - Public domain
+- [nlohmann/json](https://github.com/nlohmann/json) - Single-header JSON library, used by various tools/examples - MIT License
+- [minja](https://github.com/google/minja) - Minimal Jinja parser in C++, used by various tools/examples - MIT License
+- [linenoise.cpp](./tools/run/linenoise.cpp/linenoise.cpp) - C++ library that provides readline-like line editing capabilities, used by `llama-run` - BSD 2-Clause License
+- [curl](https://curl.se/) - Client-side URL transfer library, used by various tools/examples - [CURL License](https://curl.se/docs/copyright.html)
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index db5d4094a17b8..a7ff3ac16c446 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -121,8 +121,8 @@ if (LLAMA_LLGUIDANCE)
 
     ExternalProject_Add(llguidance_ext
         GIT_REPOSITORY https://github.com/guidance-ai/llguidance
-        # v0.7.19 (+ fancy-regex build fix):
-        GIT_TAG b59f98f85269892a7de3d3641ad155366f13daa6
+        # v0.7.20 (+ fix to build on GCC 15):
+        GIT_TAG b5b8b64dba11c4e4ee6b1d1450d3a3ae279891e8
         PREFIX ${CMAKE_BINARY_DIR}/llguidance
         SOURCE_DIR ${LLGUIDANCE_SRC}
         BUILD_IN_SOURCE TRUE
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
index f4b3d9cf5929c..85dbbcd5d7f99 100644
--- a/ggml/src/ggml-metal/ggml-metal.m
+++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -415,6 +415,13 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
     GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_Q8_0_HK192_HV128,
     GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_Q8_0_H256,
     GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_Q8_0_HK576_HV512,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H64,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_BF16_H64,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_0_H64,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_1_H64,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q5_0_H64,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q5_1_H64,
+    GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q8_0_H64,
     GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H96,
     GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_BF16_H96,
     GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_0_H96,
@@ -1362,6 +1369,13 @@ @implementation GGMLMetalClass
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_Q8_0_HK192_HV128, flash_attn_ext_q8_0_hk192_hv128, has_simdgroup_mm);
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_Q8_0_H256,        flash_attn_ext_q8_0_h256,        has_simdgroup_mm);
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_Q8_0_HK576_HV512, flash_attn_ext_q8_0_hk576_hv512, has_simdgroup_mm);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H64,      flash_attn_ext_vec_f16_h64,      has_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_BF16_H64,     flash_attn_ext_vec_bf16_h64,     has_simdgroup_reduction && use_bfloat);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_0_H64,     flash_attn_ext_vec_q4_0_h64,     has_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_1_H64,     flash_attn_ext_vec_q4_1_h64,     has_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q5_0_H64,     flash_attn_ext_vec_q5_0_h64,     has_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q5_1_H64,     flash_attn_ext_vec_q5_1_h64,     has_simdgroup_reduction);
+        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q8_0_H64,     flash_attn_ext_vec_q8_0_h64,     has_simdgroup_reduction);
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H96,      flash_attn_ext_vec_f16_h96,      has_simdgroup_reduction);
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_BF16_H96,     flash_attn_ext_vec_bf16_h96,     has_simdgroup_reduction && use_bfloat);
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_0_H96,     flash_attn_ext_vec_q4_0_h96,     has_simdgroup_reduction);
@@ -4358,7 +4372,7 @@ static bool ggml_metal_encode_node(
                // TODO: add vec kernels for (ne00%64 == 0) and maybe also for (ne00%32 == 0)
                //       for now avoiding mainly to keep the number of templates/kernels a bit lower
                //       these are now trivial to add after: https://github.com/ggml-org/llama.cpp/pull/12612
-                if (ne01 >= 20 || (ne00%128 != 0 && ne00 != 96 && ne00 != 192 && ne00 != 576)) {
+                if (ne01 >= 20 || (ne00%128 != 0 && ne00 != 64 && ne00 != 96 && ne00 != 192 && ne00 != 576)) {
                    switch (src1->type) {
                        case GGML_TYPE_F16:
                            {
@@ -4539,6 +4553,24 @@ static bool ggml_metal_encode_node(
                use_vec_kernel = true;
 
                switch (ne00) {
+                    case 64:
+                        {
+                            switch (src1->type) {
+                                case GGML_TYPE_F16:  pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H64].pipeline;  break;
+                                case GGML_TYPE_BF16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_BF16_H64].pipeline; break;
+                                case GGML_TYPE_Q4_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_0_H64].pipeline; break;
+                                case GGML_TYPE_Q4_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_1_H64].pipeline; break;
+                                case GGML_TYPE_Q5_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q5_0_H64].pipeline; break;
+                                case GGML_TYPE_Q5_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q5_1_H64].pipeline; break;
+                                case GGML_TYPE_Q8_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q8_0_H64].pipeline; break;
+                                default:
+                                    {
+                                        GGML_LOG_ERROR("unsupported type: %d\n", src1->type);
+                                        GGML_LOG_ERROR("add template specialization for this type\n");
+                                        GGML_ABORT("add template specialization for this type");
+                                    }
+                            }
+                        } break;
                    case 96:
                        {
                            switch (src1->type) {
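Note on the dispatch change above: the head-size list in `ggml_metal_encode_node` now includes 64, so small-batch (e.g. single-token decode) attention on models with 64-wide heads can use the new `_VEC_H64` kernels instead of falling back to the matrix kernels. A rough C++ restatement of the heuristic, for clarity only — the helper name is invented here, not part of the patch:

```cpp
// Illustrative restatement of the condition changed above, not code from the
// patch. ne01 is the number of query rows; ne00 is the attention head size.
static bool use_simdgroup_mm_kernel(int ne01, int ne00) {
    // head sizes that have a dedicated vector kernel (64 is newly added)
    const bool has_vec_kernel = (ne00 % 128 == 0) ||
        ne00 == 64 || ne00 == 96 || ne00 == 192 || ne00 == 576;
    // large batches keep the simdgroup matrix kernels; small batches with a
    // supported head size take the vector path (use_vec_kernel = true)
    return ne01 >= 20 || !has_vec_kernel;
}
```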
diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
index 122ae59737196..e94b6cd756441 100644
--- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal
@@ -4124,6 +4124,16 @@ kernel void kernel_flash_attn_ext_vec(
 
 typedef decltype(kernel_flash_attn_ext_vec) flash_attn_ext_vec_t;
 
+template [[host_name("kernel_flash_attn_ext_vec_f16_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec;
+#if defined(GGML_METAL_USE_BF16)
+template [[host_name("kernel_flash_attn_ext_vec_bf16_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec;
+#endif
+template [[host_name("kernel_flash_attn_ext_vec_q4_0_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec;
+template [[host_name("kernel_flash_attn_ext_vec_q4_1_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec;
+template [[host_name("kernel_flash_attn_ext_vec_q5_0_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec;
+template [[host_name("kernel_flash_attn_ext_vec_q5_1_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec;
+template [[host_name("kernel_flash_attn_ext_vec_q8_0_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec;
+
 template [[host_name("kernel_flash_attn_ext_vec_f16_h96")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec;
 #if defined(GGML_METAL_USE_BF16)
 template [[host_name("kernel_flash_attn_ext_vec_bf16_h96")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec;
diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp
index dcc6ec809a7d1..becaac4048a7f 100644
--- a/ggml/src/ggml-sycl/element_wise.cpp
+++ b/ggml/src/ggml-sycl/element_wise.cpp
@@ -655,7 +655,6 @@ inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -688,7 +687,6 @@ inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -722,7 +720,6 @@ inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -754,7 +751,6 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -786,7 +782,6 @@ inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -818,7 +813,6 @@ inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -850,7 +844,6 @@ inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -883,7 +876,6 @@ inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -917,7 +909,6 @@ inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tenso
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -949,7 +940,6 @@ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -981,7 +971,6 @@ inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -1013,7 +1002,6 @@ inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -1045,7 +1033,6 @@ inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor *
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -1078,7 +1065,6 @@ inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -1110,7 +1096,6 @@ inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -1142,7 +1127,6 @@ inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -1174,7 +1158,6 @@ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -1206,7 +1189,6 @@ inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -1241,7 +1223,6 @@ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -1273,7 +1254,6 @@ inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -1315,7 +1295,6 @@ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor *
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -1350,7 +1329,6 @@ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
@@ -1388,7 +1366,6 @@ inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * ds
         }
         default:
             GGML_ABORT("GGML tensor type not supported!\n");
-            break;
     }
 }
diff --git a/src/llama.cpp b/src/llama.cpp
index 9fdddf7b071f8..2f06e0f8ce12d 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -140,6 +140,11 @@ static struct llama_model * llama_model_load_from_file_impl(
         struct llama_model_params params) {
     ggml_time_init();
 
+    if (!params.vocab_only && ggml_backend_reg_count() == 0) {
+        LLAMA_LOG_ERROR("%s: no backends are loaded. hint: use ggml_backend_load() or ggml_backend_load_all() to load a backend before calling this function\n", __func__);
+        return nullptr;
+    }
+
     unsigned cur_percentage = 0;
     if (params.progress_callback == NULL) {
         params.progress_callback_user_data = &cur_percentage;
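With the guard added above, model loading now fails fast (instead of crashing later) when no ggml backend has been registered — which matters for `GGML_BACKEND_DL` builds where backends are shared libraries loaded at runtime. A minimal sketch of the expected call order; the model path is a placeholder:

```cpp
#include "ggml-backend.h"
#include "llama.h"

int main() {
    // load all available backends first; with dynamically loaded backends,
    // skipping this now triggers the new "no backends are loaded" error
    ggml_backend_load_all();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("model.gguf", mparams); // placeholder path
    if (model == NULL) {
        return 1;
    }
    llama_model_free(model);
    return 0;
}
```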
diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index 01eec46e842ac..02fb00339ec8d 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index f32f3c86aad2c..129d013ac75f7 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -2251,6 +2251,14 @@ struct server_context {
             slot.has_next_token = true;
         }
 
+        // if context shifting is disabled, make sure that we don't run out of context
+        if (!params_base.ctx_shift && slot.n_past + 1 >= slot.n_ctx) {
+            slot.stop           = STOP_TYPE_LIMIT;
+            slot.has_next_token = false;
+
+            SLT_DBG(slot, "stopped due to running out of context, n_past = %d, n_ctx = %d\n", slot.n_past, slot.n_ctx);
+        }
+
         // check the limits
         if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params_base)) {
             slot.stop = STOP_TYPE_LIMIT;
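The `n_past + 1 >= n_ctx` comparison above can look off-by-one at first glance. A hedged restatement of the arithmetic — the helper name is invented for illustration, not part of the patch:

```cpp
// Illustrative only. The token currently being sampled will occupy KV cache
// position n_past, so after it is accepted the cache holds n_past + 1 tokens;
// with context shifting disabled, that count must stay within n_ctx.
static bool would_run_out_of_context(bool ctx_shift_enabled, int n_past, int n_ctx) {
    return !ctx_shift_enabled && (n_past + 1 >= n_ctx);
}
```

The streaming test added below exercises exactly this path: with context shifting disabled, the stream must terminate with `finish_reason == "length"` rather than erroring out mid-generation.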
diff --git a/tools/server/tests/unit/test_ctx_shift.py b/tools/server/tests/unit/test_ctx_shift.py
index be93a6d31f410..2431ac70882d7 100644
--- a/tools/server/tests/unit/test_ctx_shift.py
+++ b/tools/server/tests/unit/test_ctx_shift.py
@@ -65,3 +65,21 @@ def test_ctx_shift_disabled_long_prompt():
     assert res.status_code != 200
     assert "error" in res.body
     assert "exceeds the available context size" in res.body["error"]["message"]
+
+def test_ctx_shift_disabled_stream():
+    global server
+    server.disable_ctx_shift = True
+    server.start()
+    res = server.make_stream_request("POST", "/v1/completions", data={
+        "n_predict": 256,
+        "prompt": "Once",
+        "stream": True,
+    })
+    content = ""
+    for data in res:
+        choice = data["choices"][0]
+        if choice["finish_reason"] == "length":
+            assert len(content) > 0
+        else:
+            assert choice["finish_reason"] is None
+            content += choice["text"]
diff --git a/tools/server/webui/src/App.tsx b/tools/server/webui/src/App.tsx
index 3b00a8f909ad6..1b673bbaa1cce 100644
--- a/tools/server/webui/src/App.tsx
+++ b/tools/server/webui/src/App.tsx
@@ -28,13 +28,13 @@ function AppLayout() {
   return (
     <>
-
-
+ {
+
 {items.map((item, i) => (
 clickToShow && setShow(i)}
+ tabIndex={0}
+ aria-description={
+   clickToShow ? `Click to show: ${item.name}` : undefined
+ }
+ role={clickToShow ? 'button' : 'menuitem'}
 >
 {removeItem && (
 {showingItem.type === 'imageFile' ? (
- {showingItem.name}
+ {`Preview
 ) : (
diff --git a/tools/server/webui/src/components/ChatMessage.tsx b/tools/server/webui/src/components/ChatMessage.tsx
index 08eb423526b53..ee59de450d1ff 100644
--- a/tools/server/webui/src/components/ChatMessage.tsx
+++ b/tools/server/webui/src/components/ChatMessage.tsx
@@ -83,13 +83,20 @@ export default function ChatMessage({
 
   if (!viewingChat) return null;
 
+  const isUser = msg.role === 'user';
+
   return (
-
+
 {msg.extra && msg.extra.length > 0 && (
@@ -99,7 +106,7 @@ export default function ChatMessage({
 {/* textarea for editing message */}
@@ -142,7 +149,7 @@ export default function ChatMessage({
 ) : (
   <>
     {/* render message as markdown */}
-
+
 {thought && (
 {siblingLeafNodeIds && siblingLeafNodeIds.length > 1 && (
-
+
@@ -215,6 +227,7 @@ export default function ChatMessage({
 'opacity-20': !nextSibling,
 })}
 onClick={() => nextSibling && onChangeSibling(nextSibling)}
+ aria-label="Next message version"
 >
@@ -223,7 +236,7 @@ export default function ChatMessage({
 {/* user message */}
 {msg.role === 'user' && (
 setEditingContent(msg.content)}
 disabled={msg.content === null}
 tooltipsContent="Edit message"
@@ -236,7 +249,7 @@ export default function ChatMessage({
 <>
 {!isPending && (
 {
   if (msg.content !== null) {
     onRegenerateMessage(msg as Message);
@@ -250,10 +263,7 @@ export default function ChatMessage({
 )}
 )}
-
+
 )}
@@ -271,6 +281,8 @@ function ThoughtProcess({
 }) {
 return (
-
+
diff --git a/tools/server/webui/src/components/ChatScreen.tsx b/tools/server/webui/src/components/ChatScreen.tsx
index 661fe14905a8f..09c601ef2366a 100644
--- a/tools/server/webui/src/components/ChatScreen.tsx
+++ b/tools/server/webui/src/components/ChatScreen.tsx
@@ -279,7 +279,11 @@ export default function ChatScreen() {
 function ServerInfo() {
   const { serverProps } = useAppContext();
   return (
-
+
 Server Info
 
@@ -311,6 +315,8 @@ function ChatInput({
 return (
 
diff --git a/tools/server/webui/src/components/Header.tsx b/tools/server/webui/src/components/Header.tsx
index 45775ff7a6258..ccddc21ddab73 100644
--- a/tools/server/webui/src/components/Header.tsx
+++ b/tools/server/webui/src/components/Header.tsx
@@ -38,8 +38,12 @@ export default function Header() {
 {/* action buttons (top right) */}
-
-
diff --git a/tools/server/webui/src/components/SettingDialog.tsx b/tools/server/webui/src/components/SettingDialog.tsx
index 0240a17f407a4..e4684be7e007c 100644
--- a/tools/server/webui/src/components/SettingDialog.tsx
+++ b/tools/server/webui/src/components/SettingDialog.tsx
@@ -335,14 +335,22 @@ export default function SettingDialog({
 };
 return (
-
+
 
 Settings
 
 {/* Left panel, showing sections - Desktop version */}
-
+
 {SETTING_SECTIONS.map((section, idx) => (
 {section.title}
-
+ ))}
 {/* Left panel, showing sections - Mobile version */}
-
+ {/* This menu is skipped on a11y, otherwise it duplicates the desktop version */}
+
 {SETTING_SECTIONS[sectionIdx].title}
diff --git a/tools/server/webui/src/components/Sidebar.tsx b/tools/server/webui/src/components/Sidebar.tsx
index 8e79e00b8dd4f..8cac52f4c6ddf 100644
--- a/tools/server/webui/src/components/Sidebar.tsx
+++ b/tools/server/webui/src/components/Sidebar.tsx
@@ -50,44 +50,72 @@ export default function Sidebar() {
 id="toggle-drawer"
 type="checkbox"
 className="drawer-toggle"
+ aria-label="Toggle sidebar"
 defaultChecked
 />
-
+
+
+ Skip to main content
+
+
-
 
 Conversations
 
+
 
+ Conversations
+
 
 {/* close sidebar button */}
-
 {/* new conversation button */}
-
 navigate('/')}
+ aria-label="New conversation"
 >
 New conversation
-
+
 {/* list of conversations */}
 {groupedConv.map((group, i) => (
+
 {/* group name (by date) */}
 {group.title ? (
 // we use btn class here to make sure that the padding/margin are aligned with the other items
-
+
 {group.title}
 ) : (
@@ -184,20 +212,23 @@ function ConversationItem({
 }) {
 return (
-
 {conv.name}
-
+
 {/* dropdown menu */}