Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ lcov-report/

tags
.build/
build*
release
debug
build-*
!build-info.cmake
!build-info.cpp.in
!build-info.sh
Expand Down
2 changes: 2 additions & 0 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
{ "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
{ "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
{ "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } },
{ "name": "remoting_frontend", "hidden": true, "cacheVariables": { "GGML_REMOTING_FRONTEND": "ON" } },
{ "name": "remoting_backend", "hidden": true, "cacheVariables": { "GGML_REMOTING_BACKEND": "ON" } },

{
"name": "x64-windows-llvm", "hidden": true,
Expand Down
13 changes: 13 additions & 0 deletions OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
approvers:
- kpouget
- cfergeau
- praveenkumar
- vyasgun
- gbraad
options: {}
reviewers:
- kpouget
- cfergeau
- praveenkumar
- vyasgun
- gbraad
36 changes: 36 additions & 0 deletions build.backend.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# force isatty-->true, so that $0 |& head -50 has colors ...
rm -f READY_backend FAILED_backend

echo "int isatty(int fd) { return 1; }" | gcc -O2 -fpic -shared -ldl -o /tmp/isatty.so -xc -
export LD_PRELOAD=/tmp/isatty.so

if [[ "${PERF_MODE:-}" ]]; then
FLAVOR="-prod"
else
FLAVOR=""
fi

export SDKROOT=$(xcrun --sdk macosx --show-sdk-path)

if [[ "$FLAVOR" == "-prod" ]]; then
cat <<EOF
###
### Building the prod flavor
###
EOF
fi

TARGETS="llama-run"
if [[ "${BENCH_MODE:-}" == "bench" ]]; then
TARGETS="$TARGETS llama-bench"
elif [[ "${BENCH_MODE:-}" == "perf" ]]; then
TARGETS="$TARGETS test-backend-ops"
fi

cmake --build ../build.remoting-backend$FLAVOR --parallel 8 --target $TARGETS "$@"

if [[ $? == 0 ]]; then
touch READY_backend
else
touch FAILED_backend
fi
25 changes: 25 additions & 0 deletions build.remoting.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# force isatty-->true, so that $0 |& head -50 has colors ...
rm -f READY FAILED

echo "int isatty(int fd) { return 1; }" | gcc -O2 -fpic -shared -ldl -o /tmp/isatty.so -xc -
export LD_PRELOAD=/tmp/isatty.so

TARGETS="ggml-remotingfrontend"

TARGETS="$BUILD_TARGET llama-run"
set -x
if [[ "${BENCH_MODE:-}" == "bench" ]]; then
TARGETS="$TARGETS llama-bench"
elif [[ "${BENCH_MODE:-}" == "server" ]]; then
TARGETS="$TARGETS llama-server"
elif [[ "${BENCH_MODE:-}" == "perf" ]]; then
TARGETS="$TARGETS test-backend-ops"
fi

cmake --build ../build.remoting-frontend$FLAVOR --parallel 8 --target $TARGETS "$@"

if [[ $? == 0 ]]; then
touch READY
else
touch FAILED
fi
1 change: 1 addition & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
cmake --build ./build/ --parallel 8
9 changes: 9 additions & 0 deletions build.vulkan.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
rm -f READY FAILED

cmake --build ../build.vulkan --parallel 8 --target llama-run

if [[ $? == 0 ]]; then
touch READY
else
touch FAILED
fi
3 changes: 3 additions & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)

option(GGML_ZDNN "ggml: use zDNN" OFF)
option(GGML_REMOTING_FRONTEND "ggml: use the API Remoting frontend" OFF)
option(GGML_REMOTING_BACKEND "ggml: use the API Remoting backend" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
Comment on lines +229 to 231
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Option name mismatch breaks backend enablement.

ggml_add_backend(RemotingFrontend|RemotingBackend) expects variables GGML_REMOTINGFRONTEND and GGML_REMOTINGBACKEND (no underscore). Current options define GGML_REMOTING_FRONTEND/BACKEND. Backends won’t be included.

Apply a backward-compatible alias:

 option(GGML_ZDNN                            "ggml: use zDNN"                                  OFF)
-option(GGML_REMOTING_FRONTEND               "ggml: use the API Remoting frontend"             OFF)
-option(GGML_REMOTING_BACKEND                "ggml: use the API Remoting backend"              OFF)
+option(GGML_REMOTING_FRONTEND               "ggml: use the API Remoting frontend"             OFF)
+option(GGML_REMOTING_BACKEND                "ggml: use the API Remoting backend"              OFF)
+# aliases expected by ggml_add_backend(RemotingFrontend/RemotingBackend)
+set(GGML_REMOTINGFRONTEND ${GGML_REMOTING_FRONTEND})
+set(GGML_REMOTINGBACKEND  ${GGML_REMOTING_BACKEND})
🤖 Prompt for AI Agents
In ggml/CMakeLists.txt around lines 229 to 231, the option names use
GGML_REMOTING_FRONTEND and GGML_REMOTING_BACKEND (with underscores) but
ggml_add_backend expects GGML_REMOTINGFRONTEND and GGML_REMOTINGBACKEND (no
underscore), so backends won't be enabled; add backward-compatible aliases by
setting the expected no-underscore CMake variables to the value of the
underscored options if they are not already set (i.e., after the option(...)
lines, check if GGML_REMOTINGFRONTEND is undefined and if so set it to
GGML_REMOTING_FRONTEND, and similarly for GGML_REMOTINGBACKEND :=
GGML_REMOTING_BACKEND) so existing builds using the old names continue to work
while satisfying ggml_add_backend.

option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
Expand Down Expand Up @@ -317,6 +319,7 @@ set(GGML_PUBLIC_HEADERS
include/ggml-sycl.h
include/ggml-vulkan.h
include/ggml-webgpu.h
include/ggml-remoting-frontend.h
include/gguf.h)

set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
Expand Down
16 changes: 16 additions & 0 deletions ggml/include/ggml-remoting-frontend.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

#define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend"

GGML_BACKEND_API ggml_backend_reg_t ggml_backend_remoting_frontend_reg();

#ifdef __cplusplus
}
#endif
2 changes: 2 additions & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,8 @@ ggml_add_backend(WebGPU)
ggml_add_backend(zDNN)
ggml_add_backend(OpenCL)
ggml_add_backend(Hexagon)
ggml_add_backend(RemotingFrontend)
ggml_add_backend(RemotingBackend)

Comment on lines +409 to 411
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Backend toggles rely on GGML_REMOTINGFRONTEND/REMOTINGBACKEND.

Given the top-level options currently use underscores, without the alias fix these will never be enabled. Ensure the alias or rename is applied in ggml/CMakeLists.txt.


I'll verify this review comment by inspecting the CMakeLists.txt files and the backend toggle mechanism.


---

🏁 Script executed:

```shell
#!/bin/bash
# Search for top-level backend options in ggml/CMakeLists.txt
echo "=== Top-level Remoting options in ggml/CMakeLists.txt ==="
rg -n -i "remoting" ggml/CMakeLists.txt -A2 -B2

echo -e "\n=== ggml_add_backend function definition ==="
rg -n "ggml_add_backend" ggml/CMakeLists.txt -A10 -B2

echo -e "\n=== Context around lines 409-411 in ggml/src/CMakeLists.txt ==="
sed -n '400,420p' ggml/src/CMakeLists.txt

Length of output: 1499


Now I need to find the ggml_add_backend() function definition to understand how it maps backend names to CMake options.


---

🏁 Script executed:

```shell
#!/bin/bash
# Search for ggml_add_backend definition more broadly
echo "=== Searching for ggml_add_backend definition ==="
rg -n "function\s*\(\s*ggml_add_backend\s*\)|macro\s*\(\s*ggml_add_backend" ggml/ --type cmake

echo -e "\n=== Alternative: macro or function definition lines ==="
rg -n "ggml_add_backend" ggml/CMakeLists.txt ggml/src/CMakeLists.txt -B5 | head -50

echo -e "\n=== Check if there's a cmake utilities or macro file ==="
find ggml -name "*.cmake" -type f

Length of output: 2213


I need to see the full implementation of ggml_add_backend() to understand how it converts the backend name argument to a CMake option.


---

🏁 Script executed:

```shell
#!/bin/bash
# Get the full ggml_add_backend function implementation
echo "=== ggml_add_backend function implementation ==="
sed -n '274,320p' ggml/src/CMakeLists.txt

Length of output: 1634


Now I need to verify if there's already an alias or rename mechanism in ggml/CMakeLists.txt that handles this mismatch.


---

🏁 Script executed:

```shell
#!/bin/bash
# Search for any alias, set, or mapping of RemotingFrontend/RemotingBackend options
echo "=== Checking for alias or rename in ggml/CMakeLists.txt ==="
rg -n -i "remotingfrontend|remotingbackend|REMOTING" ggml/CMakeLists.txt -A2 -B2

echo -e "\n=== Look for any set() statements that might alias variables ==="
rg -n "set\s*\(\s*GGML_REMOTING" ggml/CMakeLists.txt

echo -e "\n=== Check if options are conditionally set elsewhere ==="
rg -n "GGML_REMOTING" ggml/CMakeLists.txt

Length of output: 1060


Add alias or rename for GGML_REMOTING_ options in ggml/CMakeLists.txt to enable RemotingFrontend and RemotingBackend backends.*

The review comment is accurate. The ggml_add_backend() function converts backend names to uppercase with prefix: RemotingFrontendGGML_REMOTINGFRONTEND. However, the top-level options are defined as GGML_REMOTING_FRONTEND and GGML_REMOTING_BACKEND (with underscores). Without an alias mapping these variable names, the condition in ggml_add_backend() will never be true, and the backends cannot be enabled.

The following must be added to ggml/CMakeLists.txt:

  • Either rename options to remove underscores: GGML_REMOTINGFRONTEND and GGML_REMOTINGBACKEND
  • Or add aliases mapping the underscored names to the non-underscored names that ggml_add_backend() expects
🤖 Prompt for AI Agents
In ggml/src/CMakeLists.txt around lines 409 to 411, the ggml_add_backend() call
uppercases names without underscores (e.g., RemotingFrontend ->
GGML_REMOTINGFRONTEND) but the options are currently defined as
GGML_REMOTING_FRONTEND/GGML_REMOTING_BACKEND, so the checks never match; fix by
either renaming the option variables to GGML_REMOTINGFRONTEND and
GGML_REMOTINGBACKEND or add alias mappings that set
GGML_REMOTINGFRONTEND/GGML_REMOTINGBACKEND from
GGML_REMOTING_FRONTEND/GGML_REMOTING_BACKEND (preserving existing option names)
before ggml_add_backend() is invoked so the backend enablement logic sees the
expected variable names.

foreach (target ggml-base ggml)
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
Expand Down
9 changes: 9 additions & 0 deletions ggml/src/ggml-backend-reg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@
#include "ggml-cann.h"
#endif

#ifdef GGML_USE_REMOTINGFRONTEND
#include "ggml-remoting-frontend.h"
#endif

// disable C++17 deprecation warning for std::codecvt_utf8
#if defined(__clang__)
# pragma clang diagnostic push
Expand Down Expand Up @@ -200,6 +204,10 @@ struct ggml_backend_registry {
#ifdef GGML_USE_ZDNN
register_backend(ggml_backend_zdnn_reg());
#endif
#ifdef GGML_USE_REMOTINGFRONTEND
register_backend(ggml_backend_remoting_frontend_reg());
#endif

#ifdef GGML_USE_OPENCL
register_backend(ggml_backend_opencl_reg());
#endif
Expand Down Expand Up @@ -604,6 +612,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
ggml_backend_load_best("rpc", silent, dir_path);
ggml_backend_load_best("sycl", silent, dir_path);
ggml_backend_load_best("vulkan", silent, dir_path);
ggml_backend_load_best("remoting_frontend", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path);
Comment on lines +615 to 616
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Dynamic loader basename likely mismatches built target.

This searches for libggml-remoting_frontend*.so, but targets/scripts use “ggml-remotingfrontend” (no underscore). Rename to ensure discovery.

Apply this diff:

-    ggml_backend_load_best("remoting_frontend", silent, dir_path);
+    ggml_backend_load_best("remotingfrontend", silent, dir_path);
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
ggml_backend_load_best("remoting_frontend", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path);
ggml_backend_load_best("remotingfrontend", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path);
🤖 Prompt for AI Agents
In ggml/src/ggml-backend-reg.cpp around lines 615-616, the dynamic loader is
searching for the basename "remoting_frontend" which doesn't match the built
target name "remotingfrontend"; change the backend name passed to
ggml_backend_load_best from "remoting_frontend" to "remotingfrontend" so the
loader will correctly find libggml-remotingfrontend*.so (leave the "opencl" call
unchanged).

ggml_backend_load_best("hexagon", silent, dir_path);
ggml_backend_load_best("musa", silent, dir_path);
Expand Down
1 change: 1 addition & 0 deletions ggml/src/ggml-metal/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ ggml_add_backend_library(ggml-metal
ggml-metal-common.cpp
ggml-metal-context.m
ggml-metal-ops.cpp
ggml-metal-remoting.cpp
)
Comment on lines +14 to 15
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Gate new Metal remoting source on the remoting backend option.

Unconditionally adding ggml-metal-remoting.cpp can break builds when remoting is off or the file isn’t present. Guard it with the remoting backend toggle.

Apply this CMake diff:

 ggml_add_backend_library(ggml-metal
                          ggml-metal.cpp
                          ggml-metal-device.m
                          ggml-metal-device.cpp
                          ggml-metal-common.cpp
                          ggml-metal-context.m
                          ggml-metal-ops.cpp
-                         ggml-metal-remoting.cpp
+                         $<$<BOOL:${GGML_REMOTING_BACKEND}>:ggml-metal-remoting.cpp>
                         )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
ggml-metal-remoting.cpp
)
ggml_add_backend_library(ggml-metal
ggml-metal.cpp
ggml-metal-device.m
ggml-metal-device.cpp
ggml-metal-common.cpp
ggml-metal-context.m
ggml-metal-ops.cpp
$<$<BOOL:${GGML_REMOTING_BACKEND}>:ggml-metal-remoting.cpp>
)
🤖 Prompt for AI Agents
In ggml/src/ggml-metal/CMakeLists.txt around lines 14-15, the build
unconditionally lists ggml-metal-remoting.cpp which breaks builds when remoting
is disabled or the file is absent; gate adding this source behind the remoting
backend option (check the existing CMake option/variable name used for remoting
in the project) and only append ggml-metal-remoting.cpp to the target when that
option is enabled (optionally also test for file existence before adding to
avoid configure-time errors).


target_link_libraries(ggml-metal PRIVATE
Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-metal/ggml-metal-context.m
Original file line number Diff line number Diff line change
Expand Up @@ -514,13 +514,13 @@ enum ggml_status ggml_metal_graph_compute(ggml_metal_t ctx, struct ggml_cgraph *
}

void ggml_metal_graph_optimize(ggml_metal_t ctx, struct ggml_cgraph * gf) {
//const int64_t t_start = ggml_time_us();
const int64_t t_start = ggml_time_us();

if (ctx->use_graph_optimize) {
ggml_graph_optimize(gf);
}

//printf("%s: graph optimize took %.3f ms\n", __func__, (ggml_time_us() - t_start) / 1000.0);
printf("%s: graph optimize took %.3f ms\n", __func__, (ggml_time_us() - t_start) / 1000.0);
}
Comment on lines +517 to 524
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major

Use logging macros instead of printf.

While timing instrumentation is valuable, the code uses printf instead of the codebase's standard logging macros, which is inconsistent with logging practices elsewhere in the file.

Apply this diff to use the standard logging infrastructure:

 void ggml_metal_graph_optimize(ggml_metal_t ctx, struct ggml_cgraph * gf) {
     const int64_t t_start = ggml_time_us();
 
     if (ctx->use_graph_optimize) {
         ggml_graph_optimize(gf);
     }
 
-    printf("%s: graph optimize took %.3f ms\n", __func__, (ggml_time_us() - t_start) / 1000.0);
+    GGML_LOG_INFO("%s: graph optimize took %.3f ms\n", __func__, (ggml_time_us() - t_start) / 1000.0);
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
const int64_t t_start = ggml_time_us();
if (ctx->use_graph_optimize) {
ggml_graph_optimize(gf);
}
//printf("%s: graph optimize took %.3f ms\n", __func__, (ggml_time_us() - t_start) / 1000.0);
printf("%s: graph optimize took %.3f ms\n", __func__, (ggml_time_us() - t_start) / 1000.0);
}
const int64_t t_start = ggml_time_us();
if (ctx->use_graph_optimize) {
ggml_graph_optimize(gf);
}
GGML_LOG_INFO("%s: graph optimize took %.3f ms\n", __func__, (ggml_time_us() - t_start) / 1000.0);
}
🤖 Prompt for AI Agents
In ggml/src/ggml-metal/ggml-metal-context.m around lines 517 to 524, replace the
printf call with the codebase's standard logging macro used elsewhere in this
file (preserve the message content: function name and elapsed ms), e.g. use the
INFO/DEBUG logging macro (matching the file's existing logging style) and format
the elapsed time the same way; ensure any required logging header is included if
not already present and remove the printf call.


void ggml_metal_set_n_cb(ggml_metal_t ctx, int n_cb) {
Expand Down
7 changes: 4 additions & 3 deletions ggml/src/ggml-metal/ggml-metal-device.m
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,9 @@ ggml_metal_pipeline_t ggml_metal_library_compile_pipeline(ggml_metal_library_t l
NSError * error = nil;

NSString * base_func = [NSString stringWithUTF8String:base];

#if 0
GGML_LOG_DEBUG("%s: compiling pipeline: base = '%s', name = '%s'\n", __func__, base, name);

#endif
id<MTLFunction> mtl_function;
if (!cv) {
mtl_function = [lib->obj newFunctionWithName:base_func];
Expand All @@ -358,10 +358,11 @@ ggml_metal_pipeline_t ggml_metal_library_compile_pipeline(ggml_metal_library_t l
ggml_metal_pipelines_add(lib->pipelines, name, res);

[mtl_function release];

#if 0
GGML_LOG_DEBUG("%s: loaded %-40s %16p | th_max = %4d | th_width = %4d\n", __func__, name, (void *) res->obj,
(int) res->obj.maxTotalThreadsPerThreadgroup,
(int) res->obj.threadExecutionWidth);
#endif
}

ggml_critical_section_end();
Expand Down
28 changes: 28 additions & 0 deletions ggml/src/ggml-metal/ggml-metal-remoting.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "ggml-backend.h"
#include "ggml-backend-impl.h"
#include "ggml-impl.h"

#include "ggml-metal-device.h"
#include "ggml-metal-impl.h"
#include "ggml-metal-context.h"

extern "C" {
GGML_BACKEND_API void ggml_backend_metal_get_device_context(ggml_backend_dev_t dev,
bool *has_simdgroup_mm,
bool *has_simdgroup_reduction,
bool *use_bfloat);

GGML_BACKEND_API void
ggml_backend_metal_get_device_context(ggml_backend_dev_t dev,
bool *has_simdgroup_mm,
bool *has_simdgroup_reduction,
bool *has_bfloat) {
Comment on lines +10 to +19
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Remove duplicate function declaration with inconsistent parameter naming.

Lines 10-13 declare the function with parameter use_bfloat, while lines 15-19 declare and define it with has_bfloat. The first declaration is redundant and the inconsistent naming can cause confusion.

Apply this diff to remove the duplicate declaration:

 extern "C" {
-  GGML_BACKEND_API void ggml_backend_metal_get_device_context(ggml_backend_dev_t dev,
-							      bool *has_simdgroup_mm,
-							      bool *has_simdgroup_reduction,
-							      bool *use_bfloat);
-  
   GGML_BACKEND_API void
   ggml_backend_metal_get_device_context(ggml_backend_dev_t dev,
🤖 Prompt for AI Agents
In ggml/src/ggml-metal/ggml-metal-remoting.cpp around lines 10 to 19, there is a
redundant forward declaration of ggml_backend_metal_get_device_context using a
different parameter name (use_bfloat) than the subsequent definition
(has_bfloat); remove the duplicate declaration (lines 10–13) so only the single
definition remains and ensure the parameter name in the remaining
declaration/definition is consistent (has_bfloat).

ggml_metal_device_t dev_ctx = (ggml_metal_device_t)dev->context;

const struct ggml_metal_device_props *props = ggml_metal_device_get_props(dev_ctx);

*has_bfloat = props->has_bfloat;
*has_simdgroup_reduction = props->has_simdgroup_reduction;
*has_simdgroup_mm = props->has_simdgroup_mm;
}
}
21 changes: 21 additions & 0 deletions ggml/src/ggml-remotingbackend/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 3.19)
cmake_policy(SET CMP0114 NEW)

message(STATUS "Enable API Remoting backend")

ggml_add_backend_library(ggml-remotingbackend
backend.cpp
backend-dispatched.cpp
backend-dispatched-backend.cpp
backend-dispatched-device.cpp
backend-dispatched-buffer.cpp
backend-dispatched-buffer-type.cpp
backend-dispatched-metal.cpp
backend-utils.cpp
shared/api_remoting.h
shared/apir_backend.h
shared/venus_cs.h
venus_cs_ggml-rpc-back.cpp
)

target_compile_options(ggml-remotingbackend PRIVATE -std=c++20)
15 changes: 15 additions & 0 deletions ggml/src/ggml-remotingbackend/backend-convert.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "shared/apir_backend.h"
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Add include guard to prevent multiple inclusion.

The header lacks an include guard (#pragma once or traditional guards), which could lead to redefinition errors if included multiple times.

Apply this diff to add a guard:

+#pragma once
+
 #include "shared/apir_backend.h"
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
#include "shared/apir_backend.h"
#pragma once
#include "shared/apir_backend.h"
🤖 Prompt for AI Agents
In ggml/src/ggml-remotingbackend/backend-convert.h around line 1, the header is
missing an include guard which can cause redefinition errors when included
multiple times; add a header guard by either inserting a top-level `#pragma
once` or a traditional macro guard (`#ifndef ... / #define ... / #endif`) using
a unique macro name (e.g., GGML_REMOTINGBACKEND_BACKEND_CONVERT_H) to wrap the
file contents so the header is only processed once per translation unit.


#define BUFFER_TO_HOST_HANDLE(name) ggml_buffer_to_apir_handle(name)

static inline apir_buffer_host_handle_t
ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
// in the backend, the buffer handle is the buffer pointer
return (apir_buffer_host_handle_t) buffer;
}

static inline apir_buffer_type_host_handle_t
ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
// in the backend, the buffer handle is the buffer pointer
return (apir_buffer_type_host_handle_t) buft;
}
Comment on lines +5 to +15
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Update copy-pasted comment for buffer type conversion.

The comment at line 13 is identical to line 7 but should be updated to reflect that it's converting a buffer type handle, not a buffer handle.

Apply this diff to fix the comment:

 static inline apir_buffer_type_host_handle_t
 ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
-  // in the backend, the buffer handle is the buffer pointer
+  // in the backend, the buffer type handle is the buffer type pointer
   return (apir_buffer_type_host_handle_t) buft;
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
static inline apir_buffer_host_handle_t
ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
// in the backend, the buffer handle is the buffer pointer
return (apir_buffer_host_handle_t) buffer;
}
static inline apir_buffer_type_host_handle_t
ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
// in the backend, the buffer handle is the buffer pointer
return (apir_buffer_type_host_handle_t) buft;
}
static inline apir_buffer_host_handle_t
ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
// in the backend, the buffer handle is the buffer pointer
return (apir_buffer_host_handle_t) buffer;
}
static inline apir_buffer_type_host_handle_t
ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
// in the backend, the buffer type handle is the buffer type pointer
return (apir_buffer_type_host_handle_t) buft;
}
🤖 Prompt for AI Agents
In ggml/src/ggml-remotingbackend/backend-convert.h around lines 5 to 15, the
comment above ggml_buffer_type_to_apir_handle incorrectly duplicates the
buffer-pointer comment from ggml_buffer_to_apir_handle; update that comment to
state that this function converts a buffer type handle (i.e., the buffer type
pointer) to an apir_buffer_type_host_handle_t so it accurately describes
buffer-type conversion.

58 changes: 58 additions & 0 deletions ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include <cstdint>
#include "backend-internal.h"
#include "backend-dispatched.h"

#include "ggml-impl.h"
#include "ggml-backend-impl.h"
#include "ggml-backend.h"

#include "shared/apir_backend.h"

struct timer_data graph_compute_timer = {0, 0, 0, "compute_timer"};

uint32_t
backend_graph_compute(struct vn_cs_encoder *enc, struct vn_cs_decoder *dec, struct virgl_apir_context *ctx) {
UNUSED(ctx);
UNUSED(enc);

start_timer(&graph_compute_timer);

uint32_t shmem_res_id;
vn_decode_virtgpu_shmem_res_id(dec, &shmem_res_id);

const void *shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id);
if (!shmem_data) {
FATAL("Couldn't get the shmem addr from virgl :/");
}
size_t cgraph_size;
vn_decode_size_t(dec, &cgraph_size);

struct vn_cs_decoder secondary_dec = vn_cs_new_decoder((const char *) shmem_data, cgraph_size);

ggml_cgraph *cgraph = vn_decode_ggml_cgraph(&secondary_dec, cgraph_size);

ggml_status status;
#if APIR_BACKEND_CHECK_SUPPORTS_OP == 1
for (int idx = 0; idx < cgraph->n_nodes; idx++) {
ggml_tensor *op = ggml_graph_node(cgraph, idx);
if (dev->iface.supports_op(dev, op)) {
continue;
}
ERROR("Graph node %d (%s) not supported by the backend :/", idx, ggml_op_desc(op));

status = GGML_STATUS_ABORTED;
vn_encode_ggml_status(enc, &status);

stop_timer(&graph_compute_timer);
return 0;
}
#endif
status = bck->iface.graph_compute(bck, cgraph);
bck->iface.synchronize(bck);

vn_encode_ggml_status(enc, &status);

stop_timer(&graph_compute_timer);

return 0;
}
Loading