Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
735 commits
Select commit Hold shift + click to select a range
baa9255
llama : merge conts and reshapes and remove unnecessary cont (#15380)
CISC Aug 18, 2025
f0c541d
scripts : update sync scripts
ggerganov Aug 18, 2025
60212f1
sync : ggml
ggerganov Aug 18, 2025
6d7f111
codeowners : remove mmv.*
ggerganov Aug 18, 2025
f08c4c0
mtmd : clean up clip_n_output_tokens (#15391)
ngxson Aug 18, 2025
f0d3c74
batched-bench : use rand tokens (#15398)
ggerganov Aug 19, 2025
9d262f4
server : remove swa_full warning (#15399)
ggerganov Aug 19, 2025
e9288e8
chat : clarify the meaning of reasoning_format (#15408)
ngxson Aug 19, 2025
6424594
ggml-cpu: add mxfp4 VSX intrinsics for Power9+ (ppc64le) hardware (#1…
mgiessing Aug 19, 2025
67f09a3
musa: handle __hgt2_mask, available starting from MUSA SDK rc4.3.0 (#…
yeahdongcn Aug 19, 2025
a6d3cfe
CANN: optimize rope operator (#15335)
YangShuai52 Aug 19, 2025
d2fcd91
server : disable context shift by default (#15416)
ggerganov Aug 19, 2025
1e19f5d
common : Add top-nsigma sampler to help globally (#15428)
gcp Aug 19, 2025
9ef6b0b
model : add gpt-oss type strings (#15424)
ggerganov Aug 19, 2025
fb22dd0
opencl: mark `argsort` unsupported if cols exceed workgroup limit (#1…
lhez Aug 19, 2025
a094f38
musa: fix build warnings (#15258)
yeahdongcn Aug 20, 2025
2f37014
lookahead : add sample command to readme (#15447)
ggerganov Aug 20, 2025
37f10f9
make : remove make in favor of CMake (#15449)
danbev Aug 20, 2025
1a99c2d
cmake : fix target include directories (#15450)
xiaobing318 Aug 20, 2025
ec5ab1a
common : fix context shift help message (#15448)
DamonFool Aug 20, 2025
657b8a7
chat: handle gpt-oss return/end token inconsistency (#15421)
danbev Aug 20, 2025
fec9519
vulkan: shorten pipeline name strings (#15431)
jeffbolznv Aug 20, 2025
7a6e91a
CUDA: replace GGML_CUDA_F16 with CUDA arch checks (#15433)
JohannesGaessler Aug 20, 2025
13aeb7a
CUDA: refactor FA support/selection code (#15454)
JohannesGaessler Aug 20, 2025
1bc664a
server: fix OpenAI API compatibility for usage statistics in chat str…
TeoZosa Aug 20, 2025
5682a37
sched : copy only the used experts when offloading prompt processing …
slaren Aug 20, 2025
8ad038c
musa: add GGML_UNUSED_VARS (#15446)
yeahdongcn Aug 21, 2025
29f538a
examples : remove references to `make` in examples [no ci] (#15457)
danbev Aug 21, 2025
1b0db8f
server : fix webui (#15462)
stduhpf Aug 21, 2025
945e1f1
ggml : fix condition of im2col on Metal backend (#15460)
ngxson Aug 21, 2025
2f3dbff
common : fix incorrect print of non-ascii characters in the logging (…
DamonFool Aug 21, 2025
b2caf67
convert : make Mistral community chat templates optional via paramete…
juliendenize Aug 21, 2025
245be73
ci : add copilot-instructions.md (#15286)
Copilot Aug 21, 2025
b108e42
ci : fix -Werror=return-type in clip.cpp so ci/run.sh can run without…
michaelgiba Aug 21, 2025
2758fa1
examples : add model conversion tool/example (#15455)
danbev Aug 21, 2025
30649ca
ci : continue file download with wget (#15471)
ggerganov Aug 21, 2025
029bb39
ci : enable RVV1.0 native build (#15386)
alitariq4589 Aug 21, 2025
ad294df
examples : install torch-cpu for model conversion tool/example (#15475)
DamonFool Aug 21, 2025
715a6db
kv-cache : drop the "unified" prefix (#15467)
ggerganov Aug 21, 2025
9ad5e60
examples : fix some typos in examples/model-conversion/README.md (#15…
DamonFool Aug 21, 2025
96452a3
vulkan: Reuse conversion results in prealloc_y (#15410)
jeffbolznv Aug 21, 2025
20c2dac
vulkan: add exp operation (#15456)
ddwkim Aug 21, 2025
97ae596
vulkan : support conv_2d_dw with f16 weights (#15392)
Acly Aug 21, 2025
3f196be
graph : remove build_attn_with_sinks overload (#15469)
ggerganov Aug 21, 2025
cd36b5e
llama : remove deprecated llama_kv_self API (#15472)
ggerganov Aug 21, 2025
54a241f
sched : fix possible use of wrong ids tensor when offloading moe prom…
slaren Aug 21, 2025
a0f98dd
CANN: Optimize RMS_NORM using cache (#15419)
noemotiovon Aug 22, 2025
e288693
readme : model : mtdm : lfm2 improvements (#15476)
tdakhran Aug 22, 2025
4afb0a7
server : Support multimodal completion and embeddings prompts in JSON…
65a Aug 22, 2025
ad5c975
ggml-cpu: Support Q5_0 and Q5_1 on s390x (#15486)
taronaeo Aug 22, 2025
9ebebef
llama : remove KV cache defragmentation logic (#15473)
ggerganov Aug 22, 2025
b1ab918
cuda : add Pad Reflect 1D support (#14659)
YavorGIvanov Aug 22, 2025
92f7f0a
ggml: add `conv3d` op (#15182)
rmatif Aug 22, 2025
32732f2
model : gpt-oss add response_format support (#15494)
aldehir Aug 22, 2025
4536363
ggml WebGPU: add support for quantization types (#15440)
reeselevine Aug 22, 2025
e92734d
test-opt: allow slight inprecision (#15503)
JohannesGaessler Aug 22, 2025
330c3d2
vulkan: optimize mul_mat_id loading row ids into shared memory (#15427)
jeffbolznv Aug 23, 2025
0a9b43e
vulkan : support ggml_mean (#15393)
Acly Aug 23, 2025
b55f06e
vulkan.Dockerfile: install vulkan SDK using tarball (#15282)
yeahdongcn Aug 23, 2025
289bf41
vulkan: Rewrite synchronization to allow some overlap between nodes (…
jeffbolznv Aug 23, 2025
21dc4dd
chat : fix debug build assertion in trim function (#15520)
LaffeyNyaa Aug 23, 2025
9ef5369
scripts: fix compare-llama-bench.py (#15521)
JohannesGaessler Aug 23, 2025
b1afcab
model : add support for Seed-OSS (#15490)
pwilkin Aug 23, 2025
611f419
vulkan: optimize rms_norm, and allow the work to spread across multip…
jeffbolznv Aug 23, 2025
710dfc4
CUDA: fix half2 -> half conversion for HIP (#15529)
JohannesGaessler Aug 23, 2025
e78cf0d
vulkan: workaround MoltenVK compile failure in multi_add (#15506)
jeffbolznv Aug 24, 2025
a9c6ffc
vulkan: enable Conv2D for Apple after MoltenVK fixed the bug (#15526)
0cc4m Aug 24, 2025
c9a24fb
vulkan: Support FA with any multiple of 8 head sizes (#15537)
jeffbolznv Aug 24, 2025
b730706
kv-cache : support layer reuse (#15504)
ggerganov Aug 24, 2025
043fb27
vulkan: apply MUL_MAT_ID subgroup optimization to non-coopmat devices…
0cc4m Aug 24, 2025
c247d06
CANN: ROPE cache sin/cos repeat (#15501)
noemotiovon Aug 25, 2025
7da9fed
convert : support interns1-mini (#15412)
RunningLeon Aug 25, 2025
b0ba31f
metal : add FA kernels for HS=40 (#15559)
ggerganov Aug 25, 2025
0d5a470
convert : update Ernie 4.5 dense architecture name (#15555)
ownia Aug 25, 2025
6b64f74
batched-bench : fix unified KV cache handling + pp timing (#15562)
ggerganov Aug 25, 2025
5a6bc6b
model-conversion : add model card template for embeddings [no ci] (#1…
danbev Aug 25, 2025
dfd9b5f
model-conversion : set pooling type to none in logits.cpp (#15564)
danbev Aug 25, 2025
5eff6ec
CUDA: MoE helper in device code, better tile sizes (#15525)
JohannesGaessler Aug 25, 2025
111f8d0
metal: fix regression when no metal devices are present (#15531)
booxter Aug 25, 2025
886b97a
tests: Generate unique input values for count_equal (#15487)
jeffbolznv Aug 25, 2025
4d917cd
vulkan: fix min subgroup 16 condition for mmid subgroup optimization …
0cc4m Aug 25, 2025
f7207b0
opencl: fix support ops condition for `rms_norm` (#15560)
lhez Aug 25, 2025
74f52f7
CUDA: Accelerate MXFP4 table lookup using `__byte_perm` (#15451)
Qeeweew Aug 25, 2025
34bdbbd
vulkan: Remove splitting for mul_mat_id (#15568)
jeffbolznv Aug 26, 2025
4c37636
Add a warning for special devices (#15563)
pt13762104 Aug 26, 2025
0fd90db
metal : remove contiguous assertion for src0 in IM2COL (#15577)
CISC Aug 26, 2025
39842a7
gguf-py : remove erroneous FFN_GATE entry (#15583)
CISC Aug 26, 2025
c4e9239
model : support MiniCPM-V 4.5 (#15575)
tc-mb Aug 26, 2025
1d8d83d
metal : improve `MUL_MAT_ID` (#15541)
ggerganov Aug 26, 2025
85cc1ae
context : print graph stats for memory-less contexts (#15586)
ggerganov Aug 26, 2025
79a5462
mtmd : support Kimi VL model (#15458)
ngxson Aug 26, 2025
b3964c1
metal : optimize FA vec for large sequences and BS <= 8 (#15566)
ggerganov Aug 26, 2025
8f5afa9
CUDA: return -1 for nonexistent compiled arch (#15587)
JohannesGaessler Aug 26, 2025
62cef26
model-conversion : add qat-q4 quantization targets (#15588)
danbev Aug 26, 2025
0373486
graph : fix assert in memory-less build_attn (#15590)
ggerganov Aug 26, 2025
a6a58d6
llamafile: PowerPC Sgemm Optimization (#15558)
shalinib-ibm Aug 26, 2025
44b1efa
tests: add performance test for mul mat id (#15543)
netrunnereve Aug 26, 2025
8ce3ff1
mtmd : fix mtmd ios build (#15579)
fidoriel Aug 26, 2025
8b69686
SYCL: fix rms_norm_mul_add for tensor dim not a multiple of sg_size (…
qnixsynapse Aug 26, 2025
bcbddcd
tests : fix test-opt with GGML_BACKEND_DL (#15599)
slaren Aug 26, 2025
86076f9
OpenCL: add fused group_norm/norm, mul, add (#15314)
rmatif Aug 27, 2025
fcca218
common : add -m to bash completion for --model [no ci] (#15591)
danbev Aug 27, 2025
1cf123a
ggml-cpu : add basic RVV support for vector f32 ops (#15057)
xctan Aug 27, 2025
1e74897
CANN: refactor mask handling and improve performance in FA (#15561)
noemotiovon Aug 27, 2025
5094941
src: reduce the logging
kpouget Apr 9, 2025
23409dc
Add helper scripts
kpouget Apr 9, 2025
1183af9
build-system: integrate the Remoting Frontend backend build
kpouget Apr 9, 2025
7c8b8fb
ggml: ggml-remotingfrontend: stubs of a new backend
kpouget Apr 9, 2025
f39b99f
.github: remove
kpouget Apr 9, 2025
226c539
CMakeLists: add the ggml files and include Mesa files
kpouget Apr 10, 2025
5336ac9
ggml-*: move the ggml interfaces to a dedicated file
kpouget Apr 10, 2025
88d94c3
run.vulkan.sh: allow running with GDB
kpouget Apr 10, 2025
b0ce82d
virtgpu: start integrating virt-gpu code
kpouget Apr 10, 2025
7ccd1b1
virtgpu: allocate a shared page with the host
kpouget Apr 10, 2025
a864dab
run.remoting: cleanup the screen before running
kpouget Apr 11, 2025
1288908
Reduce the verbose logging
kpouget Apr 11, 2025
2d20b2d
Trace the executionpath
kpouget May 2, 2025
f002dcc
virtgpu: abort early
kpouget May 2, 2025
b497545
virtgpu: add the virtgpu_submit to kick a command on the host
kpouget May 2, 2025
a2ada88
podman_compile.sh: add compile helper
kpouget May 2, 2025
354e55d
virtgpu: move the logging functions to virtgpu-utils
kpouget May 5, 2025
44e62f0
virtgpu: use venus CS functions
kpouget May 5, 2025
cd9f3e9
virtgpu: make more generic
kpouget May 6, 2025
3c98263
ggml-remotingfrontend: fix and make more generic
kpouget May 6, 2025
d286d69
prepare.backend.sh: helper script
kpouget May 6, 2025
ad0c43a
build.backend.sh: helper script
kpouget May 6, 2025
2c63dbd
build: integrate the remoting-backend skeleton
kpouget May 6, 2025
8679435
remoting: start using shared header files
kpouget May 6, 2025
9c66722
remotingbackend/CMakeLists: add header dependencies
kpouget May 6, 2025
790d84b
ggml-remotingbackend: add skeleton of argument passing
kpouget May 6, 2025
646cddd
remotingfrontend: improve the typing
kpouget May 6, 2025
f6b8a40
podman_compile: delete the pod before compiling
kpouget May 6, 2025
134ff4b
virtgpu-utils: add WARNING
kpouget May 7, 2025
326a80d
virtgpu: split the remote call into prepare/call/finish
kpouget May 7, 2025
0e2ae43
ggml-backend-reg: reindent
kpouget May 7, 2025
567e2a8
move thks_bye() to virtgpu-utils
kpouget May 7, 2025
a0acf8d
virtgpu: remove forward call wip code
kpouget May 7, 2025
54327f3
ggml-remotingfrontend: build the apir framework
kpouget May 7, 2025
29527a2
ggml-remotingbackend: build the apir framework
kpouget May 7, 2025
48b2328
Add support for device name and description
kpouget May 13, 2025
faf7789
ggml: src: ggml-metal/ggml-metal: make less verbose
kpouget May 13, 2025
584401d
ggml-remotingbackend: include the ggml backend initialization
kpouget May 13, 2025
6d10bf0
remoting: include device_get_type and device_get_memory
kpouget May 13, 2025
cec6854
ggml: src: ggml-remotingbackend/backend: make less verbose
kpouget May 13, 2025
4830b28
shared: venus_cs: add more CS functions
kpouget May 13, 2025
fbb510a
ggml: src: ggml-remotingfrontend/ggml-remoting: make the NOT_IMPLEMEN…
kpouget May 13, 2025
081d7b9
ggml: src: ggml-remotingfrontend/virtgpu-forward: make less verbose
kpouget May 13, 2025
5683479
remoting: correct the device_get_* name order
kpouget May 13, 2025
473a70c
remoting: add support for device_supports_op
kpouget May 13, 2025
a966f26
ggml/src/ggml-remotingbackend/shared/venus_cs.h: clearer message when…
kpouget May 13, 2025
35ad53c
ggml/src/ggml-remotingfrontend/virtgpu.cpp: make less verbose
kpouget May 13, 2025
63c5c52
remoting: reindent and mark functions as NOT_IMPLEMENTED
kpouget May 13, 2025
c6db63e
Add buffer-type support
kpouget May 14, 2025
17009ba
Keep working
kpouget May 14, 2025
ab6524f
Keep working on buffer types and buffers
kpouget May 15, 2025
8f06b44
implemnt alloc_buffer and get_base
kpouget May 15, 2025
6e2adf6
buffer: clean ups
kpouget May 15, 2025
0ffa4bf
Keep working on buffers
kpouget May 15, 2025
1e484a1
build.backend: build llama-run
kpouget May 16, 2025
dbe47f8
ggml: src: ggml-remotingbackend/shared/venus_cs: fix memory corruptio…
kpouget May 16, 2025
2e49088
ggml: src: ggml-remotingfrontend/ggml-backend-device: handcode the caps
kpouget May 16, 2025
a088b6c
remoting: implement buffer_set_tensor
kpouget May 16, 2025
72e15e7
remoting: improve
kpouget May 16, 2025
959179f
remotingbackend: accept the virgl context argument
kpouget May 19, 2025
87491f8
remotingfrontend: implement buffer_set_tensor with a guest shared page
kpouget May 19, 2025
c638bd1
ggml: src: ggml-remotingbackend/backend-dispatched-buffer: implement …
kpouget May 19, 2025
8d0ef1c
remotingfrontend: add more STOP_HERE calls
kpouget May 19, 2025
cddfa5d
remotingfrontend: add IMPLEMENTED_ONCE
kpouget May 19, 2025
4761586
ggml: src: ggml-remotingfrontend/virtgpu-shm: reduce the verbosity
kpouget May 19, 2025
70bed26
ggml: src: ggml-remotingfrontend/ggml-backend-reg: refactor to untigh…
kpouget May 19, 2025
3312f5d
ggml: src: ggml-remotingfrontend/ggml-remoting: remove draft code
kpouget May 19, 2025
5bcb9dc
remotingfrontend: add host buffer memory allocation
kpouget May 19, 2025
c9da129
remoting: add clear buffer and get_tensor
kpouget May 20, 2025
e95189e
remoting: add skeleton for graph_compute method
kpouget May 20, 2025
f3fab8c
remoting: continue the compute_graph skeleton
kpouget May 20, 2025
7f4e9cb
Continue the skeleton
kpouget May 20, 2025
586c3a9
remoting: recursively encode/decode the tensors
kpouget May 20, 2025
5f910a8
keep working
kpouget May 20, 2025
5fb1607
start using the ggml-rpc serialization methods
kpouget May 21, 2025
4e54111
remoting: implement the free_buffer function
kpouget May 21, 2025
4dff7f0
remoting: highlight the hot path
kpouget May 21, 2025
14daf76
remoting: fix the warnings and mute the debug logs when not in debug …
kpouget May 22, 2025
5d9641e
scripts: make it easier to build and run in prod mode
kpouget May 22, 2025
18da4cf
remotingfrontend: always prepare a shared memory for data
kpouget May 22, 2025
91f60df
remoting: release device buffers on exit
kpouget May 22, 2025
d41e7f3
remoting: refactor the buffer context
kpouget May 26, 2025
7026e8c
remoting: exchange more data
kpouget May 26, 2025
1bef318
podman_compile: pass the PERF_MODE flag to the container
kpouget May 26, 2025
7734728
examples: run: run: measure the generation throughput
kpouget May 27, 2025
5d80465
examples: run: run: stop after 25 tokens
kpouget May 27, 2025
32641f4
remoting: add basic timing measurements
kpouget May 27, 2025
50d345f
remoting: cleanup the logs
kpouget May 27, 2025
38cc90f
ggml: src: ggml-remotingfrontend/ggml-backend-reg: call the initializ…
kpouget May 27, 2025
f4ac17d
disable APIR_ALLOC_FROM_HOST_PTR
kpouget May 27, 2025
01c5fc1
remoting: cache the buffer_get_base result
kpouget May 27, 2025
e3975e6
examples: run: run: improve the timing measurement
kpouget May 28, 2025
b801838
examples: run: run: remove the stop after 25 tokens
kpouget May 28, 2025
eefb58e
remoting: improve the timing measurement
kpouget May 28, 2025
027ef7a
remoting: allow compiling to Vulkan
kpouget May 28, 2025
0b80159
ggml: src: ggml-remotingfrontend/virtgpu: reduce the response time wa…
kpouget May 28, 2025
654f52f
remoting: experiement with buffer_from_ptr
kpouget Jun 2, 2025
4ddf33b
remoting: remove from_ptr code
kpouget Jun 2, 2025
5a4a38a
remoting: try host_pointer
kpouget Jun 2, 2025
892e570
remoting: try from_host_ptr
kpouget Jun 2, 2025
a022d81
remoting: make alloc_memory + alloc_from_host_ptr work :)
kpouget Jun 2, 2025
714db8e
build.backend: export SDKROOT to please apple compiler ...
kpouget Jun 11, 2025
d67aed7
prepare.backend.sh: more flags
kpouget Jun 11, 2025
f540ac3
run.vulkan.sh: more flexible
kpouget Jun 11, 2025
d38911f
run.remoting.sh: more flexible
kpouget Jun 11, 2025
b63a2d7
prepare.vulkan.sh: more details
kpouget Jun 11, 2025
9f7525f
ggml: src: ggml-remotingfrontend/virtgpu: don't include virglrenderer…
kpouget Jun 11, 2025
a32d8a4
ggml: src: ggml-remotingfrontend/virtgpu: don't use absolute paths in…
kpouget Jun 11, 2025
f44cdf9
remoting: rewrite to avoid hard-coded paths
kpouget Jun 11, 2025
496a80b
update the custom scripts
kpouget Jun 11, 2025
31352ed
ggml: src: ggml-remotingfrontend/virtgpu-shm: import the cpp atomic
kpouget Jun 17, 2025
8fd37a5
remoting: reintroduce the support for support_op(tensor)
kpouget Jun 19, 2025
91ad061
remotingbackend: add an optional call to support_op to avoid crashing…
kpouget Jun 19, 2025
dde12b1
remotingfrontend: reduce and cleanup the logging
kpouget Jun 19, 2025
0eb86c3
remotingfrontend: cache some values
kpouget Jun 19, 2025
aa55111
Update the custom scripts
kpouget Jun 19, 2025
43766d9
remotingbackend: set APIR_DEVICE_SUPPORTS_OP_ALWAYS_TRUE = 1
kpouget Jun 19, 2025
a23f4d7
remotingfrontend: use GGML_ASSERT instead of assert
kpouget Jun 19, 2025
6aeff90
tools: run: run: use GGML_ASSERT instead of assert
kpouget Jun 19, 2025
1df6f69
ggml-metal: expose some internal device properties
kpouget Jun 25, 2025
01d5c51
remoting: add support for running the ggml-metal support_op method in…
kpouget Jun 25, 2025
32bfb94
ggml: src: ggml-remotingfrontend/virtgpu-forward-buffer: remove an un…
kpouget Jun 25, 2025
d0383ef
remotingfrontend: detect initialization issue
kpouget Jun 27, 2025
eae3907
remotingfrontend: detect initialization issue
kpouget Jun 27, 2025
62c97d3
ggml: src: ggml-remotingbackend/shared/apir_backend: add apir_backend…
kpouget Jul 1, 2025
9c03318
ggml: src: ggml-remotingfrontend/virtgpu: give more info on error
kpouget Jul 1, 2025
2292e2e
ggml: src: ggml-remotingbackend/backend-dispatched-metal: add missing…
kpouget Jul 1, 2025
28280f1
ggml-remotingbackend: allow saving the hypervisor logs to a file
kpouget Jul 1, 2025
516933f
remotingbackend: update the VIRGL_APIR indexes to match virlrenderer …
kpouget Jul 2, 2025
87d71f2
remotingfrontend/virtgpu: give more time to load the libraries and lo…
kpouget Jul 4, 2025
3bd97bc
OWNERS: add file for openshift CI
kpouget Jul 4, 2025
1fa1c2d
ggml-remotingbackend/shared/apir_backend: return the duration in stop…
kpouget Jul 7, 2025
d1b255f
ggml-remotingfrontend/virtgpu: rewrite the timeout mechanism to make …
kpouget Jul 7, 2025
38d49bd
ggml-remotingfrontend: improve the timers display
kpouget Jul 7, 2025
2a2b19b
ggml-remotingfrontend: add an ERROR log level
kpouget Jul 7, 2025
2ec8784
ggml-remotingfrontend: turn some INFO logs into MESSAGE (always printed)
kpouget Jul 7, 2025
2f8f3ee
ggml-remotingfrontend: turn a INFO log into ERROR
kpouget Jul 7, 2025
135ff21
ggml: src: ggml-remotingfrontend/virtgpu: correctly fail when the vir…
kpouget Jul 7, 2025
ebb5fb2
run.remoting: update to run llama-server
kpouget Jul 7, 2025
cb0fca5
OWNERS: Update
kpouget Jul 9, 2025
910e3fc
remoting: improve the frontend<>backend error handling
kpouget Jul 8, 2025
3b9b455
remoting: improve the frontend<>backend return code exchange
kpouget Jul 9, 2025
8725fdd
ggml: src: ggml-remotingfrontend/virtgpu: fix typo
kpouget Aug 20, 2025
871c4c8
update the build scripts
kpouget Aug 27, 2025
e5c6771
ggml: src: ggml-remotingfrontend/ggml-metal-remoting: update ggml_met…
kpouget Aug 29, 2025
ab02d59
update the build scripts
kpouget Aug 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
13 changes: 8 additions & 5 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
BinPackArguments: true
BinPackParameters: true # OnePerLine
BinPackArguments: false
BinPackParameters: false # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
Expand Down Expand Up @@ -70,15 +70,18 @@ ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<.*\.h>'
- Regex: '".*"'
Priority: 1
SortPriority: 0
- Regex: '^<.*'
- Regex: '^<.*\.h>'
Priority: 2
SortPriority: 0
- Regex: '.*'
- Regex: '^<.*'
Priority: 3
SortPriority: 0
- Regex: '.*'
Priority: 4
SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
Expand Down
130 changes: 130 additions & 0 deletions .devops/cann.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# ==============================================================================
# GLOBAL BUILD ARGUMENTS
# ==============================================================================

# CANN base image used by every FROM ${CANN_BASE_IMAGE} line in this file.
# Kept in one place so a CANN version bump is a single-line change.
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10

# ==============================================================================
# STAGE: build
# Compiles llama.cpp (binaries and shared libraries) with the CANN backend
# ==============================================================================
FROM ${CANN_BASE_IMAGE} AS build

# Ascend chip model to compile for; forwarded to CMake as SOC_TYPE below.
ARG ASCEND_SOC_TYPE=Ascend910B3

# Toolchain and libraries needed to configure and compile the project.
# The yum cache is removed in the same layer so it does not end up in the image.
RUN yum install -y \
        cmake \
        g++ \
        gcc \
        git \
        libcurl-devel \
        make \
        python3 \
        python3-pip \
    && yum clean all \
    && rm -rf /var/cache/yum

# All subsequent relative paths resolve against /app.
WORKDIR /app

# Bring the whole build context (the project sources) into the image.
COPY . .

# CANN toolkit locations needed while compiling.
# ENV (rather than `source` inside a single RUN) keeps these values available
# to every later instruction of this stage.
# Each variable gets its own ENV instruction because an ENV line cannot see
# values assigned earlier on that same line.
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
# Stub runtime libraries first, then the toolkit lib64, then whatever the base image had.
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
# Only the core variables are set here; add further CANN variables as needed.

# Configure and compile llama.cpp.
# set_env.sh is sourced in the same shell so the toolkit environment it exports
# is visible to both cmake invocations.
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
    && cmake -S . -B build \
        -DCMAKE_BUILD_TYPE=Release \
        -DGGML_CANN=ON \
        -DSOC_TYPE=${ASCEND_SOC_TYPE} \
    && cmake --build build --config Release -j$(nproc)

# -- Organize build artifacts for copying in later stages --
# Collect every shared library produced by the build into /app/lib so the
# runtime stages can pick them up with a single COPY --from=build.
RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

# Assemble /app/full: all executables, the top-level Python scripts, gguf-py,
# the requirements files, and the tools.sh dispatcher.
# tools.sh MUST be copied here: the `full` target copies /app/full to /app and
# declares ENTRYPOINT ["/app/tools.sh"], so leaving it out produces an image
# whose entrypoint does not exist and which therefore cannot start.
RUN mkdir -p /app/full && \
    cp build/bin/* /app/full/ && \
    cp *.py /app/full/ && \
    cp -r gguf-py /app/full/ && \
    cp -r requirements /app/full/ && \
    cp requirements.txt /app/full/ && \
    cp .devops/tools.sh /app/full/tools.sh

# ==============================================================================
# STAGE: base
# Common runtime layer: CANN environment plus the compiled shared libraries
# ==============================================================================
FROM ${CANN_BASE_IMAGE} AS base

# Runtime-only packages (curl is what the server target's HEALTHCHECK invokes).
# The yum cache is removed in the same layer to keep the image small.
RUN yum install -y \
        curl \
        libgomp \
    && yum clean all \
    && rm -rf /var/cache/yum

# CANN toolkit locations needed at runtime.
# One ENV per variable: an ENV line cannot see values assigned earlier on that
# same line, and the later variables are derived from ASCEND_TOOLKIT_HOME.
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
# /app comes first because the project's .so files are copied there below.
ENV LD_LIBRARY_PATH=/app:${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
# Only the core variables are set here; add further CANN variables as needed.

WORKDIR /app

# Shared libraries collected by the build stage
COPY --from=build /app/lib/ /app

# ==============================================================================
# FINAL STAGES (TARGETS)
# ==============================================================================

### Target: full
# All executables plus the Python scripts and their installed dependencies
# ==============================================================================
FROM base AS full

# Everything the build stage gathered under /app/full
COPY --from=build /app/full /app

# Python runtime and the packages listed in requirements.txt
# (resolved relative to /app, the WORKDIR inherited from the base stage).
# pip and yum caches are dropped in the same layer to limit image size.
RUN yum install -y \
        git \
        python3 \
        python3-pip \
    && pip3 install --no-cache-dir --upgrade pip setuptools wheel \
    && pip3 install --no-cache-dir -r requirements.txt \
    && yum clean all \
    && rm -rf /var/cache/yum

# This entrypoint requires a tools.sh script to be present at /app/tools.sh
ENTRYPOINT ["/app/tools.sh"]
# Alternative when no tools.sh is available: start the server by default
# ENTRYPOINT ["/app/llama-server"]

### Target: light
# Minimal image: the base runtime plus the llama-cli executable only
# ==============================================================================
FROM base AS light

# Single binary taken from the build stage's artifact directory
COPY --from=build /app/full/llama-cli /app

# Exec form, so llama-cli runs directly rather than under a shell
ENTRYPOINT ["/app/llama-cli"]

### Target: server
# Server image: the base runtime plus the llama-server executable only
# ==============================================================================
FROM base AS server

# Single binary taken from the build stage's artifact directory
COPY --from=build /app/full/llama-server /app

# NOTE(review): presumably read by llama-server as its bind address so the
# service is reachable from outside the container — confirm against the server docs.
ENV LLAMA_ARG_HOST=0.0.0.0

# Probe the /health endpoint on port 8080 every 5 minutes; curl -f exits
# non-zero on an HTTP error status, which marks the container unhealthy.
HEALTHCHECK --interval=5m CMD ["curl", "-f", "http://localhost:8080/health"]

# Exec form, so llama-server runs directly rather than under a shell
ENTRYPOINT ["/app/llama-server"]
22 changes: 0 additions & 22 deletions .devops/cloud-v-pipeline

This file was deleted.

6 changes: 1 addition & 5 deletions .devops/cpu.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,15 @@ FROM ubuntu:$UBUNTU_VERSION AS build

ARG TARGETARCH

ARG GGML_CPU_ARM_ARCH=armv8-a

RUN apt-get update && \
apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "$TARGETARCH" = "amd64" ]; then \
RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
elif [ "$TARGETARCH" = "arm64" ]; then \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
else \
echo "Unsupported architecture"; \
exit 1; \
Expand Down
2 changes: 1 addition & 1 deletion .devops/cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ RUN apt-get update \
python3 \
python3-pip \
&& pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt \
&& pip install --break-system-packages -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
Expand Down
6 changes: 3 additions & 3 deletions .devops/musa.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc4.0.1
ARG MUSA_VERSION=rc4.2.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64

ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-runtime-ubuntu${UBUNTU_VERSION}
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

Expand Down
3 changes: 2 additions & 1 deletion .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ let
inherit (lib)
cmakeBool
cmakeFeature
optionalAttrs
optionals
strings
;
Expand Down Expand Up @@ -197,7 +198,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
];

# Environment variables needed for ROCm
env = optionals useRocm {
env = optionalAttrs useRocm {
ROCM_PATH = "${rocmPackages.clr}";
HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
};
Expand Down
4 changes: 2 additions & 2 deletions .devops/rocm.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
ARG UBUNTU_VERSION=24.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=6.3
ARG AMDGPU_VERSION=6.3
ARG ROCM_VERSION=6.4
ARG AMDGPU_VERSION=6.4

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
Expand Down
2 changes: 1 addition & 1 deletion .devops/tools.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
set -e

# Read the first argument into a variable
Expand Down
30 changes: 23 additions & 7 deletions .devops/vulkan.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,30 @@ ARG UBUNTU_VERSION=24.04

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget
# Ref: https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
apt update -y && \
apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
# Install build tools
RUN apt update && apt install -y git build-essential cmake wget xz-utils

# Install Vulkan SDK
ARG VULKAN_VERSION=1.4.321.1
RUN ARCH=$(uname -m) && \
wget -qO /tmp/vulkan-sdk.tar.xz https://sdk.lunarg.com/sdk/download/${VULKAN_VERSION}/linux/vulkan-sdk-linux-${ARCH}-${VULKAN_VERSION}.tar.xz && \
mkdir -p /opt/vulkan && \
tar -xf /tmp/vulkan-sdk.tar.xz -C /tmp --strip-components=1 && \
mv /tmp/${ARCH}/* /opt/vulkan/ && \
rm -rf /tmp/*

# Install cURL and Vulkan SDK dependencies
RUN apt install -y libcurl4-openssl-dev curl \
libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev

# Set environment variables
ENV VULKAN_SDK=/opt/vulkan
ENV PATH=$VULKAN_SDK/bin:$PATH
ENV LD_LIBRARY_PATH=$VULKAN_SDK/lib:$LD_LIBRARY_PATH
ENV CMAKE_PREFIX_PATH=$VULKAN_SDK:$CMAKE_PREFIX_PATH
ENV PKG_CONFIG_PATH=$VULKAN_SDK/lib/pkgconfig:$PKG_CONFIG_PATH

# Build it
WORKDIR /app
Expand Down
87 changes: 0 additions & 87 deletions .github/ISSUE_TEMPLATE/010-bug-compilation.yml

This file was deleted.

Loading