Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
d0d5b22
CANN: Refactor to reduce duplicate code (#12731)
hipudding Apr 7, 2025
52b3d71
CANN: fix typo in ggml-cann (#12733)
jeffzhou2000 Apr 7, 2025
bd3f59f
cmake : enable curl by default (#12761)
ngxson Apr 7, 2025
e391d3e
ci : no curl on ggml-ci (#12796)
ngxson Apr 7, 2025
518a014
sycl: remove redundant memcopy in function ggml_backend_sycl_buffer_s…
jeffzhou2000 Apr 7, 2025
995083e
cpu: move all the operators into a separate c++ file (except mul_mat)…
cmdr2 Apr 2, 2025
36ca8b3
CUDA: don't convert BF16 weights to FP32 (ggml/1174)
CISC Apr 4, 2025
ff067db
ggml : simplify Arm fp16 CPU logic (ggml/1177)
ggerganov Apr 7, 2025
a4e46e2
sync : ggml
ggerganov Apr 7, 2025
1a1ab7e
cuda : fix HIP and MUSA BF16 (#0)
ggerganov Apr 7, 2025
4ccea21
hellaswag: display estimated score confidence interval (#12797)
stduhpf Apr 7, 2025
8297401
opencl: better identify Adreno GPU (#12760)
lhez Apr 7, 2025
1466621
llama : Support llama 4 text-only (#12791)
ngxson Apr 7, 2025
a226bc7
gguf-py : support lazy tensor splitting (#12809)
compilade Apr 8, 2025
656babd
Revert "sycl:remove redundant memcopy in function ggml_backend_sycl_b…
NeoZhangJianyu Apr 8, 2025
8ca6e1c
server : webui : Improve Chat Input with Auto-Sizing Textarea (#12785)
characharm Apr 8, 2025
1d343b4
arg : Including limits file on AIX (#12822)
mehendarkarprajwal Apr 8, 2025
2dabf75
llava: add more helper functions to check projector types in clip con…
dm4 Apr 8, 2025
78a1ba0
server : fix thread.join() on exit (#12831)
ngxson Apr 8, 2025
a19b5ce
llama : fix FA when KV cache is not used (i.e. embeddings) (#12825)
ggerganov Apr 8, 2025
b32efad
llava: improve clip_ctx destructor to not memleak load_image_size (#1…
mattjcly Apr 8, 2025
7538246
cuda : add f32 to bf16 copy op (#12806)
CISC Apr 8, 2025
7ecd780
vulkan: Use fp16 for the flash attention P*V multiplication (#12783)
jeffbolznv Apr 9, 2025
0090950
vulkan: In coopmat2 mmq, load q4_k/q5_k scales through shared memory …
jeffbolznv Apr 9, 2025
6e1c4ce
CANN: Support Opt CONV_TRANSPOSE_1D and ELU (#12786)
noemotiovon Apr 9, 2025
47277d6
readme : add rpc backend (#12842)
ggerganov Apr 9, 2025
65a69e6
clip : do not print ftype (#12832)
ngxson Apr 9, 2025
381603a
ci: detach common from the library (#12827)
pminev Apr 9, 2025
8ed7124
sycl: update documentation to use -no-cnv (#12845)
Rbiessy Apr 9, 2025
d9a63b2
musa: enable freediskspace for docker image build (#12839)
yeahdongcn Apr 9, 2025
d3bd719
llama : Support Qwen3 and Qwen3MoE (#12828)
bozheng-hit Apr 9, 2025
2391506
ggml-impl.h: fix build on POWER9 (#12855)
pkubaj Apr 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .devops/cpu.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ WORKDIR /app
COPY . .

RUN if [ "$TARGETARCH" = "amd64" ]; then \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
elif [ "$TARGETARCH" = "arm64" ]; then \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
else \
echo "Unsupported architecture"; \
exit 1; \
Expand Down
2 changes: 1 addition & 1 deletion .devops/cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ COPY . .
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
Expand Down
2 changes: 1 addition & 1 deletion .devops/intel.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
fi && \
echo "Building with dynamic libs" && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
Expand Down
4 changes: 2 additions & 2 deletions .devops/llama-cli-cann.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10

FROM ascendai/cann:$ASCEND_VERSION AS build

WORKDIR /app

COPY . .

RUN yum install -y gcc g++ cmake make
RUN yum install -y gcc g++ cmake make libcurl-devel
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
Expand Down
2 changes: 1 addition & 1 deletion .devops/musa.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ COPY . .
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
fi && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
Expand Down
2 changes: 1 addition & 1 deletion .devops/rocm.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ WORKDIR /app
COPY . .

RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release \
&& cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib \
Expand Down
25 changes: 25 additions & 0 deletions .github/actions/windows-setup-curl/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Composite action: downloads a prebuilt libcurl package for Windows from
# curl.se, unpacks it under the runner's temp directory, and exposes the
# unpacked location through the `curl_path` output.
name: 'Windows - Setup CURL'
description: 'Composite action, to be reused in other workflow'
inputs:
  curl_version:
    description: 'CURL version'
    required: false
    default: '8.6.0_6'
  # Selects which prebuilt package is fetched. The default keeps the original
  # hard-coded `win64` URL, so existing callers are unaffected.
  # NOTE(review): other values (e.g. an ARM64 build) must match the package
  # names actually published on curl.se for the chosen version - confirm.
  architecture:
    description: 'Architecture of the libcurl package to download'
    required: false
    default: 'win64'
outputs:
  curl_path:
    description: "Path to the downloaded libcurl"
    value: ${{ steps.get_libcurl.outputs.curl_path }}

runs:
  using: "composite"
  steps:
    - name: libCURL
      id: get_libcurl
      shell: powershell
      env:
        # Inputs are passed through the environment rather than interpolated
        # into the script text.
        CURL_VERSION: ${{ inputs.curl_version }}
        ARCHITECTURE: ${{ inputs.architecture }}
      run: |
        # --fail makes curl exit non-zero on HTTP errors instead of saving the
        # server's error page as curl.zip.
        curl.exe --fail -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-${env:ARCHITECTURE}-mingw.zip"
        # Native commands do not raise PowerShell errors on their own, so stop
        # here with a clear message rather than letting extraction fail later.
        if ($LASTEXITCODE -ne 0) { throw "Failed to download libcurl (curl.exe exit code $LASTEXITCODE)" }
        # -Force keeps this step from failing when the directory already exists.
        New-Item -ItemType Directory -Force -Path "$env:RUNNER_TEMP/libcurl" | Out-Null
        # --strip-components=1 drops the archive's top-level folder so the
        # package contents land directly in libcurl/.
        tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
        echo "curl_path=$env:RUNNER_TEMP/libcurl" >> $env:GITHUB_OUTPUT
1 change: 0 additions & 1 deletion .github/workflows/bench.yml.disabled
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ jobs:
cmake -B build \
-DGGML_NATIVE=OFF \
-DLLAMA_BUILD_SERVER=ON \
-DLLAMA_CURL=ON \
-DLLAMA_CUBLAS=ON \
-DCUDAToolkit_ROOT=/usr/local/cuda \
-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
Expand Down
9 changes: 6 additions & 3 deletions .github/workflows/build-linux-cross.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ jobs:
sudo apt-get install -y --no-install-recommends \
build-essential \
gcc-14-riscv64-linux-gnu \
g++-14-riscv64-linux-gnu
g++-14-riscv64-linux-gnu \
libcurl4-openssl-dev:riscv64

- name: Build
run: |
Expand Down Expand Up @@ -59,7 +60,8 @@ jobs:
glslc \
gcc-14-riscv64-linux-gnu \
g++-14-riscv64-linux-gnu \
libvulkan-dev:riscv64
libvulkan-dev:riscv64 \
libcurl4-openssl-dev:riscv64

- name: Build
run: |
Expand Down Expand Up @@ -99,7 +101,8 @@ jobs:
build-essential \
glslc \
crossbuild-essential-arm64 \
libvulkan-dev:arm64
libvulkan-dev:arm64 \
libcurl4-openssl-dev:arm64

- name: Build
run: |
Expand Down
Loading