diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2ad38115940..cab3ba9e68e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -134,6 +134,8 @@ jobs: include: - build: 'x64' os: ubuntu-22.04 + - build: 's390x-z15' # z15 because our CI runners are on z15 + os: ubuntu-22.04-s390x # GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm # - build: 'arm64' # os: ubuntu-22.04-arm diff --git a/.github/workflows/update-ops-docs.yml b/.github/workflows/update-ops-docs.yml index c0218fa7421..d5e264b34f4 100644 --- a/.github/workflows/update-ops-docs.yml +++ b/.github/workflows/update-ops-docs.yml @@ -3,10 +3,12 @@ name: Update Operations Documentation on: push: paths: + - 'docs/ops.md' - 'docs/ops/**' - 'scripts/create_ops_docs.py' pull_request: paths: + - 'docs/ops.md' - 'docs/ops/**' - 'scripts/create_ops_docs.py' diff --git a/CODEOWNERS b/CODEOWNERS index 3b696bf94a1..f833fb7cf48 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -55,7 +55,7 @@ /ggml/src/ggml-cuda/common.cuh @slaren /ggml/src/ggml-cuda/fattn* @JohannesGaessler /ggml/src/ggml-cuda/ggml-cuda.cu @slaren -/ggml/src/ggml-cuda/mmf.* @JohannesGaessler +/ggml/src/ggml-cuda/mmf.* @JohannesGaessler @am17an /ggml/src/ggml-cuda/mmq.* @JohannesGaessler /ggml/src/ggml-cuda/mmvf.* @JohannesGaessler /ggml/src/ggml-cuda/mmvq.* @JohannesGaessler diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 892c23318a1..3356ef550de 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -307,6 +307,10 @@ function(ggml_add_cpu_backend_variant tag_name) foreach (feat ${ARGN}) set(GGML_INTERNAL_${feat} ON) endforeach() + elseif (GGML_SYSTEM_ARCH STREQUAL "s390x") + foreach (feat ${ARGN}) + set(GGML_INTERNAL_${feat} ON) + endforeach() endif() ggml_add_cpu_backend_variant_impl(${tag_name}) @@ -371,6 +375,14 @@ if (GGML_CPU_ALL_VARIANTS) else() message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}") endif() + elseif (GGML_SYSTEM_ARCH STREQUAL "s390x") + if (CMAKE_SYSTEM_NAME MATCHES "Linux") + ggml_add_cpu_backend_variant(s390x_z15 Z15 VXE) + # ggml_add_cpu_backend_variant(s390x_z16 Z16 VXE) + # ggml_add_cpu_backend_variant(s390x_z17 Z17 VXE) + else() + message(FATAL_ERROR "Unsupported s390x target OS: ${CMAKE_SYSTEM_NAME}") + endif() else() message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}") endif() diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt index 42041b717aa..34323afa076 100644 --- a/ggml/src/ggml-cpu/CMakeLists.txt +++ b/ggml/src/ggml-cpu/CMakeLists.txt @@ -466,29 +466,45 @@ function(ggml_add_cpu_backend_variant_impl tag_name) list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d) elseif (GGML_SYSTEM_ARCH STREQUAL "s390x") message(STATUS "s390x detected") - list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c) - file(READ "/proc/cpuinfo" CPUINFO_CONTENTS) - string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS}) - - # TODO: Separation to determine activation of VX/VXE/VXE2 - if (${S390X_M} MATCHES "8561|8562") - message(STATUS "z15 target") - list(APPEND ARCH_FLAGS -march=z15) - elseif (${S390X_M} MATCHES "3931") - message(STATUS "z16 target") - list(APPEND ARCH_FLAGS -march=z16) - elseif (${S390X_M} MATCHES "9175|9176") - # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version. - # binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15. - message(STATUS "z17 target") - list(APPEND ARCH_FLAGS -march=arch15) - else() - message(STATUS "Unknown target") - message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.") - list(APPEND ARCH_FLAGS -march=native -mtune=native) + list(APPEND GGML_CPU_SOURCES + ggml-cpu/arch/s390/quants.c) + + # for native compilation + if (GGML_NATIVE) + # check machine level to determine target + file(READ "/proc/cpuinfo" CPUINFO_CONTENTS) + string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS}) + + # TODO: Separation to determine activation of VX/VXE/VXE2 + if (${S390X_M} MATCHES "8561|8562") + message(STATUS "z15 target") + list(APPEND ARCH_FLAGS -march=z15) + elseif (${S390X_M} MATCHES "3931") + message(STATUS "z16 target") + list(APPEND ARCH_FLAGS -march=z16) + elseif (${S390X_M} MATCHES "9175|9176") + # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version. + # binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15. + message(STATUS "z17 target") + list(APPEND ARCH_FLAGS -march=arch15) + else() + message(STATUS "Unknown target") + message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.") + list(APPEND ARCH_FLAGS -march=native -mtune=native) + endif() + # for cross-compilation + elseif(GGML_CPU_ALL_VARIANTS) + # range through IBM z15 to z17 + # NOTE: update when a new hardware level is released + foreach (ZHW RANGE 15 17) + if(DEFINED GGML_INTERNAL_Z${ZHW}) + message(STATUS "z${ZHW} cross-compile target") + list(APPEND ARCH_FLAGS -march=z${ZHW}) + endif() + endforeach() endif() - if (GGML_VXE) + if (GGML_VXE OR GGML_INTERNAL_VXE) message(STATUS "VX/VXE/VXE2 enabled") list(APPEND ARCH_FLAGS -mvx -mzvector) list(APPEND ARCH_DEFINITIONS GGML_VXE) diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 522d1f67da3..909b49e8e64 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -114,6 +114,7 @@ const char * llm_type_name(llm_type type) { case LLM_TYPE_17B_16E: return "17Bx16E (Scout)"; case LLM_TYPE_17B_128E: return "17Bx128E (Maverick)"; case LLM_TYPE_A13B: return "A13B"; + case LLM_TYPE_7B_A1B: return "7B.A1B"; case LLM_TYPE_8B_A1B: return "8B.A1B"; case LLM_TYPE_21B_A3B: return "21B.A3B"; case LLM_TYPE_30B_A3B: return "30B.A3B"; @@ -1843,8 +1844,10 @@ void llama_model::load_hparams(llama_model_loader & ml) { ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); - switch (hparams.n_layer) { - // TODO: Add llm type label (not sure this is useful) + switch (hparams.n_embd) { + case 1536: type = LLM_TYPE_7B_A1B; break; + case 2048: case 2560: type = LLM_TYPE_3B; break; + case 4096: type = LLM_TYPE_32B; break; default: type = LLM_TYPE_UNKNOWN; } diff --git a/src/llama-model.h b/src/llama-model.h index 7f48662f280..05701e7d70c 100644 --- a/src/llama-model.h +++ b/src/llama-model.h @@ -107,6 +107,7 @@ enum llm_type { LLM_TYPE_17B_16E, // llama4 Scout LLM_TYPE_17B_128E, // llama4 Maverick LLM_TYPE_A13B, + LLM_TYPE_7B_A1B, LLM_TYPE_8B_A1B, // lfm2moe LLM_TYPE_21B_A3B, // Ernie MoE small LLM_TYPE_30B_A3B,