Merge branch 'master' into component-model/parsing
hydai committed Nov 3, 2023
2 parents dc72f71 + f7931f2 commit 18cc6ee
Showing 27 changed files with 13,844 additions and 1,329 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -62,6 +62,7 @@ option(WASMEDGE_LINK_TOOLS_STATIC "Statically link the wasmedge and wasmedgec to
 option(WASMEDGE_ENABLE_UB_SANITIZER "Enable undefined behavior sanitizer." OFF)
 set(WASMEDGE_PLUGIN_WASI_NN_BACKEND "" CACHE STRING "Enable WasmEdge Wasi-NN plugin with backends.")
 option(WASMEDGE_PLUGIN_WASI_NN_GGML_LLAMA_BLAS "Enable LLAMA_BLAS in the WASI-NN GGML backend" ON)
+option(WASMEDGE_PLUGIN_WASI_NN_GGML_LLAMA_CUBLAS "Enable LLAMA_CUBLAS in the WASI-NN GGML backend" OFF)
 option(WASMEDGE_PLUGIN_WASI_NN_GGML_LLAMA_METAL "Enable LLAMA_METAL in the WASI-NN GGML backend" OFF)
 # Currently supported WASI-NN backend: "OpenVINO" on Linux x86_64
 option(WASMEDGE_PLUGIN_WASI_CRYPTO "Enable WasmEdge Wasi-crypto plugin." OFF)
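To build with the new option: a minimal configure sketch, assuming the usual WasmEdge CMake flow and the "GGML" value for the WASMEDGE_PLUGIN_WASI_NN_BACKEND cache variable (the CUBLAS option name comes from this diff; the rest of the invocation is illustrative):

    # Configure a CUDA-accelerated WASI-NN GGML plugin build (illustrative).
    cmake -Bbuild -GNinja \
      -DWASMEDGE_PLUGIN_WASI_NN_BACKEND="GGML" \
      -DWASMEDGE_PLUGIN_WASI_NN_GGML_LLAMA_CUBLAS=ON .
    cmake --build build

As cmake/Helper.cmake below shows, turning this option on also drops -Werror from WASMEDGE_CFLAGS, presumably so that warnings from the CUDA-enabled llama.cpp sources do not break the build.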
2 changes: 1 addition & 1 deletion LICENSE.spdx
@@ -39,7 +39,7 @@ PackageFileName: ./plugins/wasi_nn/thirdparty/ggml
 PackageHomePage: https://github.com/ggerganov/llama.cpp
 PackageOriginator: Georgi Gerganov
 PackageLicenseDeclared: MIT
-PackageDownloadLocation: git://github.com/ggerganov/llama.cpp.git@b1309
+PackageDownloadLocation: git://github.com/ggerganov/llama.cpp.git@b1383
 
 ## Relationships
 Relationship: SPDXRef-wasmedge CONTAINS SPDXRef-blake3
9 changes: 7 additions & 2 deletions cmake/Helper.cmake
@@ -35,10 +35,15 @@ else()
   list(APPEND WASMEDGE_CFLAGS
     -Wall
     -Wextra
-    -Werror
-    -Wno-error=pedantic
   )
 
+  if(NOT WASMEDGE_PLUGIN_WASI_NN_GGML_LLAMA_CUBLAS)
+    list(APPEND WASMEDGE_CFLAGS
+      -Werror
+      -Wno-error=pedantic
+    )
+  endif()
+
   if(WASMEDGE_ENABLE_UB_SANITIZER)
     list(APPEND WASMEDGE_CFLAGS -fsanitize=undefined)
   endif()
10 changes: 10 additions & 0 deletions plugins/wasi_nn/CMakeLists.txt
@@ -6,6 +6,16 @@
 set(LLAMA_ALL_WARNINGS OFF)
 set(LLAMA_METAL_NDEBUG ON)
 
+if(WASMEDGE_PLUGIN_WASI_NN_GGML_LLAMA_CUBLAS)
+  message(STATUS "WASI-NN GGML LLAMA backend: Enable LLAMA_CUBLAS")
+  set(LLAMA_CUBLAS ON)
+  # If CUBLAS is ON, then OpenBLAS should be OFF.
+  set(WASMEDGE_PLUGIN_WASI_NN_GGML_LLAMA_BLAS OFF)
+else()
+  message(STATUS "WASI-NN GGML LLAMA backend: Disable LLAMA_CUBLAS")
+  set(LLAMA_CUBLAS OFF)
+endif()
+
 if(WASMEDGE_PLUGIN_WASI_NN_GGML_LLAMA_BLAS)
   message(STATUS "WASI-NN GGML LLAMA backend: Enable LLAMA_BLAS")
   # Default use OpenBLAS
26 changes: 25 additions & 1 deletion plugins/wasi_nn/ggml.cpp
@@ -54,8 +54,9 @@ Expect<ErrNo> load(WasiNNEnvironment &Env, Span<const Span<uint8_t>> Builders,
   gpt_params Params;
   llama_backend_init(Params.numa);
   llama_model_params ModelParams = llama_model_default_params();
+  GraphRef.ModelFilePath = ModelFilePath;
   GraphRef.LlamaModel =
-      llama_load_model_from_file(ModelFilePath.c_str(), ModelParams);
+      llama_load_model_from_file(GraphRef.ModelFilePath.c_str(), ModelParams);
   if (GraphRef.LlamaModel == nullptr) {
     spdlog::error("[WASI-NN] GGML backend: Error: unable to init model."sv);
     Env.NNGraph.pop_back();
@@ -116,6 +117,7 @@ Expect<ErrNo> setInput(WasiNNEnvironment &Env, uint32_t ContextId,
           "[WASI-NN] GGML backend: Unable to retrieve the enable-log option."sv);
       return ErrNo::InvalidArgument;
     }
+    llama_log_set(nullptr, &CxtRef.EnableLog);
   }
   if (Doc.at_key("stream-stdout").error() == simdjson::SUCCESS) {
     auto Err = Doc["stream-stdout"].get<bool>().get(CxtRef.StreamStdout);
@@ -176,6 +178,28 @@ Expect<ErrNo> setInput(WasiNNEnvironment &Env, uint32_t ContextId,
   llama_context_params ContextParams = llama_context_default_params();
   ContextParams.n_ctx = CxtRef.CtxSize;
   ContextParams.n_batch = CxtRef.BatchSize;
+
+  // XXX: Due to the limitation of WASI-NN proposal,
+  // we have no way to pass the metadata before the setInput phase
+  // when we want to do some configurations in the load phase.
+  // That's why we have this hack.
+  {
+    llama_model_params ModelParams = llama_model_default_params();
+    // If the `n_gpu_layers` in `setInput` is different from the
+    // `n_gpu_layers` in `llama_model_params`, we will reload
+    // the model with the new configuration.
+    if (ModelParams.n_gpu_layers != static_cast<int32_t>(CxtRef.NGPULayers)) {
+      ModelParams.n_gpu_layers = CxtRef.NGPULayers;
+      GraphRef.LlamaModel = llama_load_model_from_file(
+          GraphRef.ModelFilePath.c_str(), ModelParams);
+      if (GraphRef.LlamaModel == nullptr) {
+        spdlog::error("[WASI-NN] GGML backend: Error: unable to init model."sv);
+        Env.NNGraph.pop_back();
+        return ErrNo::InvalidArgument;
+      }
+    }
+  }
+
   GraphRef.LlamaContext =
       llama_new_context_with_model(GraphRef.LlamaModel, ContextParams);
 
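As the XXX comment above explains, the backend reads its configuration from the JSON metadata passed during set_input. A hypothetical metadata document that would exercise the reload path might look like the sketch below; "enable-log" and "stream-stdout" appear verbatim in this diff, while the "ctx-size", "batch-size", and "n-gpu-layers" key spellings are assumptions inferred from the CxtRef.CtxSize, CxtRef.BatchSize, and CxtRef.NGPULayers fields:

    {
      "enable-log": false,
      "stream-stdout": true,
      "ctx-size": 512,
      "batch-size": 512,
      "n-gpu-layers": 35
    }

Since llama_model_default_params() presumably leaves n_gpu_layers at 0, any nonzero "n-gpu-layers" value differs from the default and triggers a reload of the model through llama_load_model_from_file with the new layer count.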
5 changes: 3 additions & 2 deletions plugins/wasi_nn/ggml.h
@@ -18,8 +18,9 @@ namespace WasmEdge::Host::WASINN::GGML {
 
 #ifdef WASMEDGE_PLUGIN_WASI_NN_BACKEND_GGML
 struct Graph {
-  llama_model *LlamaModel;
-  llama_context *LlamaContext;
+  llama_model *LlamaModel = nullptr;
+  llama_context *LlamaContext = nullptr;
+  std::string ModelFilePath;
 };
 
 struct Context {
44 changes: 30 additions & 14 deletions plugins/wasi_nn/thirdparty/ggml/CMakeLists.txt
@@ -24,6 +24,12 @@ option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer"
 option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
 
 # instruction set specific
+if (LLAMA_NATIVE)
+    set(INS_ENB OFF)
+else()
+    set(INS_ENB ON)
+endif()
+
 option(LLAMA_AVX "llama: enable AVX" ON)
 option(LLAMA_AVX2 "llama: enable AVX2" ON)
 option(LLAMA_AVX512 "llama: enable AVX512" OFF)
@@ -89,6 +95,20 @@ if (NOT MSVC)
     endif()
 endif()
 
+if (APPLE AND LLAMA_ACCELERATE)
+    find_library(ACCELERATE_FRAMEWORK Accelerate)
+    if (ACCELERATE_FRAMEWORK)
+        message(STATUS "Accelerate framework found")
+
+        add_compile_definitions(GGML_USE_ACCELERATE)
+        add_compile_definitions(ACCELERATE_NEW_LAPACK)
+        add_compile_definitions(ACCELERATE_LAPACK_ILP64)
+        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
+    else()
+        message(WARNING "Accelerate framework not found")
+    endif()
+endif()
+
 if (LLAMA_METAL)
     find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
     find_library(METAL_FRAMEWORK Metal REQUIRED)
@@ -335,8 +355,7 @@ endif()
 if (LLAMA_ALL_WARNINGS)
     if (NOT MSVC)
         set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
-        set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
-            -Werror=implicit-function-declaration)
+        set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration)
         set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
         set(host_cxx_flags "")
 
@@ -368,7 +387,8 @@ if (LLAMA_ALL_WARNINGS)
     set(c_flags ${c_flags} ${warning_flags})
     set(cxx_flags ${cxx_flags} ${warning_flags})
     add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
-                        "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags} ${host_cxx_flags}>")
+                        "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
+                        "$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
 
 endif()
 
@@ -423,9 +443,6 @@ if (NOT MSVC)
     if (LLAMA_GPROF)
         add_compile_options(-pg)
     endif()
-    if (LLAMA_NATIVE)
-        add_compile_options(-march=native)
-    endif()
 endif()
 
 if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
@@ -480,6 +497,9 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
         add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
     endif()
 else()
+    if (LLAMA_NATIVE)
+        add_compile_options(-march=native)
+    endif()
     if (LLAMA_F16C)
         add_compile_options(-mf16c)
 
@@ -576,8 +596,12 @@ wasmedge_add_library(ggml OBJECT
   ggml.h
   ggml-alloc.c
   ggml-alloc.h
+  ggml-backend.c
+  ggml-backend.h
   common.cpp
   common.h
+  sampling.cpp
+  sampling.h
   ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
   ${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
   ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
@@ -622,14 +646,6 @@ if (BUILD_SHARED_LIBS)
     endif()
 endif()
 
-# global flags for ggml
-if (NOT WIN32)
-    target_compile_options(ggml
-        PRIVATE
-        -DGGML_USE_K_QUANTS
-    )
-endif()
-
 # disable warnings
 if (NOT WIN32)
     target_compile_options(ggml
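Read together, the LLAMA_NATIVE hunks in this file move -march=native out of the generic non-MSVC block and into the non-MSVC x86 branch, and introduce INS_ENB, presumably consumed elsewhere in the file as the default for the per-instruction-set options. A condensed, illustrative sketch of the resulting logic (not the vendored file verbatim):

    if (LLAMA_NATIVE)
        set(INS_ENB OFF)  # rely on -march=native to select CPU features
    else()
        set(INS_ENB ON)   # enable per-feature flags explicitly
    endif()

    # ... later, in the x86_64/i686/AMD64 branch for non-MSVC compilers:
    if (LLAMA_NATIVE)
        add_compile_options(-march=native)
    endif()

One visible effect is that -march=native is no longer applied on the ARM paths, which pick their flags through the CMAKE_SYSTEM_PROCESSOR checks instead.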
