Skip to content

Commit

Permalink
[WASI-NN] ggml backend: all model parameters can only be set in config…
Browse files Browse the repository at this point in the history
… stage

Signed-off-by: hydai <z54981220@gmail.com>
  • Loading branch information
hydai committed Dec 18, 2023
1 parent 5b06107 commit b421e73
Showing 1 changed file with 6 additions and 43 deletions.
49 changes: 6 additions & 43 deletions plugins/wasi_nn/ggml.cpp
Expand Up @@ -15,8 +15,7 @@ namespace WasmEdge::Host::WASINN::GGML {
#ifdef WASMEDGE_PLUGIN_WASI_NN_BACKEND_GGML

namespace details {
Expect<ErrNo> parseMetadata(Graph &GraphRef, const std::string &Metadata,
bool *IsModelUpdated = nullptr) noexcept {
Expect<ErrNo> parseMetadata(Graph &GraphRef, const std::string &Metadata) noexcept {
simdjson::dom::parser Parser;
simdjson::dom::element Doc;
auto ParseError = Parser.parse(Metadata).get(Doc);
Expand All @@ -26,8 +25,6 @@ Expect<ErrNo> parseMetadata(Graph &GraphRef, const std::string &Metadata,
}

// Get metadata from the json.
// Need to update Model:
// * n_gpu_layers
// Need to update Context:
// * ctx-size
// * batch-size
Expand Down Expand Up @@ -81,16 +78,6 @@ Expect<ErrNo> parseMetadata(Graph &GraphRef, const std::string &Metadata,
GraphRef.ReversePrompt = ReversePrompt;
}

// The model parameters.
if (Doc.at_key("n-gpu-layers").error() == simdjson::SUCCESS) {
auto Err = Doc["n-gpu-layers"].get<int64_t>().get(GraphRef.NGPULayers);
if (Err) {
spdlog::error(
"[WASI-NN] GGML backend: Unable to retrieve the n-gpu-layers option."sv);
return ErrNo::InvalidArgument;
}
}

// The context parameters.
if (Doc.at_key("ctx-size").error() == simdjson::SUCCESS) {
auto Err = Doc["ctx-size"].get<uint64_t>().get(GraphRef.CtxSize);
Expand Down Expand Up @@ -127,11 +114,6 @@ Expect<ErrNo> parseMetadata(Graph &GraphRef, const std::string &Metadata,
}
}

// Check if the model is updated.
if (IsModelUpdated && ModelParams.n_gpu_layers != GraphRef.NGPULayers) {
*IsModelUpdated = true;
}

return ErrNo::Success;
}

Expand Down Expand Up @@ -167,8 +149,12 @@ Expect<ErrNo> parseModelConfig(Graph &GraphRef,
Token = Config.substr(0, Pos);
try {
if (Token == "n_gpu_layers" || Token == "ngl") {
#ifndef __APPLE__
GraphRef.NGPULayers =
std::stoi(Config.substr(Pos + Delimiter.length()));
#else
GraphRef.NGPULayers = 1; // Force enabled Metal on macOS
#endif
}
} catch (const std::invalid_argument &e) {
spdlog::error(
Expand Down Expand Up @@ -316,7 +302,6 @@ Expect<ErrNo> setInput(WasiNNEnvironment &Env, uint32_t ContextId,
spdlog::info("[WASI-NN][Debug] GGML backend: setInput"sv);
}

bool IsModelParamsUpdated = false;
// Use index 1 for metadata.
if (Index == 1) {
if (GraphRef.EnableDebugLog) {
Expand All @@ -326,35 +311,13 @@ Expect<ErrNo> setInput(WasiNNEnvironment &Env, uint32_t ContextId,
const std::string Metadata(reinterpret_cast<char *>(Tensor.Tensor.data()),
Tensor.Tensor.size());
auto Res =
details::parseMetadata(GraphRef, Metadata, &IsModelParamsUpdated);
details::parseMetadata(GraphRef, Metadata);

if (Res != ErrNo::Success) {
spdlog::error("[WASI-NN] GGML backend: Failed to parse metadata."sv);
return Res;
}

// XXX: Due to the limitation of WASI-NN proposal,
// we have no way to pass the metadata before the setInput phase
// when we want to do some configurations in the load phase.
// That's why we have this hack.
#ifndef __APPLE__
{
if (IsModelParamsUpdated) {
llama_model_params ModelParams = llama_model_default_params();
ModelParams.n_gpu_layers = GraphRef.NGPULayers;
llama_free_model(GraphRef.LlamaModel);
GraphRef.LlamaModel = llama_load_model_from_file(
GraphRef.ModelFilePath.c_str(), ModelParams);
if (GraphRef.LlamaModel == nullptr) {
spdlog::error(
"[WASI-NN] GGML backend: Error: unable to init model."sv);
Env.NNGraph.pop_back();
return ErrNo::InvalidArgument;
}
}
}
#endif

if (GraphRef.EnableDebugLog) {
spdlog::info(
"[WASI-NN][Debug] GGML backend: found Metadata, processing...Done"sv);
Expand Down

0 comments on commit b421e73

Please sign in to comment.