Skip to content

Commit

Permalink
[WASI-NN] ggml backend: all model parameters can only be set in config…
Browse files Browse the repository at this point in the history
… stage

Signed-off-by: hydai <z54981220@gmail.com>
  • Loading branch information
hydai committed Dec 18, 2023
1 parent 5b06107 commit b421e73
Showing 1 changed file with 6 additions and 43 deletions.
49 changes: 6 additions & 43 deletions plugins/wasi_nn/ggml.cpp
Expand Up @@ -15,8 +15,7 @@ namespace WasmEdge::Host::WASINN::GGML {
#ifdef WASMEDGE_PLUGIN_WASI_NN_BACKEND_GGML

namespace details {
Expect<ErrNo> parseMetadata(Graph &GraphRef, const std::string &Metadata,
bool *IsModelUpdated = nullptr) noexcept {
Expect<ErrNo> parseMetadata(Graph &GraphRef, const std::string &Metadata) noexcept {
simdjson::dom::parser Parser;
simdjson::dom::element Doc;
auto ParseError = Parser.parse(Metadata).get(Doc);
Expand All @@ -26,8 +25,6 @@ Expect<ErrNo> parseMetadata(Graph &GraphRef, const std::string &Metadata,
}

// Get metadata from the json.
// Need to update Model:
// * n_gpu_layers
// Need to update Context:
// * ctx-size
// * batch-size
Expand Down Expand Up @@ -81,16 +78,6 @@ Expect<ErrNo> parseMetadata(Graph &GraphRef, const std::string &Metadata,
GraphRef.ReversePrompt = ReversePrompt;
}

// The model parameters.
if (Doc.at_key("n-gpu-layers").error() == simdjson::SUCCESS) {
auto Err = Doc["n-gpu-layers"].get<int64_t>().get(GraphRef.NGPULayers);
if (Err) {
spdlog::error(
"[WASI-NN] GGML backend: Unable to retrieve the n-gpu-layers option."sv);
return ErrNo::InvalidArgument;
}
}

// The context parameters.
if (Doc.at_key("ctx-size").error() == simdjson::SUCCESS) {
auto Err = Doc["ctx-size"].get<uint64_t>().get(GraphRef.CtxSize);
Expand Down Expand Up @@ -127,11 +114,6 @@ Expect<ErrNo> parseMetadata(Graph &GraphRef, const std::string &Metadata,
}
}

// Check if the model is updated.
if (IsModelUpdated && ModelParams.n_gpu_layers != GraphRef.NGPULayers) {
*IsModelUpdated = true;
}

return ErrNo::Success;
}

Expand Down Expand Up @@ -167,8 +149,12 @@ Expect<ErrNo> parseModelConfig(Graph &GraphRef,
Token = Config.substr(0, Pos);
try {
if (Token == "n_gpu_layers" || Token == "ngl") {
#ifndef __APPLE__
GraphRef.NGPULayers =
std::stoi(Config.substr(Pos + Delimiter.length()));
#else
GraphRef.NGPULayers = 1; // Force enabled Metal on macOS
#endif
}
} catch (const std::invalid_argument &e) {
spdlog::error(
Expand Down Expand Up @@ -316,7 +302,6 @@ Expect<ErrNo> setInput(WasiNNEnvironment &Env, uint32_t ContextId,
spdlog::info("[WASI-NN][Debug] GGML backend: setInput"sv);
}

bool IsModelParamsUpdated = false;
// Use index 1 for metadata.
if (Index == 1) {
if (GraphRef.EnableDebugLog) {
Expand All @@ -326,35 +311,13 @@ Expect<ErrNo> setInput(WasiNNEnvironment &Env, uint32_t ContextId,
const std::string Metadata(reinterpret_cast<char *>(Tensor.Tensor.data()),
Tensor.Tensor.size());
auto Res =
details::parseMetadata(GraphRef, Metadata, &IsModelParamsUpdated);
details::parseMetadata(GraphRef, Metadata);

if (Res != ErrNo::Success) {
spdlog::error("[WASI-NN] GGML backend: Failed to parse metadata."sv);
return Res;
}

// XXX: Due to the limitation of WASI-NN proposal,
// we have no way to pass the metadata before the setInput phase
// when we want to do some configurations in the load phase.
// That's why we have this hack.
#ifndef __APPLE__
{
if (IsModelParamsUpdated) {
llama_model_params ModelParams = llama_model_default_params();
ModelParams.n_gpu_layers = GraphRef.NGPULayers;
llama_free_model(GraphRef.LlamaModel);
GraphRef.LlamaModel = llama_load_model_from_file(
GraphRef.ModelFilePath.c_str(), ModelParams);
if (GraphRef.LlamaModel == nullptr) {
spdlog::error(
"[WASI-NN] GGML backend: Error: unable to init model."sv);
Env.NNGraph.pop_back();
return ErrNo::InvalidArgument;
}
}
}
#endif

if (GraphRef.EnableDebugLog) {
spdlog::info(
"[WASI-NN][Debug] GGML backend: found Metadata, processing...Done"sv);
Expand Down

0 comments on commit b421e73

Please sign in to comment.