diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc
index 796f70d16..9e4ba1e9f 100644
--- a/engine/controllers/models.cc
+++ b/engine/controllers/models.cc
@@ -442,6 +442,14 @@ void Models::StartModel(
   // model_path has higher priority
   if (auto& o = (*(req->getJsonObject()))["llama_model_path"]; !o.isNull()) {
     params_override.model_path = o.asString();
+    if (auto& mp = (*(req->getJsonObject()))["model_path"]; mp.isNull()) {
+      // Bypass if model does not exist in DB and llama_model_path exists
+      if (std::filesystem::exists(params_override.model_path.value()) &&
+          !model_service_->HasModel(model_handle)) {
+        CTL_INF("llama_model_path exists, bypass check model id");
+        params_override.bypass_llama_model_path = true;
+      }
+    }
   }
 
   if (auto& o = (*(req->getJsonObject()))["model_path"]; !o.isNull()) {
@@ -489,7 +497,7 @@ void Models::StartModel(
     auto& v = result.value();
     Json::Value ret;
     ret["message"] = "Started successfully!";
-    if(v.warning) {
+    if (v.warning) {
       ret["warning"] = *(v.warning);
     }
     auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index 3a8507c22..793e8ecb5 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -381,6 +381,10 @@ cpp::result<std::string, std::string> ModelService::HandleUrl(
   return unique_model_id;
 }
 
+bool ModelService::HasModel(const std::string& id) const {
+  return cortex::db::Models().HasModel(id);  // delegates to cortex::db lookup
+}
+
 cpp::result<DownloadTask, std::string>
 ModelService::DownloadModelFromCortexsoAsync(
     const std::string& name, const std::string& branch,
@@ -745,7 +749,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
       return cpp::fail(
           "Not enough VRAM - required: " + std::to_string(vram_needed_MiB) +
           " MiB, available: " + std::to_string(free_vram_MiB) +
-          " MiB - Should adjust ngl to " + std::to_string(free_vram_MiB / (vram_needed_MiB / ngl) - 1));
+          " MiB - Should adjust ngl to " +
+          std::to_string(free_vram_MiB / (vram_needed_MiB / ngl) - 1));
     }
 
     if (ram_needed_MiB > free_ram_MiB) {
diff --git a/engine/services/model_service.h b/engine/services/model_service.h
index 47d61c154..7b6375e54 100644
--- a/engine/services/model_service.h
+++ b/engine/services/model_service.h
@@ -3,10 +3,10 @@
 #include <memory>
 #include <optional>
 #include <string>
+#include "common/engine_servicei.h"
 #include "config/model_config.h"
 #include "services/download_service.h"
 #include "services/inference_service.h"
-#include "common/engine_servicei.h"
 
 struct ModelPullInfo {
   std::string id;
@@ -26,12 +26,15 @@ struct StartParameterOverride {
   std::optional<std::string> cache_type;
   std::optional<std::string> mmproj;
   std::optional<std::string> model_path;
-  bool bypass_model_check() const { return mmproj.has_value(); }
+  bool bypass_llama_model_path = false;  // set to true in Models::StartModel
+  bool bypass_model_check() const {  // true if either bypass condition holds
+    return mmproj.has_value() || bypass_llama_model_path;
+  }
 };
 
 struct StartModelResult {
- bool success;
- std::optional<std::string> warning;
+  bool success;
+  std::optional<std::string> warning;  // sent back as ret["warning"] if set
 };
 
 class ModelService {
@@ -89,6 +92,8 @@ class ModelService {
       const std::string& url, std::optional<std::string> temp_model_id,
       std::optional<std::string> temp_name);
 
+  bool HasModel(const std::string& id) const;  // see cortex::db::Models
+
  private:
   /**
    * Handle downloading model which have following pattern: author/model_name