diff --git a/engine/commands/chat_cmd.cc b/engine/commands/chat_cmd.cc index e535fa704..67ed651a4 100644 --- a/engine/commands/chat_cmd.cc +++ b/engine/commands/chat_cmd.cc @@ -1,8 +1,10 @@ #include "chat_cmd.h" #include "httplib.h" +#include "cortex_upd_cmd.h" #include "trantor/utils/Logger.h" #include "utils/logging_utils.h" +#include "server_start_cmd.h" namespace commands { namespace { @@ -33,6 +35,15 @@ ChatCmd::ChatCmd(std::string host, int port, const config::ModelConfig& mc) : host_(std::move(host)), port_(port), mc_(mc) {} void ChatCmd::Exec(std::string msg) { + // Check if server is started + { + if (!commands::IsServerAlive(host_, port_)) { + CLI_LOG("Server is not started yet, please run `" + << commands::GetCortexBinary() << " start` to start server!"); + return; + } + } + auto address = host_ + ":" + std::to_string(port_); // Check if model is loaded // TODO(sang) only llamacpp support modelstatus for now diff --git a/engine/commands/cortex_upd_cmd.cc b/engine/commands/cortex_upd_cmd.cc index 7c2a1f423..3c892f6fc 100644 --- a/engine/commands/cortex_upd_cmd.cc +++ b/engine/commands/cortex_upd_cmd.cc @@ -15,7 +15,7 @@ void CortexUpdCmd::Exec(std::string v) { { auto config = file_manager_utils::GetCortexConfig(); httplib::Client cli(config.apiServerHost + ":" + config.apiServerPort); - auto res = cli.Get("/health/healthz"); + auto res = cli.Get("/healthz"); if (res) { CLI_LOG("Server is running. 
Stopping server before updating!"); commands::ServerStopCmd ssc(config.apiServerHost, diff --git a/engine/commands/model_start_cmd.cc b/engine/commands/model_start_cmd.cc index 83d051891..2eb137dac 100644 --- a/engine/commands/model_start_cmd.cc +++ b/engine/commands/model_start_cmd.cc @@ -1,7 +1,10 @@ #include "model_start_cmd.h" +#include "cortex_upd_cmd.h" #include "httplib.h" #include "nlohmann/json.hpp" +#include "server_start_cmd.h" #include "trantor/utils/Logger.h" +#include "utils/file_manager_utils.h" #include "utils/logging_utils.h" namespace commands { @@ -10,7 +13,15 @@ ModelStartCmd::ModelStartCmd(std::string host, int port, : host_(std::move(host)), port_(port), mc_(mc) {} bool ModelStartCmd::Exec() { + // Check if server is started + if (!commands::IsServerAlive(host_, port_)) { + CLI_LOG("Server is not started yet, please run `" + << commands::GetCortexBinary() << " start` to start server!"); + return false; + } + httplib::Client cli(host_ + ":" + std::to_string(port_)); + nlohmann::json json_data; if (mc_.files.size() > 0) { // TODO(sang) support multiple files diff --git a/engine/commands/run_cmd.cc b/engine/commands/run_cmd.cc index 64bc50d6f..1fb3706d7 100644 --- a/engine/commands/run_cmd.cc +++ b/engine/commands/run_cmd.cc @@ -2,7 +2,13 @@ #include "chat_cmd.h" #include "cmd_info.h" #include "config/yaml_config.h" +#include "engine_install_cmd.h" +#include "httplib.h" +#include "model_pull_cmd.h" #include "model_start_cmd.h" +#include "server_start_cmd.h" +#include "trantor/utils/Logger.h" +#include "utils/cortex_utils.h" #include "utils/file_manager_utils.h" namespace commands { @@ -15,7 +21,7 @@ void RunCmd::Exec() { // TODO should we clean all resource if something fails? // Check if model existed. 
If not, download it { - auto model_conf = model_service_.GetDownloadedModel(model_id_); + auto model_conf = model_service_.GetDownloadedModel(model_file + ".yaml"); if (!model_conf.has_value()) { model_service_.DownloadModel(model_id_); } @@ -35,6 +41,17 @@ void RunCmd::Exec() { } } + // Start server if it is not running + { + if (!commands::IsServerAlive(host_, port_)) { + CLI_LOG("Starting server ..."); + commands::ServerStartCmd ssc; + if(!ssc.Exec(host_, port_)) { + return; + } + } + } + // Start model config::YamlHandler yaml_handler; yaml_handler.ModelConfigFromFile( diff --git a/engine/commands/server_start_cmd.cc b/engine/commands/server_start_cmd.cc new file mode 100644 index 000000000..613554c83 --- /dev/null +++ b/engine/commands/server_start_cmd.cc @@ -0,0 +1,106 @@ +#include "server_start_cmd.h" +#include "commands/cortex_upd_cmd.h" +#include "httplib.h" +#include "trantor/utils/Logger.h" +#include "utils/cortex_utils.h" +#include "utils/file_manager_utils.h" +#include "utils/logging_utils.h" + +namespace commands { + +namespace { +bool TryConnectToServer(const std::string& host, int port) { + constexpr const auto kMaxRetry = 3u; + auto count = 0u; + // Check if server is started + while (true) { + if (IsServerAlive(host, port)) + break; + // Wait for server up + std::this_thread::sleep_for(std::chrono::seconds(1)); + if (count++ == kMaxRetry) { + std::cerr << "Could not start server" << std::endl; + return false; + } + } + return true; +} +} // namespace + +ServerStartCmd::ServerStartCmd() {} + +bool ServerStartCmd::Exec(const std::string& host, int port) { +#if defined(_WIN32) || defined(_WIN64) + // Windows-specific code to create a new process + STARTUPINFO si; + PROCESS_INFORMATION pi; + + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + ZeroMemory(&pi, sizeof(pi)); + auto exe = commands::GetCortexBinary(); + std::string cmds = + cortex_utils::GetCurrentPath() + "/" + exe + " --start-server"; + // Create child process + if (!CreateProcess( + 
NULL, // No module name (use command line) + const_cast<char*>( + cmds.c_str()), // Command line (replace with your actual executable) + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + FALSE, // Set handle inheritance to FALSE + 0, // No creation flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &si, // Pointer to STARTUPINFO structure + &pi)) // Pointer to PROCESS_INFORMATION structure + { + std::cout << "Could not start server: " << GetLastError() << std::endl; + return false; + } else { + if(!TryConnectToServer(host, port)) { + return false; + } + std::cout << "Server started" << std::endl; + } + +#else + // Unix-like system-specific code to fork a child process + pid_t pid = fork(); + + if (pid < 0) { + // Fork failed + std::cerr << "Could not start server: " << std::endl; + return false; + } else if (pid == 0) { + // No need to configure LD_LIBRARY_PATH for macOS +#if !defined(__APPLE__) || !defined(__MACH__) + const char* name = "LD_LIBRARY_PATH"; + auto data = getenv(name); + std::string v; + if (auto g = getenv(name); g) { + v += g; + } + CTL_INF("LD_LIBRARY_PATH: " << v); + auto data_path = file_manager_utils::GetCortexDataPath(); + auto llamacpp_path = data_path / "engines" / "cortex.llamacpp/"; + auto trt_path = data_path / "engines" / "cortex.tensorrt-llm/"; + auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v; + setenv(name, new_v.c_str(), true); + CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); +#endif + auto exe = commands::GetCortexBinary(); + std::string p = cortex_utils::GetCurrentPath() + "/" + exe; + execl(p.c_str(), exe.c_str(), "--start-server", (char*)0); + } else { + // Parent process + if(!TryConnectToServer(host, port)) { + return false; + } + std::cout << "Server started" << std::endl; + } +#endif + return true; +} + +}; // namespace commands \ No newline at end of file diff --git a/engine/commands/server_start_cmd.h 
b/engine/commands/server_start_cmd.h new file mode 100644 index 000000000..cb74c5ebc --- /dev/null +++ b/engine/commands/server_start_cmd.h @@ -0,0 +1,21 @@ +#pragma once +#include <string> +#include "httplib.h" + +namespace commands { + +inline bool IsServerAlive(const std::string& host, int port) { + httplib::Client cli(host + ":" + std::to_string(port)); + auto res = cli.Get("/healthz"); + if (res && res->status == httplib::StatusCode::OK_200) { + return true; + } + return false; +} + +class ServerStartCmd { + public: + ServerStartCmd(); + bool Exec(const std::string& host, int port); +}; +} // namespace commands \ No newline at end of file diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index 824f9dcc9..3046d1e70 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -13,6 +13,7 @@ #include "commands/model_start_cmd.h" #include "commands/model_stop_cmd.h" #include "commands/run_cmd.h" +#include "commands/server_start_cmd.h" #include "commands/server_stop_cmd.h" #include "config/yaml_config.h" #include "services/engine_service.h" @@ -174,6 +175,21 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { }); } + auto start_cmd = app_.add_subcommand("start", "Start the API server"); + int port = std::stoi(config.apiServerPort); + start_cmd->add_option("-p, --port", port, "Server port to listen"); + start_cmd->callback([&config, &port] { + if (port != stoi(config.apiServerPort)) { + CTL_INF("apiServerPort changed from " << config.apiServerPort << " to " + << port); + auto config_path = file_manager_utils::GetConfigurationPath(); + config.apiServerPort = std::to_string(port); + config_yaml_utils::DumpYamlConfig(config, config_path.string()); + } + commands::ServerStartCmd ssc; + ssc.Exec(config.apiServerHost, std::stoi(config.apiServerPort)); + }); + auto stop_cmd = app_.add_subcommand("stop", "Stop the API server"); stop_cmd->callback([&config] { @@ -208,6 +224,10 
@@ bool CommandLineParser::SetupCommand(int argc, char** argv) { } CLI11_PARSE(app_, argc, argv); + if (argc == 1) { + CLI_LOG(app_.help()); + return true; + } // Check new update, only check for stable release for now #ifdef CORTEX_CPP_VERSION diff --git a/engine/e2e-test/test_runner.py b/engine/e2e-test/test_runner.py index bedf8d39d..dd634d747 100644 --- a/engine/e2e-test/test_runner.py +++ b/engine/e2e-test/test_runner.py @@ -50,7 +50,10 @@ def start_server() -> bool: def start_server_nix() -> bool: executable = getExecutablePath() process = subprocess.Popen( - executable, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + [executable] + ['start', '-p', '3928'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True ) start_time = time.time() @@ -77,7 +80,7 @@ def start_server_nix() -> bool: def start_server_windows() -> bool: executable = getExecutablePath() process = subprocess.Popen( - executable, + [executable] + ['start', '-p', '3928'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, diff --git a/engine/main.cc b/engine/main.cc index 06513d638..bdac8148c 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -85,72 +85,6 @@ void RunServer() { // return 0; } -void ForkProcess() { -#if defined(_WIN32) || defined(_WIN64) - // Windows-specific code to create a new process - STARTUPINFO si; - PROCESS_INFORMATION pi; - - ZeroMemory(&si, sizeof(si)); - si.cb = sizeof(si); - ZeroMemory(&pi, sizeof(pi)); - auto exe = commands::GetCortexBinary(); - std::string cmds = - cortex_utils::GetCurrentPath() + "/" + exe + " --start-server"; - // Create child process - if (!CreateProcess( - NULL, // No module name (use command line) - const_cast<char*>( - cmds.c_str()), // Command line (replace with your actual executable) - NULL, // Process handle not inheritable - NULL, // Thread handle not inheritable - FALSE, // Set handle inheritance to FALSE - 0, // No creation flags - NULL, // Use parent's environment block - NULL, // Use parent's starting directory 
- &si, // Pointer to STARTUPINFO structure - &pi)) // Pointer to PROCESS_INFORMATION structure - { - std::cout << "Could not start server: " << GetLastError() << std::endl; - } else { - std::cout << "Server started" << std::endl; - } - -#else - // Unix-like system-specific code to fork a child process - pid_t pid = fork(); - - if (pid < 0) { - // Fork failed - std::cerr << "Could not start server: " << std::endl; - return; - } else if (pid == 0) { - // No need to configure LD_LIBRARY_PATH for macOS -#if !defined(__APPLE__) || !defined(__MACH__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto data_path = file_manager_utils::GetCortexDataPath(); - auto llamacpp_path = data_path / "engines" / "cortex.llamacpp/"; - auto trt_path = data_path / "engines" / "cortex.tensorrt-llm/"; - auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif - auto exe = commands::GetCortexBinary(); - std::string p = cortex_utils::GetCurrentPath() + "/" + exe; - execl(p.c_str(), exe.c_str(), "--start-server", (char*)0); - } else { - // Parent process - std::cout << "Server started" << std::endl; - } -#endif -} - int main(int argc, char* argv[]) { // Stop the program if the system is not supported auto system_info = system_info_utils::GetSystemInfo(); @@ -195,41 +129,34 @@ int main(int argc, char* argv[]) { } } - if (argc > 1) { - if (strcmp(argv[1], "--start-server") == 0) { - RunServer(); - return 0; - } else { - bool verbose = false; - for (int i = 0; i < argc; i++) { - if (strcmp(argv[i], "--verbose") == 0) { - verbose = true; - } - } + if (argc > 1 && strcmp(argv[1], "--start-server") == 0) { + RunServer(); + return 0; + } - trantor::FileLogger asyncFileLogger; - if (!verbose) { - auto config = file_manager_utils::GetCortexConfig(); - 
std::filesystem::create_directories( - std::filesystem::path(config.logFolderPath) / - std::filesystem::path(cortex_utils::logs_folder)); - asyncFileLogger.setFileName(config.logFolderPath + "/" + - cortex_utils::logs_cli_base_name); - asyncFileLogger.setMaxLines( - config.maxLogLines); // Keep last 100000 lines - asyncFileLogger.startLogging(); - trantor::Logger::setOutputFunction( - [&](const char* msg, const uint64_t len) { - asyncFileLogger.output_(msg, len); - }, - [&]() { asyncFileLogger.flush(); }); - } - CommandLineParser clp; - clp.SetupCommand(argc, argv); - return 0; + bool verbose = false; + for (int i = 0; i < argc; i++) { + if (strcmp(argv[i], "--verbose") == 0) { + verbose = true; } } - - ForkProcess(); + trantor::FileLogger asyncFileLogger; + if (!verbose) { + auto config = file_manager_utils::GetCortexConfig(); + std::filesystem::create_directories( + std::filesystem::path(config.logFolderPath) / + std::filesystem::path(cortex_utils::logs_folder)); + asyncFileLogger.setFileName(config.logFolderPath + "/" + + cortex_utils::logs_cli_base_name); + asyncFileLogger.setMaxLines(config.maxLogLines); // Keep last 100000 lines + asyncFileLogger.startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + asyncFileLogger.output_(msg, len); + }, + [&]() { asyncFileLogger.flush(); }); + } + CommandLineParser clp; + clp.SetupCommand(argc, argv); return 0; }